1/*
2 Copyright (c) 2005, 2017, Oracle and/or its affiliates.
3 Copyright (c) 2009, 2018, MariaDB
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19/*
20 This handler was developed by Mikael Ronstrom for version 5.1 of MySQL.
21 It is an abstraction layer on top of other handlers such as MyISAM,
22 InnoDB, Federated, Berkeley DB and so forth. Partitioned tables can also
23 be handled by a storage engine. The current example of this is NDB
  Cluster that has internally handled partitioning. This has benefits in
25 that many loops needed in the partition handler can be avoided.
26
27 Partitioning has an inherent feature which in some cases is positive and
28 in some cases is negative. It splits the data into chunks. This makes
29 the data more manageable, queries can easily be parallelised towards the
30 parts and indexes are split such that there are less levels in the
31 index trees. The inherent disadvantage is that to use a split index
32 one has to scan all index parts which is ok for large queries but for
33 small queries it can be a disadvantage.
34
35 Partitioning lays the foundation for more manageable databases that are
36 extremely large. It does also lay the foundation for more parallelism
37 in the execution of queries. This functionality will grow with later
38 versions of MySQL/MariaDB.
39
  The partition is set up to use table locks. It implements a partition "SHARE"
41 that is inserted into a hash by table name. You can use this to store
42 information of state that any partition handler object will be able to see
43 if it is using the same table.
44
45 Please read the object definition in ha_partition.h before reading the rest
  of this file.
47*/
48
49#include "mariadb.h"
50#include "sql_priv.h"
51#include "sql_parse.h" // append_file_to_dir
52#include "create_options.h"
53
54#ifdef WITH_PARTITION_STORAGE_ENGINE
55#include "ha_partition.h"
56#include "sql_table.h" // tablename_to_filename
57#include "key.h"
58#include "sql_plugin.h"
59#include "sql_show.h" // append_identifier
60#include "sql_admin.h" // SQL_ADMIN_MSG_TEXT_SIZE
61#include "sql_select.h"
62
63#include "debug_sync.h"
64
/* First 4 bytes in the .par file is the number of 32-bit words in the file */
#define PAR_WORD_SIZE 4
/* offset to the .par file checksum */
#define PAR_CHECKSUM_OFFSET 4
/* offset to the total number of partitions */
#define PAR_NUM_PARTS_OFFSET 8
/* offset to the engines array */
#define PAR_ENGINES_OFFSET 12
/* Table flags the partition handler always enables for itself */
#define PARTITION_ENABLED_TABLE_FLAGS (HA_FILE_BASED | \
                                       HA_REC_NOT_IN_SEQ | \
                                       HA_CAN_REPAIR)
/* Table flags that are disabled for the partition handler */
#define PARTITION_DISABLED_TABLE_FLAGS (HA_CAN_GEOMETRY | \
                                        HA_DUPLICATE_POS | \
                                        HA_CAN_INSERT_DELAYED | \
                                        HA_READ_BEFORE_WRITE_REMOVAL |\
                                        HA_CAN_TABLES_WITHOUT_ROLLBACK)

/* Filename extension of the per-table partition metadata (.par) file */
static const char *ha_par_ext= ".par";
83
84/****************************************************************************
85 MODULE create/delete handler object
86****************************************************************************/
87
88static handler *partition_create_handler(handlerton *hton,
89 TABLE_SHARE *share,
90 MEM_ROOT *mem_root);
91static uint partition_flags();
92static alter_table_operations alter_table_flags(alter_table_operations flags);
93
94/*
95 If frm_error() is called then we will use this to to find out what file
96 extensions exist for the storage engine. This is also used by the default
97 rename_table and delete_table method in handler.cc.
98*/
99static const char *ha_partition_ext[]=
100{
101 ha_par_ext, NullS
102};
103
104
#ifdef HAVE_PSI_INTERFACE
/* Performance schema key for Partition_share::auto_inc_mutex */
PSI_mutex_key key_partition_auto_inc_mutex;

/* Mutexes of this module registered with the performance schema */
static PSI_mutex_info all_partition_mutexes[]=
{
  { &key_partition_auto_inc_mutex, "Partition_share::auto_inc_mutex", 0}
};

/* Register the mutexes above under the "partition" category */
static void init_partition_psi_keys(void)
{
  const char* category= "partition";
  int count;

  count= array_elements(all_partition_mutexes);
  mysql_mutex_register(category, all_partition_mutexes, count);
}
#endif /* HAVE_PSI_INTERFACE */
122
123static int partition_initialize(void *p)
124{
125 handlerton *partition_hton;
126 partition_hton= (handlerton *)p;
127
128 partition_hton->state= SHOW_OPTION_YES;
129 partition_hton->db_type= DB_TYPE_PARTITION_DB;
130 partition_hton->create= partition_create_handler;
131 partition_hton->partition_flags= partition_flags;
132 partition_hton->alter_table_flags= alter_table_flags;
133 partition_hton->flags= HTON_NOT_USER_SELECTABLE |
134 HTON_HIDDEN |
135 HTON_TEMPORARY_NOT_SUPPORTED;
136 partition_hton->tablefile_extensions= ha_partition_ext;
137
138#ifdef HAVE_PSI_INTERFACE
139 init_partition_psi_keys();
140#endif
141 return 0;
142}
143
144
145/**
146 Initialize and allocate space for partitions shares.
147
148 @param num_parts Number of partitions to allocate storage for.
149
150 @return Operation status.
151 @retval true Failure (out of memory).
152 @retval false Success.
153*/
154
155bool Partition_share::init(uint num_parts)
156{
157 DBUG_ENTER("Partition_share::init");
158 auto_inc_initialized= false;
159 partition_name_hash_initialized= false;
160 next_auto_inc_val= 0;
161 if (partitions_share_refs.init(num_parts))
162 {
163 DBUG_RETURN(true);
164 }
165 DBUG_RETURN(false);
166}
167
168
169/*
170 Create new partition handler
171
172 SYNOPSIS
173 partition_create_handler()
174 table Table object
175
176 RETURN VALUE
177 New partition object
178*/
179
180static handler *partition_create_handler(handlerton *hton,
181 TABLE_SHARE *share,
182 MEM_ROOT *mem_root)
183{
184 ha_partition *file= new (mem_root) ha_partition(hton, share);
185 if (file && file->initialize_partition(mem_root))
186 {
187 delete file;
188 file= 0;
189 }
190 return file;
191}
192
193/*
194 HA_CAN_PARTITION:
195 Used by storage engines that can handle partitioning without this
196 partition handler
197 (Partition, NDB)
198
199 HA_CAN_UPDATE_PARTITION_KEY:
200 Set if the handler can update fields that are part of the partition
201 function.
202
203 HA_CAN_PARTITION_UNIQUE:
204 Set if the handler can handle unique indexes where the fields of the
205 unique key are not part of the fields of the partition function. Thus
206 a unique key can be set on all fields.
207
208 HA_USE_AUTO_PARTITION
209 Set if the handler sets all tables to be partitioned by default.
210*/
211
212static uint partition_flags()
213{
214 return HA_CAN_PARTITION;
215}
216
217static alter_table_operations alter_table_flags(alter_table_operations flags __attribute__((unused)))
218{
219 return (HA_PARTITION_FUNCTION_SUPPORTED |
220 HA_FAST_CHANGE_PARTITION);
221}
222
223/*
224 Constructor method
225
226 SYNOPSIS
227 ha_partition()
228 table Table object
229
230 RETURN VALUE
231 NONE
232*/
233
ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share)
  :handler(hton, share)
{
  DBUG_ENTER("ha_partition::ha_partition(table)");
  /* Set up the private MEM_ROOT and give all members known defaults */
  ha_partition_init();
  DBUG_VOID_RETURN;
}
241
242
/* Initialize all partition variables */

void ha_partition::ha_partition_init()
{
  /* Private MEM_ROOT for allocations owned by this handler object */
  init_alloc_root(&m_mem_root, "ha_partition", 512, 512, MYF(0));
  init_handler_variables();
}
250
251/*
252 Constructor method
253
254 SYNOPSIS
255 ha_partition()
256 part_info Partition info
257
258 RETURN VALUE
259 NONE
260*/
261
262ha_partition::ha_partition(handlerton *hton, partition_info *part_info)
263 :handler(hton, NULL)
264{
265 DBUG_ENTER("ha_partition::ha_partition(part_info)");
266 DBUG_ASSERT(part_info);
267 ha_partition_init();
268 m_part_info= part_info;
269 m_create_handler= TRUE;
270 m_is_sub_partitioned= m_part_info->is_sub_partitioned();
271 DBUG_VOID_RETURN;
272}
273
274/**
275 ha_partition constructor method used by ha_partition::clone()
276
277 @param hton Handlerton (partition_hton)
278 @param share Table share object
279 @param part_info_arg partition_info to use
280 @param clone_arg ha_partition to clone
281 @param clme_mem_root_arg MEM_ROOT to use
282
283 @return New partition handler
284*/
285
286ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share,
287 partition_info *part_info_arg,
288 ha_partition *clone_arg,
289 MEM_ROOT *clone_mem_root_arg)
290 :handler(hton, share)
291{
292 DBUG_ENTER("ha_partition::ha_partition(clone)");
293 ha_partition_init();
294 m_part_info= part_info_arg;
295 m_create_handler= TRUE;
296 m_is_sub_partitioned= m_part_info->is_sub_partitioned();
297 m_is_clone_of= clone_arg;
298 m_clone_mem_root= clone_mem_root_arg;
299 part_share= clone_arg->part_share;
300 m_tot_parts= clone_arg->m_tot_parts;
301 m_pkey_is_clustered= clone_arg->primary_key_is_clustered();
302 DBUG_VOID_RETURN;
303}
304
305/*
306 Initialize handler object
307
308 SYNOPSIS
309 init_handler_variables()
310
311 RETURN VALUE
312 NONE
313*/
314
315void ha_partition::init_handler_variables()
316{
317 active_index= MAX_KEY;
318 m_mode= 0;
319 m_open_test_lock= 0;
320 m_file_buffer= NULL;
321 m_name_buffer_ptr= NULL;
322 m_engine_array= NULL;
323 m_connect_string= NULL;
324 m_file= NULL;
325 m_file_tot_parts= 0;
326 m_reorged_file= NULL;
327 m_new_file= NULL;
328 m_reorged_parts= 0;
329 m_added_file= NULL;
330 m_tot_parts= 0;
331 m_pkey_is_clustered= 0;
332 m_part_spec.start_part= NO_CURRENT_PART_ID;
333 m_scan_value= 2;
334 m_ref_length= 0;
335 m_part_spec.end_part= NO_CURRENT_PART_ID;
336 m_index_scan_type= partition_no_index_scan;
337 m_start_key.key= NULL;
338 m_start_key.length= 0;
339 m_myisam= FALSE;
340 m_innodb= FALSE;
341 m_extra_cache= FALSE;
342 m_extra_cache_size= 0;
343 m_extra_prepare_for_update= FALSE;
344 m_extra_cache_part_id= NO_CURRENT_PART_ID;
345 m_handler_status= handler_not_initialized;
346 m_part_field_array= NULL;
347 m_ordered_rec_buffer= NULL;
348 m_top_entry= NO_CURRENT_PART_ID;
349 m_rec_length= 0;
350 m_last_part= 0;
351 m_rec0= 0;
352 m_err_rec= NULL;
353 m_curr_key_info[0]= NULL;
354 m_curr_key_info[1]= NULL;
355 m_part_func_monotonicity_info= NON_MONOTONIC;
356 m_key_not_found= FALSE;
357 auto_increment_lock= FALSE;
358 auto_increment_safe_stmt_log_lock= FALSE;
359 /*
360 this allows blackhole to work properly
361 */
362 m_num_locks= 0;
363 m_part_info= NULL;
364 m_create_handler= FALSE;
365 m_is_sub_partitioned= 0;
366 m_is_clone_of= NULL;
367 m_clone_mem_root= NULL;
368 part_share= NULL;
369 m_new_partitions_share_refs.empty();
370 m_part_ids_sorted_by_num_of_records= NULL;
371 m_partitions_to_open= NULL;
372
373 m_range_info= NULL;
374 m_mrr_full_buffer_size= 0;
375 m_mrr_new_full_buffer_size= 0;
376 m_mrr_full_buffer= NULL;
377 m_mrr_range_first= NULL;
378
379 m_pre_calling= FALSE;
380 m_pre_call_use_parallel= FALSE;
381
382 ft_first= ft_current= NULL;
383 bulk_access_executing= FALSE; // For future
384
385 /*
386 Clear bitmaps to allow on one to call my_bitmap_free() on them at any time
387 */
388 my_bitmap_clear(&m_bulk_insert_started);
389 my_bitmap_clear(&m_locked_partitions);
390 my_bitmap_clear(&m_partitions_to_reset);
391 my_bitmap_clear(&m_key_not_found_partitions);
392 my_bitmap_clear(&m_mrr_used_partitions);
393 my_bitmap_clear(&m_opened_partitions);
394 m_file_sample= NULL;
395
396#ifdef DONT_HAVE_TO_BE_INITALIZED
397 m_start_key.flag= 0;
398 m_ordered= TRUE;
399#endif
400}
401
402
403const char *ha_partition::table_type() const
404{
405 // we can do this since we only support a single engine type
406 return m_file[0]->table_type();
407}
408
409
410/*
411 Destructor method
412
413 SYNOPSIS
414 ~ha_partition()
415
416 RETURN VALUE
417 NONE
418*/
419
420ha_partition::~ha_partition()
421{
422 DBUG_ENTER("ha_partition::~ha_partition()");
423 if (m_new_partitions_share_refs.elements)
424 m_new_partitions_share_refs.delete_elements();
425 if (m_file != NULL)
426 {
427 uint i;
428 for (i= 0; i < m_tot_parts; i++)
429 delete m_file[i];
430 }
431 destroy_record_priority_queue();
432 my_free(m_part_ids_sorted_by_num_of_records);
433
434 if (m_added_file)
435 {
436 for (handler **ph= m_added_file; *ph; ph++)
437 delete (*ph);
438 }
439 clear_handler_file();
440 free_root(&m_mem_root, MYF(0));
441
442 DBUG_VOID_RETURN;
443}
444
445
446/*
447 Initialize partition handler object
448
449 SYNOPSIS
450 initialize_partition()
451 mem_root Allocate memory through this
452
453 RETURN VALUE
454 1 Error
455 0 Success
456
457 DESCRIPTION
458
459 The partition handler is only a layer on top of other engines. Thus it
460 can't really perform anything without the underlying handlers. Thus we
461 add this method as part of the allocation of a handler object.
462
463 1) Allocation of underlying handlers
464 If we have access to the partition info we will allocate one handler
465 instance for each partition.
466 2) Allocation without partition info
467 The cases where we don't have access to this information is when called
468 in preparation for delete_table and rename_table and in that case we
469 only need to set HA_FILE_BASED. In that case we will use the .par file
470 that contains information about the partitions and their engines and
471 the names of each partition.
472 3) Table flags initialisation
473 We need also to set table flags for the partition handler. This is not
474 static since it depends on what storage engines are used as underlying
475 handlers.
476 The table flags is set in this routine to simulate the behaviour of a
477 normal storage engine
478 The flag HA_FILE_BASED will be set independent of the underlying handlers
479 4) Index flags initialisation
480 When knowledge exists on the indexes it is also possible to initialize the
481 index flags. Again the index flags must be initialized by using the under-
482 lying handlers since this is storage engine dependent.
483 The flag HA_READ_ORDER will be reset for the time being to indicate no
484 ordered output is available from partition handler indexes. Later a merge
485 sort will be performed using the underlying handlers.
486 5) primary_key_is_clustered and has_transactions are
487 calculated here.
488
489*/
490
bool ha_partition::initialize_partition(MEM_ROOT *mem_root)
{
  handler **file_array, *file;
  ulonglong check_table_flags;
  DBUG_ENTER("ha_partition::initialize_partition");

  if (m_create_handler)
  {
    /* Partition info is known: allocate one handler per partition */
    m_tot_parts= m_part_info->get_tot_partitions();
    DBUG_ASSERT(m_tot_parts > 0);
    if (new_handlers_from_part_info(mem_root))
      DBUG_RETURN(1);
  }
  else if (!table_share || !table_share->normalized_path.str)
  {
    /*
      Called with dummy table share (delete, rename and alter table).
      Don't need to set-up anything.
    */
    DBUG_RETURN(0);
  }
  else if (get_from_handler_file(table_share->normalized_path.str,
                                 mem_root, false))
  {
    /* The .par file could not be read; nothing more can be done */
    my_error(ER_FAILED_READ_FROM_PAR_FILE, MYF(0));
    DBUG_RETURN(1);
  }
  /*
    We create all underlying table handlers here. We do it in this special
    method to be able to report allocation errors.

    Set up primary_key_is_clustered and
    has_transactions since they are called often in all kinds of places,
    other parameters are calculated on demand.
    Verify that all partitions have the same table_flags.
  */
  check_table_flags= m_file[0]->ha_table_flags();
  m_pkey_is_clustered= TRUE;
  file_array= m_file;
  do
  {
    file= *file_array;
    /* The PK is clustered only if it is clustered in every partition */
    if (!file->primary_key_is_clustered())
      m_pkey_is_clustered= FALSE;
    /* Mixing engines with different table flags is not allowed */
    if (check_table_flags != file->ha_table_flags())
    {
      my_error(ER_MIX_HANDLER_ERROR, MYF(0));
      DBUG_RETURN(1);
    }
  } while (*(++file_array));
  m_handler_status= handler_initialized;
  DBUG_RETURN(0);
}
544
545/****************************************************************************
546 MODULE meta data changes
547****************************************************************************/
548/*
549 Delete a table
550
551 SYNOPSIS
552 delete_table()
553 name Full path of table name
554
555 RETURN VALUE
556 >0 Error
557 0 Success
558
559 DESCRIPTION
560 Used to delete a table. By the time delete_table() has been called all
561 opened references to this table will have been closed (and your globally
562 shared references released. The variable name will just be the name of
563 the table. You will need to remove any files you have created at this
564 point.
565
566 If you do not implement this, the default delete_table() is called from
567 handler.cc and it will delete all files with the file extentions returned
568 by bas_ext().
569
570 Called from handler.cc by delete_table and ha_create_table(). Only used
571 during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
572 the storage engine.
573*/
574
575int ha_partition::delete_table(const char *name)
576{
577 DBUG_ENTER("ha_partition::delete_table");
578
579 DBUG_RETURN(del_ren_table(name, NULL));
580}
581
582
583/*
584 Rename a table
585
586 SYNOPSIS
587 rename_table()
588 from Full path of old table name
589 to Full path of new table name
590
591 RETURN VALUE
592 >0 Error
593 0 Success
594
595 DESCRIPTION
596 Renames a table from one name to another from alter table call.
597
598 If you do not implement this, the default rename_table() is called from
599 handler.cc and it will rename all files with the file extentions returned
600 by bas_ext().
601
602 Called from sql_table.cc by mysql_rename_table().
603*/
604
605int ha_partition::rename_table(const char *from, const char *to)
606{
607 DBUG_ENTER("ha_partition::rename_table");
608
609 DBUG_RETURN(del_ren_table(from, to));
610}
611
612
613/*
614 Create the handler file (.par-file)
615
616 SYNOPSIS
617 create_partitioning_metadata()
618 name Full path of table name
619 create_info Create info generated for CREATE TABLE
620
621 RETURN VALUE
622 >0 Error
623 0 Success
624
625 DESCRIPTION
626 create_partitioning_metadata is called to create any handler specific files
627 before opening the file with openfrm to later call ::create on the
628 file object.
629 In the partition handler this is used to store the names of partitions
630 and types of engines in the partitions.
631*/
632
633int ha_partition::create_partitioning_metadata(const char *path,
634 const char *old_path,
635 int action_flag)
636{
637 DBUG_ENTER("ha_partition::create_partitioning_metadata()");
638
639 /*
640 We need to update total number of parts since we might write the handler
641 file as part of a partition management command
642 */
643 if (action_flag == CHF_DELETE_FLAG ||
644 action_flag == CHF_RENAME_FLAG)
645 {
646 char name[FN_REFLEN];
647 char old_name[FN_REFLEN];
648
649 strxmov(name, path, ha_par_ext, NullS);
650 strxmov(old_name, old_path, ha_par_ext, NullS);
651 if ((action_flag == CHF_DELETE_FLAG &&
652 mysql_file_delete(key_file_partition, name, MYF(MY_WME))) ||
653 (action_flag == CHF_RENAME_FLAG &&
654 mysql_file_rename(key_file_partition, old_name, name, MYF(MY_WME))))
655 {
656 DBUG_RETURN(TRUE);
657 }
658 }
659 else if (action_flag == CHF_CREATE_FLAG)
660 {
661 if (create_handler_file(path))
662 {
663 my_error(ER_CANT_CREATE_HANDLER_FILE, MYF(0));
664 DBUG_RETURN(1);
665 }
666 }
667 DBUG_RETURN(0);
668}
669
670
671/*
672 Create a partitioned table
673
674 SYNOPSIS
675 create()
676 name Full path of table name
677 table_arg Table object
678 create_info Create info generated for CREATE TABLE
679
680 RETURN VALUE
681 >0 Error
682 0 Success
683
684 DESCRIPTION
685 create() is called to create a table. The variable name will have the name
686 of the table. When create() is called you do not need to worry about
687 opening the table. Also, the FRM file will have already been created so
688 adjusting create_info will not do you any good. You can overwrite the frm
689 file at this point if you wish to change the table definition, but there
690 are no methods currently provided for doing that.
691
692 Called from handler.cc by ha_create_table().
693*/
694
int ha_partition::create(const char *name, TABLE *table_arg,
                         HA_CREATE_INFO *create_info)
{
  int error;
  char name_buff[FN_REFLEN + 1], name_lc_buff[FN_REFLEN];
  char *name_buffer_ptr;
  const char *path;
  uint i;
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem;
  handler **file, **abort_file;
  DBUG_ENTER("ha_partition::create");
  DBUG_PRINT("enter", ("name: '%s'", name));

  DBUG_ASSERT(!fn_frm_ext(name));

  /* Not allowed to create temporary partitioned tables */
  if (create_info && create_info->tmp_table())
  {
    my_error(ER_PARTITION_NO_TEMPORARY, MYF(0));
    DBUG_RETURN(TRUE);
  }

  /* Load partition names and engines from the .par file */
  if (get_from_handler_file(name, ha_thd()->mem_root, false))
    DBUG_RETURN(TRUE);
  DBUG_ASSERT(m_file_buffer);
  /* name_buffer_ptr walks the NUL-separated list of partition names */
  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  path= get_canonical_filename(*file, name, name_lc_buff);
  for (i= 0; i < m_part_info->num_parts; i++)
  {
    part_elem= part_it++;
    if (m_is_sub_partitioned)
    {
      uint j;
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        /* One underlying table per subpartition */
        part_elem= sub_it++;
        if (unlikely((error= create_partition_name(name_buff,
                                                   sizeof(name_buff), path,
                                                   name_buffer_ptr,
                                                   NORMAL_PART_NAME, FALSE))))
          goto create_error;
        if (unlikely((error= set_up_table_before_create(table_arg, name_buff,
                                                        create_info,
                                                        part_elem)) ||
                     ((error= (*file)->ha_create(name_buff, table_arg,
                                                 create_info)))))
          goto create_error;

        name_buffer_ptr= strend(name_buffer_ptr) + 1;
        file++;
      }
    }
    else
    {
      /* One underlying table per partition */
      if (unlikely((error= create_partition_name(name_buff, sizeof(name_buff),
                                                 path, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto create_error;
      if (unlikely((error= set_up_table_before_create(table_arg, name_buff,
                                                      create_info,
                                                      part_elem)) ||
                   ((error= (*file)->ha_create(name_buff, table_arg,
                                               create_info)))))
        goto create_error;

      name_buffer_ptr= strend(name_buffer_ptr) + 1;
      file++;
    }
  }
  DBUG_RETURN(0);

create_error:
  /* Roll back: delete every partition table created before the failure */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    if (!create_partition_name(name_buff, sizeof(name_buff), path,
                               name_buffer_ptr, NORMAL_PART_NAME, FALSE))
      (void) (*file)->ha_delete_table((const char*) name_buff);
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  handler::delete_table(name);
  DBUG_RETURN(error);
}
788
789
790/*
791 Drop partitions as part of ALTER TABLE of partitions
792
793 SYNOPSIS
794 drop_partitions()
795 path Complete path of db and table name
796
797 RETURN VALUE
798 >0 Failure
799 0 Success
800
801 DESCRIPTION
802 Use part_info object on handler object to deduce which partitions to
803 drop (each partition has a state attached to it)
804*/
805
int ha_partition::drop_partitions(const char *path)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  char part_name_buff[FN_REFLEN + 1];
  uint num_parts= m_part_info->partitions.elements;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  uint name_variant;
  int ret_error;
  int error= 0;
  DBUG_ENTER("ha_partition::drop_partitions");

  /*
    Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
    We use m_file[0] as long as all partitions have the same storage engine.
  */
  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
                                                   part_name_buff)));
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_TO_BE_DROPPED)
    {
      handler *file;
      /*
        This part is to be dropped, meaning the part or all its subparts.
      */
      name_variant= NORMAL_PART_NAME;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint j= 0, part;
        do
        {
          partition_element *sub_elem= sub_it++;
          /* Subpartition handlers are laid out part-major in m_file */
          part= i * num_subparts + j;
          if (unlikely((ret_error=
                        create_subpartition_name(part_name_buff,
                                                 sizeof(part_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 name_variant))))
            error= ret_error;
          file= m_file[part];
          DBUG_PRINT("info", ("Drop subpartition %s", part_name_buff));
          /* Keep going on errors; report the last one via 'error' */
          if (unlikely((ret_error= file->ha_delete_table(part_name_buff))))
            error= ret_error;
          if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                entry_pos)))
            error= 1;
        } while (++j < num_subparts);
      }
      else
      {
        if ((ret_error= create_partition_name(part_name_buff,
                          sizeof(part_name_buff), path,
                          part_elem->partition_name, name_variant, TRUE)))
          error= ret_error;
        else
        {
          file= m_file[i];
          DBUG_PRINT("info", ("Drop partition %s", part_name_buff));
          if (unlikely((ret_error= file->ha_delete_table(part_name_buff))))
            error= ret_error;
          if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                entry_pos)))
            error= 1;
        }
      }
      /* Record the new state of the partition element */
      if (part_elem->part_state == PART_IS_CHANGED)
        part_elem->part_state= PART_NORMAL;
      else
        part_elem->part_state= PART_IS_DROPPED;
    }
  } while (++i < num_parts);
  (void) sync_ddl_log();
  DBUG_RETURN(error);
}
884
885
886/*
887 Rename partitions as part of ALTER TABLE of partitions
888
889 SYNOPSIS
890 rename_partitions()
891 path Complete path of db and table name
892
893 RETURN VALUE
894 TRUE Failure
895 FALSE Success
896
897 DESCRIPTION
898 When reorganising partitions, adding hash partitions and coalescing
899 partitions it can be necessary to rename partitions while holding
900 an exclusive lock on the table.
901 Which partitions to rename is given by state of partitions found by the
902 partition info struct referenced from the handler object
903*/
904
int ha_partition::rename_partitions(const char *path)
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  List_iterator<partition_element> temp_it(m_part_info->temp_partitions);
  char part_name_buff[FN_REFLEN + 1];
  char norm_name_buff[FN_REFLEN + 1];
  uint num_parts= m_part_info->partitions.elements;
  uint part_count= 0;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  uint j= 0;
  int error= 0;
  int ret_error;
  uint temp_partitions= m_part_info->temp_partitions.elements;
  handler *file;
  partition_element *part_elem, *sub_elem;
  DBUG_ENTER("ha_partition::rename_partitions");

  /*
    Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
    We use m_file[0] as long as all partitions have the same storage engine.
  */
  DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
                                                   norm_name_buff)));

  DEBUG_SYNC(ha_thd(), "before_rename_partitions");
  if (temp_partitions)
  {
    /*
      These are the reorganised partitions that have already been copied.
      We delete the partitions and log the delete by inactivating the
      delete log entry in the table log. We only need to synchronise
      these writes before moving to the next loop since there is no
      interaction among reorganised partitions, they cannot have the
      same name.
    */
    do
    {
      part_elem= temp_it++;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        j= 0;
        do
        {
          sub_elem= sub_it++;
          file= m_reorged_file[part_count++];
          if (unlikely((ret_error=
                        create_subpartition_name(norm_name_buff,
                                                 sizeof(norm_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 NORMAL_PART_NAME))))
            error= ret_error;
          DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
          if (unlikely((ret_error= file->ha_delete_table(norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            sub_elem->log_entry= NULL; /* Indicate success */
        } while (++j < num_subparts);
      }
      else
      {
        file= m_reorged_file[part_count++];
        if (unlikely((ret_error=
                      create_partition_name(norm_name_buff,
                                            sizeof(norm_name_buff), path,
                                            part_elem->partition_name,
                                            NORMAL_PART_NAME, TRUE))))
          error= ret_error;
        else
        {
          DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
          if (unlikely((ret_error= file->ha_delete_table(norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            part_elem->log_entry= NULL; /* Indicate success */
        }
      }
    } while (++i < temp_partitions);
    (void) sync_ddl_log();
  }
  i= 0;
  do
  {
    /*
      When state is PART_IS_CHANGED it means that we have created a new
      TEMP partition that is to be renamed to normal partition name and
      we are to delete the old partition with currently the normal name.

      We perform this operation by
      1) Delete old partition with normal partition name
      2) Signal this in table log entry
      3) Synch table log to ensure we have consistency in crashes
      4) Rename temporary partition name to normal partition name
      5) Signal this to table log entry
      It is not necessary to synch the last state since a new rename
      should not corrupt things if there was no temporary partition.

      The only other parts we need to cater for are new parts that
      replace reorganised parts. The reorganised parts were deleted
      by the code above that goes through the temp_partitions list.
      Thus the synch above makes it safe to simply perform step 4 and 5
      for those entries.
    */
    part_elem= part_it++;
    if (part_elem->part_state == PART_IS_CHANGED ||
        part_elem->part_state == PART_TO_BE_DROPPED ||
        (part_elem->part_state == PART_IS_ADDED && temp_partitions))
    {
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint part;

        j= 0;
        do
        {
          sub_elem= sub_it++;
          part= i * num_subparts + j;
          /* Build the normal (target) subpartition name */
          if (unlikely((ret_error=
                        create_subpartition_name(norm_name_buff,
                                                 sizeof(norm_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 NORMAL_PART_NAME))))
            error= ret_error;
          if (part_elem->part_state == PART_IS_CHANGED)
          {
            /* Steps 1-3: delete the old subpartition and sync the log */
            file= m_reorged_file[part_count++];
            DBUG_PRINT("info", ("Delete subpartition %s", norm_name_buff));
            if (unlikely((ret_error= file->ha_delete_table(norm_name_buff))))
              error= ret_error;
            else if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                       entry_pos)))
              error= 1;
            (void) sync_ddl_log();
          }
          /* Steps 4-5: rename the TEMP subpartition to the normal name */
          file= m_new_file[part];
          if (unlikely((ret_error=
                        create_subpartition_name(part_name_buff,
                                                 sizeof(part_name_buff), path,
                                                 part_elem->partition_name,
                                                 sub_elem->partition_name,
                                                 TEMP_PART_NAME))))
            error= ret_error;
          DBUG_PRINT("info", ("Rename subpartition from %s to %s",
                              part_name_buff, norm_name_buff));
          if (unlikely((ret_error= file->ha_rename_table(part_name_buff,
                                                         norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(sub_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            sub_elem->log_entry= NULL;
        } while (++j < num_subparts);
      }
      else
      {
        /* Build both the normal (target) and TEMP (source) names */
        if (unlikely((ret_error=
                      create_partition_name(norm_name_buff,
                                            sizeof(norm_name_buff), path,
                                            part_elem->partition_name,
                                            NORMAL_PART_NAME, TRUE)) ||
                     (ret_error= create_partition_name(part_name_buff,
                                                       sizeof(part_name_buff),
                                                       path,
                                                       part_elem->
                                                       partition_name,
                                                       TEMP_PART_NAME, TRUE))))
          error= ret_error;
        else
        {
          if (part_elem->part_state == PART_IS_CHANGED)
          {
            /* Steps 1-3: delete the old partition and sync the log */
            file= m_reorged_file[part_count++];
            DBUG_PRINT("info", ("Delete partition %s", norm_name_buff));
            if (unlikely((ret_error= file->ha_delete_table(norm_name_buff))))
              error= ret_error;
            else if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                       entry_pos)))
              error= 1;
            (void) sync_ddl_log();
          }
          /* Steps 4-5: rename the TEMP partition to the normal name */
          file= m_new_file[i];
          DBUG_PRINT("info", ("Rename partition from %s to %s",
                              part_name_buff, norm_name_buff));
          if (unlikely((ret_error= file->ha_rename_table(part_name_buff,
                                                         norm_name_buff))))
            error= ret_error;
          else if (unlikely(deactivate_ddl_log_entry(part_elem->log_entry->
                                                     entry_pos)))
            error= 1;
          else
            part_elem->log_entry= NULL;
        }
      }
    }
  } while (++i < num_parts);
  (void) sync_ddl_log();
  DBUG_RETURN(error);
}
1114
1115
/* Flags for handle_opt_partitions()/handle_opt_part() selecting the admin
   operation to forward to each partition's handler. */
#define OPTIMIZE_PARTS 1
#define ANALYZE_PARTS 2
#define CHECK_PARTS 3
#define REPAIR_PARTS 4
#define ASSIGN_KEYCACHE_PARTS 5
#define PRELOAD_KEYS_PARTS 6

/*
  Operation names for admin messages, indexed by the *_PARTS flags above.
  Slot 0 is unused (the flags start at 1), hence the leading NULL.
*/
static const char *opt_op_name[]= {NULL,
                                   "optimize", "analyze", "check", "repair",
                                   "assign_to_keycache", "preload_keys"};
1126
1127/*
1128 Optimize table
1129
1130 SYNOPSIS
1131 optimize()
1132 thd Thread object
1133 check_opt Check/analyze/repair/optimize options
1134
1135 RETURN VALUES
1136 >0 Error
1137 0 Success
1138*/
1139
1140int ha_partition::optimize(THD *thd, HA_CHECK_OPT *check_opt)
1141{
1142 DBUG_ENTER("ha_partition::optimize");
1143
1144 DBUG_RETURN(handle_opt_partitions(thd, check_opt, OPTIMIZE_PARTS));
1145}
1146
1147
1148/*
1149 Analyze table
1150
1151 SYNOPSIS
1152 analyze()
1153 thd Thread object
1154 check_opt Check/analyze/repair/optimize options
1155
1156 RETURN VALUES
1157 >0 Error
1158 0 Success
1159*/
1160
1161int ha_partition::analyze(THD *thd, HA_CHECK_OPT *check_opt)
1162{
1163 DBUG_ENTER("ha_partition::analyze");
1164
1165 DBUG_RETURN(handle_opt_partitions(thd, check_opt, ANALYZE_PARTS));
1166}
1167
1168
1169/*
1170 Check table
1171
1172 SYNOPSIS
1173 check()
1174 thd Thread object
1175 check_opt Check/analyze/repair/optimize options
1176
1177 RETURN VALUES
1178 >0 Error
1179 0 Success
1180*/
1181
1182int ha_partition::check(THD *thd, HA_CHECK_OPT *check_opt)
1183{
1184 DBUG_ENTER("ha_partition::check");
1185
1186 DBUG_RETURN(handle_opt_partitions(thd, check_opt, CHECK_PARTS));
1187}
1188
1189
1190/*
1191 Repair table
1192
1193 SYNOPSIS
1194 repair()
1195 thd Thread object
1196 check_opt Check/analyze/repair/optimize options
1197
1198 RETURN VALUES
1199 >0 Error
1200 0 Success
1201*/
1202
1203int ha_partition::repair(THD *thd, HA_CHECK_OPT *check_opt)
1204{
1205 DBUG_ENTER("ha_partition::repair");
1206
1207 int res= handle_opt_partitions(thd, check_opt, REPAIR_PARTS);
1208 DBUG_RETURN(res);
1209}
1210
1211/**
1212 Assign to keycache
1213
1214 @param thd Thread object
1215 @param check_opt Check/analyze/repair/optimize options
1216
1217 @return
1218 @retval >0 Error
1219 @retval 0 Success
1220*/
1221
1222int ha_partition::assign_to_keycache(THD *thd, HA_CHECK_OPT *check_opt)
1223{
1224 DBUG_ENTER("ha_partition::assign_to_keycache");
1225
1226 DBUG_RETURN(handle_opt_partitions(thd, check_opt, ASSIGN_KEYCACHE_PARTS));
1227}
1228
1229
1230/**
1231 Preload to keycache
1232
1233 @param thd Thread object
1234 @param check_opt Check/analyze/repair/optimize options
1235
1236 @return
1237 @retval >0 Error
1238 @retval 0 Success
1239*/
1240
1241int ha_partition::preload_keys(THD *thd, HA_CHECK_OPT *check_opt)
1242{
1243 DBUG_ENTER("ha_partition::preload_keys");
1244
1245 DBUG_RETURN(handle_opt_partitions(thd, check_opt, PRELOAD_KEYS_PARTS));
1246}
1247
1248
1249/*
1250 Handle optimize/analyze/check/repair of one partition
1251
1252 SYNOPSIS
1253 handle_opt_part()
1254 thd Thread object
1255 check_opt Options
1256 file Handler object of partition
1257 flag Optimize/Analyze/Check/Repair flag
1258
1259 RETURN VALUE
1260 >0 Failure
1261 0 Success
1262*/
1263
1264int ha_partition::handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt,
1265 uint part_id, uint flag)
1266{
1267 int error;
1268 handler *file= m_file[part_id];
1269 DBUG_ENTER("handle_opt_part");
1270 DBUG_PRINT("enter", ("flag: %u", flag));
1271
1272 if (flag == OPTIMIZE_PARTS)
1273 error= file->ha_optimize(thd, check_opt);
1274 else if (flag == ANALYZE_PARTS)
1275 error= file->ha_analyze(thd, check_opt);
1276 else if (flag == CHECK_PARTS)
1277 {
1278 error= file->ha_check(thd, check_opt);
1279 if (!error ||
1280 error == HA_ADMIN_ALREADY_DONE ||
1281 error == HA_ADMIN_NOT_IMPLEMENTED)
1282 {
1283 if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1284 error= check_misplaced_rows(part_id, false);
1285 }
1286 }
1287 else if (flag == REPAIR_PARTS)
1288 {
1289 error= file->ha_repair(thd, check_opt);
1290 if (!error ||
1291 error == HA_ADMIN_ALREADY_DONE ||
1292 error == HA_ADMIN_NOT_IMPLEMENTED)
1293 {
1294 if (check_opt->flags & (T_MEDIUM | T_EXTEND))
1295 error= check_misplaced_rows(part_id, true);
1296 }
1297 }
1298 else if (flag == ASSIGN_KEYCACHE_PARTS)
1299 error= file->assign_to_keycache(thd, check_opt);
1300 else if (flag == PRELOAD_KEYS_PARTS)
1301 error= file->preload_keys(thd, check_opt);
1302 else
1303 {
1304 DBUG_ASSERT(FALSE);
1305 error= 1;
1306 }
1307 if (error == HA_ADMIN_ALREADY_DONE)
1308 error= 0;
1309 DBUG_RETURN(error);
1310}
1311
1312
1313/*
1314 print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE
1315 (modelled after mi_check_print_msg)
1316 TODO: move this into the handler, or rewrite mysql_admin_table.
1317*/
1318bool print_admin_msg(THD* thd, uint len,
1319 const char* msg_type,
1320 const char* db_name, String &table_name,
1321 const char* op_name, const char *fmt, ...)
1322 ATTRIBUTE_FORMAT(printf, 7, 8);
1323bool print_admin_msg(THD* thd, uint len,
1324 const char* msg_type,
1325 const char* db_name, String &table_name,
1326 const char* op_name, const char *fmt, ...)
1327{
1328 va_list args;
1329 Protocol *protocol= thd->protocol;
1330 size_t length;
1331 size_t msg_length;
1332 char name[NAME_LEN*2+2];
1333 char *msgbuf;
1334 bool error= true;
1335
1336 if (!(msgbuf= (char*) my_malloc(len, MYF(0))))
1337 return true;
1338 va_start(args, fmt);
1339 msg_length= my_vsnprintf(msgbuf, len, fmt, args);
1340 va_end(args);
1341 if (msg_length >= (len - 1))
1342 goto err;
1343 msgbuf[len - 1]= 0; // healthy paranoia
1344
1345
1346 if (!thd->vio_ok())
1347 {
1348 sql_print_error("%s", msgbuf);
1349 goto err;
1350 }
1351
1352 length=(size_t)(strxmov(name, db_name, ".", table_name.c_ptr_safe(), NullS) - name);
1353 /*
1354 TODO: switch from protocol to push_warning here. The main reason we didn't
1355 it yet is parallel repair, which threads have no THD object accessible via
1356 current_thd.
1357
1358 Also we likely need to lock mutex here (in both cases with protocol and
1359 push_warning).
1360 */
1361 DBUG_PRINT("info",("print_admin_msg: %s, %s, %s, %s", name, op_name,
1362 msg_type, msgbuf));
1363 protocol->prepare_for_resend();
1364 protocol->store(name, length, system_charset_info);
1365 protocol->store(op_name, system_charset_info);
1366 protocol->store(msg_type, system_charset_info);
1367 protocol->store(msgbuf, msg_length, system_charset_info);
1368 if (protocol->write())
1369 {
1370 sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n",
1371 msgbuf);
1372 goto err;
1373 }
1374 error= false;
1375err:
1376 my_free(msgbuf);
1377 return error;
1378}
1379
1380
1381/*
1382 Handle optimize/analyze/check/repair of partitions
1383
1384 SYNOPSIS
1385 handle_opt_partitions()
1386 thd Thread object
1387 check_opt Options
1388 flag Optimize/Analyze/Check/Repair flag
1389
1390 RETURN VALUE
1391 >0 Failure
1392 0 Success
1393*/
1394
1395int ha_partition::handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt,
1396 uint flag)
1397{
1398 List_iterator<partition_element> part_it(m_part_info->partitions);
1399 uint num_parts= m_part_info->num_parts;
1400 uint num_subparts= m_part_info->num_subparts;
1401 uint i= 0;
1402 int error;
1403 DBUG_ENTER("ha_partition::handle_opt_partitions");
1404 DBUG_PRINT("enter", ("flag= %u", flag));
1405
1406 do
1407 {
1408 partition_element *part_elem= part_it++;
1409 /*
1410 when ALTER TABLE <CMD> PARTITION ...
1411 it should only do named partitions, otherwise all partitions
1412 */
1413 if (!(thd->lex->alter_info.partition_flags & ALTER_PARTITION_ADMIN) ||
1414 part_elem->part_state == PART_ADMIN)
1415 {
1416 if (m_is_sub_partitioned)
1417 {
1418 List_iterator<partition_element> subpart_it(part_elem->subpartitions);
1419 partition_element *sub_elem;
1420 uint j= 0, part;
1421 do
1422 {
1423 sub_elem= subpart_it++;
1424 part= i * num_subparts + j;
1425 DBUG_PRINT("info", ("Optimize subpartition %u (%s)",
1426 part, sub_elem->partition_name));
1427 if (unlikely((error= handle_opt_part(thd, check_opt, part, flag))))
1428 {
1429 /* print a line which partition the error belongs to */
1430 if (error != HA_ADMIN_NOT_IMPLEMENTED &&
1431 error != HA_ADMIN_ALREADY_DONE &&
1432 error != HA_ADMIN_TRY_ALTER)
1433 {
1434 print_admin_msg(thd, MYSQL_ERRMSG_SIZE, "error",
1435 table_share->db.str, table->alias,
1436 opt_op_name[flag],
1437 "Subpartition %s returned error",
1438 sub_elem->partition_name);
1439 }
1440 /* reset part_state for the remaining partitions */
1441 do
1442 {
1443 if (part_elem->part_state == PART_ADMIN)
1444 part_elem->part_state= PART_NORMAL;
1445 } while ((part_elem= part_it++));
1446 DBUG_RETURN(error);
1447 }
1448 } while (++j < num_subparts);
1449 }
1450 else
1451 {
1452 DBUG_PRINT("info", ("Optimize partition %u (%s)", i,
1453 part_elem->partition_name));
1454 if (unlikely((error= handle_opt_part(thd, check_opt, i, flag))))
1455 {
1456 /* print a line which partition the error belongs to */
1457 if (error != HA_ADMIN_NOT_IMPLEMENTED &&
1458 error != HA_ADMIN_ALREADY_DONE &&
1459 error != HA_ADMIN_TRY_ALTER)
1460 {
1461 print_admin_msg(thd, MYSQL_ERRMSG_SIZE, "error",
1462 table_share->db.str, table->alias,
1463 opt_op_name[flag], "Partition %s returned error",
1464 part_elem->partition_name);
1465 }
1466 /* reset part_state for the remaining partitions */
1467 do
1468 {
1469 if (part_elem->part_state == PART_ADMIN)
1470 part_elem->part_state= PART_NORMAL;
1471 } while ((part_elem= part_it++));
1472 DBUG_RETURN(error);
1473 }
1474 }
1475 part_elem->part_state= PART_NORMAL;
1476 }
1477 } while (++i < num_parts);
1478 DBUG_RETURN(FALSE);
1479}
1480
1481
1482/**
1483 @brief Check and repair the table if neccesary
1484
1485 @param thd Thread object
1486
1487 @retval TRUE Error/Not supported
1488 @retval FALSE Success
1489
1490 @note Called if open_table_from_share fails and ::is_crashed().
1491*/
1492
1493bool ha_partition::check_and_repair(THD *thd)
1494{
1495 handler **file= m_file;
1496 DBUG_ENTER("ha_partition::check_and_repair");
1497
1498 do
1499 {
1500 if ((*file)->ha_check_and_repair(thd))
1501 DBUG_RETURN(TRUE);
1502 } while (*(++file));
1503 DBUG_RETURN(FALSE);
1504}
1505
1506
1507/**
1508 @breif Check if the table can be automatically repaired
1509
1510 @retval TRUE Can be auto repaired
1511 @retval FALSE Cannot be auto repaired
1512*/
1513
1514bool ha_partition::auto_repair(int error) const
1515{
1516 DBUG_ENTER("ha_partition::auto_repair");
1517
1518 /*
1519 As long as we only support one storage engine per table,
1520 we can use the first partition for this function.
1521 */
1522 DBUG_RETURN(m_file[0]->auto_repair(error));
1523}
1524
1525
1526/**
1527 @breif Check if the table is crashed
1528
1529 @retval TRUE Crashed
1530 @retval FALSE Not crashed
1531*/
1532
1533bool ha_partition::is_crashed() const
1534{
1535 handler **file= m_file;
1536 DBUG_ENTER("ha_partition::is_crashed");
1537
1538 do
1539 {
1540 if ((*file)->is_crashed())
1541 DBUG_RETURN(TRUE);
1542 } while (*(++file));
1543 DBUG_RETURN(FALSE);
1544}
1545
1546
1547/*
1548 Prepare by creating a new partition
1549
1550 SYNOPSIS
1551 prepare_new_partition()
1552 table Table object
1553 create_info Create info from CREATE TABLE
1554 file Handler object of new partition
1555 part_name partition name
1556
1557 RETURN VALUE
1558 >0 Error
1559 0 Success
1560*/
1561
1562int ha_partition::prepare_new_partition(TABLE *tbl,
1563 HA_CREATE_INFO *create_info,
1564 handler *file, const char *part_name,
1565 partition_element *p_elem,
1566 uint disable_non_uniq_indexes)
1567{
1568 int error;
1569 DBUG_ENTER("prepare_new_partition");
1570
1571 /*
1572 This call to set_up_table_before_create() is done for an alter table.
1573 So this may be the second time around for this partition_element,
1574 depending on how many partitions and subpartitions there were before,
1575 and how many there are now.
1576 The first time, on the CREATE, data_file_name and index_file_name
1577 came from the parser. They did not have the file name attached to
1578 the end. But if this partition is less than the total number of
1579 previous partitions, it's data_file_name has the filename attached.
1580 So we need to take the partition filename off if it exists.
1581 That file name may be different from part_name, which will be
1582 attached in append_file_to_dir().
1583 */
1584 truncate_partition_filename((char*) p_elem->data_file_name);
1585 truncate_partition_filename((char*) p_elem->index_file_name);
1586
1587 if (unlikely((error= set_up_table_before_create(tbl, part_name, create_info,
1588 p_elem))))
1589 goto error_create;
1590
1591 if (!(file->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
1592 tbl->s->connect_string= p_elem->connect_string;
1593 if ((error= file->ha_create(part_name, tbl, create_info)))
1594 {
1595 /*
1596 Added for safety, InnoDB reports HA_ERR_FOUND_DUPP_KEY
1597 if the table/partition already exists.
1598 If we return that error code, then print_error would try to
1599 get_dup_key on a non-existing partition.
1600 So return a more reasonable error code.
1601 */
1602 if (error == HA_ERR_FOUND_DUPP_KEY)
1603 error= HA_ERR_TABLE_EXIST;
1604 goto error_create;
1605 }
1606 DBUG_PRINT("info", ("partition %s created", part_name));
1607 if (unlikely((error= file->ha_open(tbl, part_name, m_mode,
1608 m_open_test_lock | HA_OPEN_NO_PSI_CALL))))
1609 goto error_open;
1610 DBUG_PRINT("info", ("partition %s opened", part_name));
1611
1612 /*
1613 Note: if you plan to add another call that may return failure,
1614 better to do it before external_lock() as cleanup_new_partition()
1615 assumes that external_lock() is last call that may fail here.
1616 Otherwise see description for cleanup_new_partition().
1617 */
1618 if (unlikely((error= file->ha_external_lock(ha_thd(), F_WRLCK))))
1619 goto error_external_lock;
1620 DBUG_PRINT("info", ("partition %s external locked", part_name));
1621
1622 if (disable_non_uniq_indexes)
1623 file->ha_disable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);
1624
1625 DBUG_RETURN(0);
1626error_external_lock:
1627 (void) file->ha_close();
1628error_open:
1629 (void) file->ha_delete_table(part_name);
1630error_create:
1631 DBUG_RETURN(error);
1632}
1633
1634
1635/*
1636 Cleanup by removing all created partitions after error
1637
1638 SYNOPSIS
1639 cleanup_new_partition()
1640 part_count Number of partitions to remove
1641
1642 RETURN VALUE
1643 NONE
1644
1645 DESCRIPTION
1646 This function is called immediately after prepare_new_partition() in
1647 case the latter fails.
1648
1649 In prepare_new_partition() last call that may return failure is
1650 external_lock(). That means if prepare_new_partition() fails,
1651 partition does not have external lock. Thus no need to call
1652 external_lock(F_UNLCK) here.
1653
1654 TODO:
1655 We must ensure that in the case that we get an error during the process
1656 that we call external_lock with F_UNLCK, close the table and delete the
1657 table in the case where we have been successful with prepare_handler.
1658 We solve this by keeping an array of successful calls to prepare_handler
1659 which can then be used to undo the call.
1660*/
1661
1662void ha_partition::cleanup_new_partition(uint part_count)
1663{
1664 DBUG_ENTER("ha_partition::cleanup_new_partition");
1665
1666 if (m_added_file)
1667 {
1668 THD *thd= ha_thd();
1669 handler **file= m_added_file;
1670 while ((part_count > 0) && (*file))
1671 {
1672 (*file)->ha_external_lock(thd, F_UNLCK);
1673 (*file)->ha_close();
1674
1675 /* Leave the (*file)->ha_delete_table(part_name) to the ddl-log */
1676
1677 file++;
1678 part_count--;
1679 }
1680 m_added_file= NULL;
1681 }
1682 DBUG_VOID_RETURN;
1683}
1684
1685/*
1686 Implement the partition changes defined by ALTER TABLE of partitions
1687
1688 SYNOPSIS
1689 change_partitions()
1690 create_info HA_CREATE_INFO object describing all
1691 fields and indexes in table
1692 path Complete path of db and table name
1693 out: copied Output parameter where number of copied
1694 records are added
1695 out: deleted Output parameter where number of deleted
1696 records are added
1697 pack_frm_data Reference to packed frm file
1698 pack_frm_len Length of packed frm file
1699
1700 RETURN VALUE
1701 >0 Failure
1702 0 Success
1703
1704 DESCRIPTION
1705 Add and copy if needed a number of partitions, during this operation
1706 no other operation is ongoing in the server. This is used by
1707 ADD PARTITION all types as well as by REORGANIZE PARTITION. For
1708 one-phased implementations it is used also by DROP and COALESCE
1709 PARTITIONs.
1710 One-phased implementation needs the new frm file, other handlers will
1711 get zero length and a NULL reference here.
1712*/
1713
1714int ha_partition::change_partitions(HA_CREATE_INFO *create_info,
1715 const char *path,
1716 ulonglong * const copied,
1717 ulonglong * const deleted,
1718 const uchar *pack_frm_data
1719 __attribute__((unused)),
1720 size_t pack_frm_len
1721 __attribute__((unused)))
1722{
1723 List_iterator<partition_element> part_it(m_part_info->partitions);
1724 List_iterator <partition_element> t_it(m_part_info->temp_partitions);
1725 char part_name_buff[FN_REFLEN + 1];
1726 uint num_parts= m_part_info->partitions.elements;
1727 uint num_subparts= m_part_info->num_subparts;
1728 uint i= 0;
1729 uint num_remain_partitions, part_count, orig_count;
1730 handler **new_file_array;
1731 int error= 1;
1732 bool first;
1733 uint temp_partitions= m_part_info->temp_partitions.elements;
1734 THD *thd= ha_thd();
1735 DBUG_ENTER("ha_partition::change_partitions");
1736
1737 /*
1738 Assert that it works without HA_FILE_BASED and lower_case_table_name = 2.
1739 We use m_file[0] as long as all partitions have the same storage engine.
1740 */
1741 DBUG_ASSERT(!strcmp(path, get_canonical_filename(m_file[0], path,
1742 part_name_buff)));
1743 m_reorged_parts= 0;
1744 if (!m_part_info->is_sub_partitioned())
1745 num_subparts= 1;
1746
1747 /*
1748 Step 1:
1749 Calculate number of reorganised partitions and allocate space for
1750 their handler references.
1751 */
1752 if (temp_partitions)
1753 {
1754 m_reorged_parts= temp_partitions * num_subparts;
1755 }
1756 else
1757 {
1758 do
1759 {
1760 partition_element *part_elem= part_it++;
1761 if (part_elem->part_state == PART_CHANGED ||
1762 part_elem->part_state == PART_REORGED_DROPPED)
1763 {
1764 m_reorged_parts+= num_subparts;
1765 }
1766 } while (++i < num_parts);
1767 }
1768 if (m_reorged_parts &&
1769 !(m_reorged_file= (handler**) thd->calloc(sizeof(handler*)*
1770 (m_reorged_parts + 1))))
1771 {
1772 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1773 }
1774
1775 /*
1776 Step 2:
1777 Calculate number of partitions after change and allocate space for
1778 their handler references.
1779 */
1780 num_remain_partitions= 0;
1781 if (temp_partitions)
1782 {
1783 num_remain_partitions= num_parts * num_subparts;
1784 }
1785 else
1786 {
1787 part_it.rewind();
1788 i= 0;
1789 do
1790 {
1791 partition_element *part_elem= part_it++;
1792 if (part_elem->part_state == PART_NORMAL ||
1793 part_elem->part_state == PART_TO_BE_ADDED ||
1794 part_elem->part_state == PART_CHANGED)
1795 {
1796 num_remain_partitions+= num_subparts;
1797 }
1798 } while (++i < num_parts);
1799 }
1800 if (!(new_file_array= ((handler**)
1801 thd->calloc(sizeof(handler*)*
1802 (2*(num_remain_partitions + 1))))))
1803 {
1804 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1805 }
1806 m_added_file= &new_file_array[num_remain_partitions + 1];
1807
1808 /*
1809 Step 3:
1810 Fill m_reorged_file with handler references and NULL at the end
1811 */
1812 if (m_reorged_parts)
1813 {
1814 i= 0;
1815 part_count= 0;
1816 first= TRUE;
1817 part_it.rewind();
1818 do
1819 {
1820 partition_element *part_elem= part_it++;
1821 if (part_elem->part_state == PART_CHANGED ||
1822 part_elem->part_state == PART_REORGED_DROPPED)
1823 {
1824 memcpy((void*)&m_reorged_file[part_count],
1825 (void*)&m_file[i*num_subparts],
1826 sizeof(handler*)*num_subparts);
1827 part_count+= num_subparts;
1828 }
1829 else if (first && temp_partitions &&
1830 part_elem->part_state == PART_TO_BE_ADDED)
1831 {
1832 /*
1833 When doing an ALTER TABLE REORGANIZE PARTITION a number of
1834 partitions is to be reorganised into a set of new partitions.
1835 The reorganised partitions are in this case in the temp_partitions
1836 list. We copy all of them in one batch and thus we only do this
1837 until we find the first partition with state PART_TO_BE_ADDED
1838 since this is where the new partitions go in and where the old
1839 ones used to be.
1840 */
1841 first= FALSE;
1842 DBUG_ASSERT(((i*num_subparts) + m_reorged_parts) <= m_file_tot_parts);
1843 memcpy((void*)m_reorged_file, &m_file[i*num_subparts],
1844 sizeof(handler*)*m_reorged_parts);
1845 }
1846 } while (++i < num_parts);
1847 }
1848
1849 /*
1850 Step 4:
1851 Fill new_array_file with handler references. Create the handlers if
1852 needed.
1853 */
1854 i= 0;
1855 part_count= 0;
1856 orig_count= 0;
1857 first= TRUE;
1858 part_it.rewind();
1859 do
1860 {
1861 partition_element *part_elem= part_it++;
1862 if (part_elem->part_state == PART_NORMAL)
1863 {
1864 DBUG_ASSERT(orig_count + num_subparts <= m_file_tot_parts);
1865 memcpy((void*)&new_file_array[part_count], (void*)&m_file[orig_count],
1866 sizeof(handler*)*num_subparts);
1867 part_count+= num_subparts;
1868 orig_count+= num_subparts;
1869 }
1870 else if (part_elem->part_state == PART_CHANGED ||
1871 part_elem->part_state == PART_TO_BE_ADDED)
1872 {
1873 uint j= 0;
1874 Parts_share_refs *p_share_refs;
1875 /*
1876 The Handler_shares for each partition's handler can be allocated
1877 within this handler, since there will not be any more instances of the
1878 new partitions, until the table is reopened after the ALTER succeeded.
1879 */
1880 p_share_refs= new Parts_share_refs;
1881 if (!p_share_refs)
1882 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1883 if (p_share_refs->init(num_subparts))
1884 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1885 if (m_new_partitions_share_refs.push_back(p_share_refs, thd->mem_root))
1886 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1887 do
1888 {
1889 handler **new_file= &new_file_array[part_count++];
1890 if (!(*new_file=
1891 get_new_handler(table->s,
1892 thd->mem_root,
1893 part_elem->engine_type)))
1894 {
1895 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1896 }
1897 if ((*new_file)->set_ha_share_ref(&p_share_refs->ha_shares[j]))
1898 {
1899 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
1900 }
1901 } while (++j < num_subparts);
1902 if (part_elem->part_state == PART_CHANGED)
1903 orig_count+= num_subparts;
1904 else if (temp_partitions && first)
1905 {
1906 orig_count+= (num_subparts * temp_partitions);
1907 first= FALSE;
1908 }
1909 }
1910 } while (++i < num_parts);
1911 first= FALSE;
1912 /*
1913 Step 5:
1914 Create the new partitions and also open, lock and call external_lock
1915 on them to prepare them for copy phase and also for later close
1916 calls
1917 */
1918
1919 /*
1920 Before creating new partitions check whether indexes are disabled
1921 in the partitions.
1922 */
1923
1924 uint disable_non_uniq_indexes= indexes_are_disabled();
1925
1926 i= 0;
1927 part_count= 0;
1928 part_it.rewind();
1929 do
1930 {
1931 partition_element *part_elem= part_it++;
1932 if (part_elem->part_state == PART_TO_BE_ADDED ||
1933 part_elem->part_state == PART_CHANGED)
1934 {
1935 /*
1936 A new partition needs to be created PART_TO_BE_ADDED means an
1937 entirely new partition and PART_CHANGED means a changed partition
1938 that will still exist with either more or less data in it.
1939 */
1940 uint name_variant= NORMAL_PART_NAME;
1941 if (part_elem->part_state == PART_CHANGED ||
1942 (part_elem->part_state == PART_TO_BE_ADDED && temp_partitions))
1943 name_variant= TEMP_PART_NAME;
1944 if (m_part_info->is_sub_partitioned())
1945 {
1946 List_iterator<partition_element> sub_it(part_elem->subpartitions);
1947 uint j= 0, part;
1948 do
1949 {
1950 partition_element *sub_elem= sub_it++;
1951 if (unlikely((error=
1952 create_subpartition_name(part_name_buff,
1953 sizeof(part_name_buff), path,
1954 part_elem->partition_name,
1955 sub_elem->partition_name,
1956 name_variant))))
1957 {
1958 cleanup_new_partition(part_count);
1959 DBUG_RETURN(error);
1960 }
1961 part= i * num_subparts + j;
1962 DBUG_PRINT("info", ("Add subpartition %s", part_name_buff));
1963 if (unlikely((error=
1964 prepare_new_partition(table, create_info,
1965 new_file_array[part],
1966 (const char *)part_name_buff,
1967 sub_elem,
1968 disable_non_uniq_indexes))))
1969 {
1970 cleanup_new_partition(part_count);
1971 DBUG_RETURN(error);
1972 }
1973
1974 m_added_file[part_count++]= new_file_array[part];
1975 } while (++j < num_subparts);
1976 }
1977 else
1978 {
1979 if (unlikely((error=
1980 create_partition_name(part_name_buff,
1981 sizeof(part_name_buff), path,
1982 part_elem->partition_name,
1983 name_variant, TRUE))))
1984 {
1985 cleanup_new_partition(part_count);
1986 DBUG_RETURN(error);
1987 }
1988
1989 DBUG_PRINT("info", ("Add partition %s", part_name_buff));
1990 if (unlikely((error=
1991 prepare_new_partition(table, create_info,
1992 new_file_array[i],
1993 (const char *)part_name_buff,
1994 part_elem,
1995 disable_non_uniq_indexes))))
1996 {
1997 cleanup_new_partition(part_count);
1998 DBUG_RETURN(error);
1999 }
2000
2001 m_added_file[part_count++]= new_file_array[i];
2002 }
2003 }
2004 } while (++i < num_parts);
2005
2006 /*
2007 Step 6:
2008 State update to prepare for next write of the frm file.
2009 */
2010 i= 0;
2011 part_it.rewind();
2012 do
2013 {
2014 partition_element *part_elem= part_it++;
2015 if (part_elem->part_state == PART_TO_BE_ADDED)
2016 part_elem->part_state= PART_IS_ADDED;
2017 else if (part_elem->part_state == PART_CHANGED)
2018 part_elem->part_state= PART_IS_CHANGED;
2019 else if (part_elem->part_state == PART_REORGED_DROPPED)
2020 part_elem->part_state= PART_TO_BE_DROPPED;
2021 } while (++i < num_parts);
2022 for (i= 0; i < temp_partitions; i++)
2023 {
2024 partition_element *part_elem= t_it++;
2025 DBUG_ASSERT(part_elem->part_state == PART_TO_BE_REORGED);
2026 part_elem->part_state= PART_TO_BE_DROPPED;
2027 }
2028 m_new_file= new_file_array;
2029 if (unlikely((error= copy_partitions(copied, deleted))))
2030 {
2031 /*
2032 Close and unlock the new temporary partitions.
2033 They will later be deleted through the ddl-log.
2034 */
2035 cleanup_new_partition(part_count);
2036 }
2037 DBUG_RETURN(error);
2038}
2039
2040
2041/*
2042 Copy partitions as part of ALTER TABLE of partitions
2043
2044 SYNOPSIS
2045 copy_partitions()
2046 out:copied Number of records copied
2047 out:deleted Number of records deleted
2048
2049 RETURN VALUE
2050 >0 Error code
2051 0 Success
2052
2053 DESCRIPTION
2054 change_partitions has done all the preparations, now it is time to
2055 actually copy the data from the reorganised partitions to the new
2056 partitions.
2057*/
2058
2059int ha_partition::copy_partitions(ulonglong * const copied,
2060 ulonglong * const deleted)
2061{
2062 uint reorg_part= 0;
2063 int result= 0;
2064 longlong func_value;
2065 DBUG_ENTER("ha_partition::copy_partitions");
2066
2067 if (m_part_info->linear_hash_ind)
2068 {
2069 if (m_part_info->part_type == HASH_PARTITION)
2070 set_linear_hash_mask(m_part_info, m_part_info->num_parts);
2071 else
2072 set_linear_hash_mask(m_part_info, m_part_info->num_subparts);
2073 }
2074 else if (m_part_info->part_type == VERSIONING_PARTITION)
2075 {
2076 if (m_part_info->check_constants(ha_thd(), m_part_info))
2077 goto init_error;
2078 }
2079
2080 while (reorg_part < m_reorged_parts)
2081 {
2082 handler *file= m_reorged_file[reorg_part];
2083 uint32 new_part;
2084
2085 late_extra_cache(reorg_part);
2086 if (unlikely((result= file->ha_rnd_init_with_error(1))))
2087 goto init_error;
2088 while (TRUE)
2089 {
2090 if ((result= file->ha_rnd_next(m_rec0)))
2091 {
2092 if (result != HA_ERR_END_OF_FILE)
2093 goto error;
2094 /*
2095 End-of-file reached, break out to continue with next partition or
2096 end the copy process.
2097 */
2098 break;
2099 }
2100 /* Found record to insert into new handler */
2101 if (m_part_info->get_partition_id(m_part_info, &new_part,
2102 &func_value))
2103 {
2104 /*
2105 This record is in the original table but will not be in the new
2106 table since it doesn't fit into any partition any longer due to
2107 changed partitioning ranges or list values.
2108 */
2109 (*deleted)++;
2110 }
2111 else
2112 {
2113 THD *thd= ha_thd();
2114 /* Copy record to new handler */
2115 (*copied)++;
2116 tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
2117 result= m_new_file[new_part]->ha_write_row(m_rec0);
2118 reenable_binlog(thd);
2119 if (result)
2120 goto error;
2121 }
2122 }
2123 late_extra_no_cache(reorg_part);
2124 file->ha_rnd_end();
2125 reorg_part++;
2126 }
2127 DBUG_RETURN(FALSE);
2128error:
2129 m_reorged_file[reorg_part]->ha_rnd_end();
2130init_error:
2131 DBUG_RETURN(result);
2132}
2133
2134/*
2135 Update create info as part of ALTER TABLE
2136
2137 SYNOPSIS
2138 update_create_info()
2139 create_info Create info from ALTER TABLE
2140
2141 RETURN VALUE
2142 NONE
2143
2144 DESCRIPTION
2145 Forward this handler call to the storage engine foreach
2146 partition handler. The data_file_name for each partition may
2147 need to be reset if the tablespace was moved. Use a dummy
2148 HA_CREATE_INFO structure and transfer necessary data.
2149*/
2150
void ha_partition::update_create_info(HA_CREATE_INFO *create_info)
{
  DBUG_ENTER("ha_partition::update_create_info");

  /*
    Fix for bug#38751, some engines needs info-calls in ALTER.
    Archive need this since it flushes in ::info.
    HA_STATUS_AUTO is optimized so it will not always be forwarded
    to all partitions, but HA_STATUS_VARIABLE will.
  */
  info(HA_STATUS_VARIABLE | HA_STATUS_OPEN);

  info(HA_STATUS_AUTO);

  /* Only expose the current auto_increment if the user did not set one */
  if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
    create_info->auto_increment_value= stats.auto_increment_value;

  /*
    DATA DIRECTORY and INDEX DIRECTORY are never applied to the whole
    partitioned table, only its parts.
    The (const char*) -1 sentinel in data_file_name marks that we were
    called from ALTER TABLE code.
  */
  my_bool from_alter= (create_info->data_file_name == (const char*) -1);
  create_info->data_file_name= create_info->index_file_name= NULL;

  /*
    Clear the table-level connect string unless the underlying engine is
    able to read it per partition.
  */
  if (!(m_file[0]->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
    create_info->connect_string= null_clex_str;

  /*
    We do not need to update the individual partition DATA DIRECTORY settings
    since they can be changed by ALTER TABLE ... REORGANIZE PARTITIONS.
  */
  if (from_alter)
    DBUG_VOID_RETURN;

  /*
    send Handler::update_create_info() to the storage engine for each
    partition that currently has a handler object. Using a dummy
    HA_CREATE_INFO structure to collect DATA and INDEX DIRECTORYs.
  */

  List_iterator<partition_element> part_it(m_part_info->partitions);
  partition_element *part_elem, *sub_elem;
  uint num_subparts= m_part_info->num_subparts;
  uint num_parts= (num_subparts ? m_file_tot_parts / num_subparts :
                   m_file_tot_parts);
  HA_CREATE_INFO dummy_info;
  memset(&dummy_info, 0, sizeof(dummy_info));

  /*
    Since update_create_info() can be called from mysql_prepare_alter_table()
    when not all handlers are set up, we look for that condition first.
    If all handlers are not available, do not call update_create_info for any.
  */
  uint i, j, part;
  /*
    First pass: only verify that every (sub)partition element exists and has
    an open handler; return silently (collecting nothing) if any is missing.
  */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (!part_elem)
      DBUG_VOID_RETURN;
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        if (!sub_elem)
          DBUG_VOID_RETURN;
        part= i * num_subparts + j;
        if (part >= m_file_tot_parts || !m_file[part])
          DBUG_VOID_RETURN;
      }
    }
    else
    {
      if (!m_file[i])
        DBUG_VOID_RETURN;
    }
  }
  part_it.rewind();

  /*
    Second pass: ask each underlying handler for its DATA/INDEX DIRECTORY
    via the dummy HA_CREATE_INFO and store the results back into the
    partition elements.  The first pass guarantees every handler exists,
    hence only asserts here.
  */
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    DBUG_ASSERT(part_elem);
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      for (j= 0; j < num_subparts; j++)
      {
        sub_elem= subpart_it++;
        DBUG_ASSERT(sub_elem);
        part= i * num_subparts + j;
        DBUG_ASSERT(part < m_file_tot_parts && m_file[part]);
        dummy_info.data_file_name= dummy_info.index_file_name = NULL;
        m_file[part]->update_create_info(&dummy_info);
        sub_elem->data_file_name = (char*) dummy_info.data_file_name;
        sub_elem->index_file_name = (char*) dummy_info.index_file_name;
      }
    }
    else
    {
      DBUG_ASSERT(m_file[i]);
      dummy_info.data_file_name= dummy_info.index_file_name= NULL;
      m_file[i]->update_create_info(&dummy_info);
      part_elem->data_file_name = (char*) dummy_info.data_file_name;
      part_elem->index_file_name = (char*) dummy_info.index_file_name;
    }
  }
  DBUG_VOID_RETURN;
}
2261
2262
2263/**
2264 Change the internal TABLE_SHARE pointer
2265
2266 @param table_arg TABLE object
2267 @param share New share to use
2268
2269 @note Is used in error handling in ha_delete_table.
2270 All handlers should exist (lock_partitions should not be used)
2271*/
2272
2273void ha_partition::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
2274{
2275 handler **file_array;
2276 table= table_arg;
2277 table_share= share;
2278 /*
2279 m_file can be NULL when using an old cached table in DROP TABLE, when the
2280 table just has REMOVED PARTITIONING, see Bug#42438
2281 */
2282 if (m_file)
2283 {
2284 file_array= m_file;
2285 DBUG_ASSERT(*file_array);
2286 do
2287 {
2288 (*file_array)->change_table_ptr(table_arg, share);
2289 } while (*(++file_array));
2290 }
2291
2292 if (m_added_file && m_added_file[0])
2293 {
2294 /* if in middle of a drop/rename etc */
2295 file_array= m_added_file;
2296 do
2297 {
2298 (*file_array)->change_table_ptr(table_arg, share);
2299 } while (*(++file_array));
2300 }
2301}
2302
2303/*
2304 Change comments specific to handler
2305
2306 SYNOPSIS
2307 update_table_comment()
2308 comment Original comment
2309
2310 RETURN VALUE
2311 new comment
2312
2313 DESCRIPTION
2314 No comment changes so far
2315*/
2316
2317char *ha_partition::update_table_comment(const char *comment)
2318{
2319 return (char*) comment; /* Nothing to change */
2320}
2321
2322
2323/**
2324 Handle delete and rename table
2325
2326 @param from Full path of old table
2327 @param to Full path of new table
2328
2329 @return Operation status
2330 @retval >0 Error
2331 @retval 0 Success
2332
2333 @note Common routine to handle delete_table and rename_table.
2334 The routine uses the partition handler file to get the
2335 names of the partition instances. Both these routines
2336 are called after creating the handler without table
2337 object and thus the file is needed to discover the
2338 names of the partitions and the underlying storage engines.
2339*/
2340
uint ha_partition::del_ren_table(const char *from, const char *to)
{
  int save_error= 0;
  int error;
  char from_buff[FN_REFLEN + 1], to_buff[FN_REFLEN + 1],
       from_lc_buff[FN_REFLEN], to_lc_buff[FN_REFLEN];
  char *name_buffer_ptr;
  const char *from_path;
  const char *to_path= NULL;
  uint i;
  handler **file, **abort_file;
  DBUG_ENTER("ha_partition::del_ren_table");

  /* Load partition names and engines from the .par file */
  if (get_from_handler_file(from, ha_thd()->mem_root, false))
    DBUG_RETURN(TRUE);
  DBUG_ASSERT(m_file_buffer);
  DBUG_PRINT("enter", ("from: (%s) to: (%s)", from, to ? to : "(nil)"));
  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  if (to == NULL)
  {
    /*
      Delete table, start by delete the .par file. If error, break, otherwise
      delete as much as possible.
    */
    if (unlikely((error= handler::delete_table(from))))
      DBUG_RETURN(error);
  }
  /*
    Since ha_partition has HA_FILE_BASED, it must alter underlying table names
    if they do not have HA_FILE_BASED and lower_case_table_names == 2.
    See Bug#37402, for Mac OS X.
    The appended #P#<partname>[#SP#<subpartname>] will remain in current case.
    Using the first partitions handler, since mixing handlers is not allowed.
  */
  from_path= get_canonical_filename(*file, from, from_lc_buff);
  if (to != NULL)
    to_path= get_canonical_filename(*file, to, to_lc_buff);
  /*
    Loop over all underlying partitions.  name_buffer_ptr walks the
    NUL-separated partition-name list read from the .par file in step
    with the m_file handler array.
  */
  i= 0;
  do
  {
    if (unlikely((error= create_partition_name(from_buff, sizeof(from_buff),
                                               from_path, name_buffer_ptr,
                                               NORMAL_PART_NAME, FALSE))))
      goto rename_error;

    if (to != NULL)
    {                                           // Rename branch
      if (unlikely((error= create_partition_name(to_buff, sizeof(to_buff),
                                                 to_path, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto rename_error;
      /* A failed partition rename triggers rollback of all earlier ones */
      error= (*file)->ha_rename_table(from_buff, to_buff);
      if (unlikely(error))
        goto rename_error;
    }
    else                                        // delete branch
    {
      /* Deletes are best-effort: remember the error but keep deleting */
      error= (*file)->ha_delete_table(from_buff);
    }
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
    if (unlikely(error))
      save_error= error;
    i++;
  } while (*(++file));
  if (to != NULL)
  {
    /* All partitions renamed; finally rename the .par file itself */
    if (unlikely((error= handler::rename_table(from, to))))
    {
      /* Try to revert everything, ignore errors */
      (void) handler::rename_table(to, from);
      goto rename_error;
    }
  }
  DBUG_RETURN(save_error);
rename_error:
  /*
    Roll back: rename every already-renamed partition (those before the
    failing one) back from 'to' to the original 'from'.
    NOTE(review): this path is also reachable from the delete branch, where
    to_path is NULL -- confirm create_partition_name tolerates a NULL path.
  */
  name_buffer_ptr= m_name_buffer_ptr;
  for (abort_file= file, file= m_file; file < abort_file; file++)
  {
    /* Revert the rename, back from 'to' to the original 'from' */
    if (!create_partition_name(from_buff, sizeof(from_buff), from_path,
                               name_buffer_ptr, NORMAL_PART_NAME, FALSE) &&
        !create_partition_name(to_buff, sizeof(to_buff), to_path,
                               name_buffer_ptr, NORMAL_PART_NAME, FALSE))
    {
      /* Ignore error here */
      (void) (*file)->ha_rename_table(to_buff, from_buff);
    }
    name_buffer_ptr= strend(name_buffer_ptr) + 1;
  }
  DBUG_RETURN(error);
}
2433
2434uint ha_partition::count_query_cache_dependant_tables(uint8 *tables_type)
2435{
2436 DBUG_ENTER("ha_partition::count_query_cache_dependant_tables");
2437 /* Here we rely on the fact that all tables are of the same type */
2438 uint8 type= m_file[0]->table_cache_type();
2439 (*tables_type)|= type;
2440 DBUG_PRINT("enter", ("cnt: %u", (uint) m_tot_parts));
2441 /*
2442 We need save underlying tables only for HA_CACHE_TBL_ASKTRANSACT:
2443 HA_CACHE_TBL_NONTRANSACT - because all changes goes through partition table
2444 HA_CACHE_TBL_NOCACHE - because will not be cached
2445 HA_CACHE_TBL_TRANSACT - QC need to know that such type present
2446 */
2447 DBUG_RETURN(type == HA_CACHE_TBL_ASKTRANSACT ? m_tot_parts : 0);
2448}
2449
/**
  Register one underlying partition table with the query cache.

  @param thd             Thread handle
  @param engine_key      Engine-level key (path + #P#... suffix)
  @param engine_key_len  Length of engine_key
  @param cache_key       Query-cache key (db\0table\0 + #P#... suffix)
  @param cache_key_len   Length of cache_key
  @param type            Table cache type of the partition
  @param cache           Query cache object
  @param block_table     [in,out] Current query-cache block-table slot
  @param file            Handler of the partition
  @param n               [in,out] Running table counter

  @return Operation status
  @retval TRUE   Failure (caching disabled or insert failed)
  @retval FALSE  Success
*/

my_bool ha_partition::
reg_query_cache_dependant_table(THD *thd,
                                char *engine_key, uint engine_key_len,
                                char *cache_key, uint cache_key_len,
                                uint8 type,
                                Query_cache *cache,
                                Query_cache_block_table **block_table,
                                handler *file,
                                uint *n)
{
  DBUG_ENTER("ha_partition::reg_query_cache_dependant_table");
  qc_engine_callback engine_callback;
  ulonglong engine_data;
  /* ask underlying engine whether this table may be cached at all */
  if (!file->register_query_cache_table(thd, engine_key,
                                        engine_key_len,
                                        &engine_callback,
                                        &engine_data))
  {
    DBUG_PRINT("qcache", ("Handler does not allow caching for %.*s",
                          engine_key_len, engine_key));
    /*
      As this can change from call to call, don't reset set
      thd->lex->safe_to_cache_query
    */
    thd->query_cache_is_applicable= 0;        // Query can't be cached
    DBUG_RETURN(TRUE);
  }
  /* Claim the next block-table slot and number it */
  (++(*block_table))->n= ++(*n);
  if (!cache->insert_table(thd, cache_key_len,
                           cache_key, (*block_table),
                           (uint32) table_share->db.length,
                           (uint8) (cache_key_len -
                                    table_share->table_cache_key.length),
                           type,
                           engine_callback, engine_data,
                           FALSE))
    DBUG_RETURN(TRUE);
  DBUG_RETURN(FALSE);
}
2490
2491
/**
  Register all underlying partition tables with the query cache.

  Builds, for every (sub)partition, two keys sharing the same
  "#P#<part>[#SP#<subpart>]" suffix:
    - engine_key:  normalized path + suffix (what the engine knows)
    - cache_key:   db\0table\0 form + suffix (what the query cache knows)
  and registers each via reg_query_cache_dependant_table().

  @return TRUE on failure, FALSE on success.
*/

my_bool ha_partition::
register_query_cache_dependant_tables(THD *thd,
                                      Query_cache *cache,
                                      Query_cache_block_table **block_table,
                                      uint *n)
{
  char *engine_key_end, *query_cache_key_end;
  uint i;
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  int diff_length;
  List_iterator<partition_element> part_it(m_part_info->partitions);
  char engine_key[FN_REFLEN], query_cache_key[FN_REFLEN];
  DBUG_ENTER("ha_partition::register_query_cache_dependant_tables");

  /* see ha_partition::count_query_cache_dependant_tables */
  if (m_file[0]->table_cache_type() != HA_CACHE_TBL_ASKTRANSACT)
    DBUG_RETURN(FALSE);                       // nothing to register

  /* prepare static part of the key */
  memcpy(engine_key, table_share->normalized_path.str,
         table_share->normalized_path.length);
  memcpy(query_cache_key, table_share->table_cache_key.str,
         table_share->table_cache_key.length);

  /* length difference between the two key forms, reused per partition */
  diff_length= ((int) table_share->table_cache_key.length -
                (int) table_share->normalized_path.length -1);

  /*
    Both buffers get the literal "#P#" appended once; only the partition
    name suffix after it changes per iteration.
  */
  engine_key_end= engine_key + table_share->normalized_path.length;
  query_cache_key_end= query_cache_key + table_share->table_cache_key.length -1;

  engine_key_end[0]= engine_key_end[2]= query_cache_key_end[0]=
    query_cache_key_end[2]= '#';
  query_cache_key_end[1]= engine_key_end[1]= 'P';
  engine_key_end+= 3;
  query_cache_key_end+= 3;

  i= 0;
  do
  {
    partition_element *part_elem= part_it++;
    char *engine_pos= strmov(engine_key_end, part_elem->partition_name);
    if (m_is_sub_partitioned)
    {
      List_iterator<partition_element> subpart_it(part_elem->subpartitions);
      partition_element *sub_elem;
      uint j= 0, part;
      /* append "#SP#" before each subpartition name */
      engine_pos[0]= engine_pos[3]= '#';
      engine_pos[1]= 'S';
      engine_pos[2]= 'P';
      engine_pos += 4;
      do
      {
        char *end;
        uint length;
        sub_elem= subpart_it++;
        part= i * num_subparts + j;
        /* we store the end \0 as part of the key */
        end= strmov(engine_pos, sub_elem->partition_name);
        length= (uint)(end - engine_key);
        /* Copy the suffix also to query cache key */
        memcpy(query_cache_key_end, engine_key_end, (end - engine_key_end));
        if (reg_query_cache_dependant_table(thd, engine_key, length,
                                            query_cache_key,
                                            length + diff_length,
                                            m_file[part]->table_cache_type(),
                                            cache,
                                            block_table, m_file[part],
                                            n))
          DBUG_RETURN(TRUE);
      } while (++j < num_subparts);
    }
    else
    {
      char *end= engine_pos+1;                // copy end \0
      uint length= (uint)(end - engine_key);
      /* Copy the suffix also to query cache key */
      memcpy(query_cache_key_end, engine_key_end, (end - engine_key_end));
      if (reg_query_cache_dependant_table(thd, engine_key, length,
                                          query_cache_key,
                                          length + diff_length,
                                          m_file[i]->table_cache_type(),
                                          cache,
                                          block_table, m_file[i],
                                          n))
        DBUG_RETURN(TRUE);
    }
  } while (++i < num_parts);
  DBUG_PRINT("info", ("cnt: %u", (uint)m_tot_parts));
  DBUG_RETURN(FALSE);
}
2583
2584
2585/**
2586 Set up table share object before calling create on underlying handler
2587
2588 @param table Table object
2589 @param info Create info
2590 @param part_elem[in,out] Pointer to used partition_element, searched if NULL
2591
2592 @return status
2593 @retval TRUE Error
2594 @retval FALSE Success
2595
2596 @details
2597 Set up
2598 1) Comment on partition
2599 2) MAX_ROWS, MIN_ROWS on partition
2600 3) Index file name on partition
2601 4) Data file name on partition
2602*/
2603
int ha_partition::set_up_table_before_create(TABLE *tbl,
                    const char *partition_name_with_path,
                    HA_CREATE_INFO *info,
                    partition_element *part_elem)
{
  int error= 0;
  LEX_CSTRING part_name;
  THD *thd= ha_thd();
  DBUG_ENTER("set_up_table_before_create");

  DBUG_ASSERT(part_elem);

  if (!part_elem)
    DBUG_RETURN(1);
  /* Per-partition MAX_ROWS / MIN_ROWS override the table-level values */
  tbl->s->max_rows= part_elem->part_max_rows;
  tbl->s->min_rows= part_elem->part_min_rows;
  /*
    Extract the bare partition file name after the last path separator.
    assumes partition_name_with_path always contains FN_LIBCHAR --
    strrchr would return NULL otherwise; TODO confirm with callers.
  */
  part_name.str= strrchr(partition_name_with_path, FN_LIBCHAR)+1;
  part_name.length= strlen(part_name.str);
  /*
    Append the partition file name to the per-partition INDEX/DATA
    DIRECTORY so each partition gets its own file inside that directory.
  */
  if ((part_elem->index_file_name &&
      (error= append_file_to_dir(thd,
                                 (const char**)&part_elem->index_file_name,
                                 &part_name))) ||
      (part_elem->data_file_name &&
      (error= append_file_to_dir(thd,
                                 (const char**)&part_elem->data_file_name,
                                 &part_name))))
  {
    DBUG_RETURN(error);
  }
  info->index_file_name= part_elem->index_file_name;
  info->data_file_name= part_elem->data_file_name;
  info->connect_string= part_elem->connect_string;
  if (info->connect_string.length)
    info->used_fields|= HA_CREATE_USED_CONNECTION;
  tbl->s->connect_string= part_elem->connect_string;
  DBUG_RETURN(0);
}
2641
2642
2643/*
2644 Add two names together
2645
2646 SYNOPSIS
2647 name_add()
2648 out:dest Destination string
2649 first_name First name
2650 sec_name Second name
2651
2652 RETURN VALUE
2653 >0 Error
2654 0 Success
2655
2656 DESCRIPTION
2657 Routine used to add two names with '_' in between then. Service routine
2658 to create_handler_file
2659 Include the NULL in the count of characters since it is needed as separator
2660 between the partition names.
2661*/
2662
2663static uint name_add(char *dest, const char *first_name, const char *sec_name)
2664{
2665 return (uint) (strxmov(dest, first_name, "#SP#", sec_name, NullS) -dest) + 1;
2666}
2667
2668
2669/**
2670 Create the special .par file
2671
2672 @param name Full path of table name
2673
  @return Operation status
    @retval TRUE                   Error
    @retval FALSE                  Success
2677
2678 @note
2679 Method used to create handler file with names of partitions, their
2680 engine types and the number of partitions.
2681*/
2682
bool ha_partition::create_handler_file(const char *name)
{
  partition_element *part_elem, *subpart_elem;
  size_t i, j, part_name_len, subpart_name_len;
  size_t tot_partition_words, tot_name_len, num_parts;
  size_t tot_parts= 0;
  size_t tot_len_words, tot_len_byte, chksum, tot_name_words;
  char *name_buffer_ptr;
  uchar *file_buffer, *engine_array;
  bool result= TRUE;
  char file_name[FN_REFLEN];
  char part_name[FN_REFLEN];
  char subpart_name[FN_REFLEN];
  File file;
  List_iterator_fast <partition_element> part_it(m_part_info->partitions);
  DBUG_ENTER("create_handler_file");

  num_parts= m_part_info->partitions.elements;
  DBUG_PRINT("enter", ("table name: %s  num_parts: %zu", name,
                       num_parts));
  /*
    First pass: count the partitions that belong in the file and the total
    byte length of their NUL-separated (file-system encoded) names.
    Partitions in transient states (being dropped etc.) are skipped.
  */
  tot_name_len= 0;
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    tablename_to_filename(part_elem->partition_name, part_name,
                          FN_REFLEN);
    part_name_len= strlen(part_name);
    if (!m_is_sub_partitioned)
    {
      tot_name_len+= part_name_len + 1;
      tot_parts++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        subpart_elem= sub_it++;
        tablename_to_filename(subpart_elem->partition_name,
                              subpart_name,
                              FN_REFLEN);
        subpart_name_len= strlen(subpart_name);
        /* part + "#SP#" (4) + subpart + NUL (1) */
        tot_name_len+= part_name_len + subpart_name_len + 5;
        tot_parts++;
      }
    }
  }
  /*
     File format:
     Length in words              4 byte
     Checksum                     4 byte
     Total number of partitions   4 byte
     Array of engine types        n * 4 bytes where
     n = (m_tot_parts + 3)/4
     Length of name part in bytes 4 bytes
     (Names in filename format)
     Name part                    m * 4 bytes where
     m = ((length_name_part + 3)/4)*4

     All padding bytes are zeroed
  */
  /* Engine types are one byte each, packed and padded to whole words */
  tot_partition_words= (tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  tot_name_words= (tot_name_len + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
  /* 4 static words (tot words, checksum, tot partitions, name length) */
  tot_len_words= 4 + tot_partition_words + tot_name_words;
  tot_len_byte= PAR_WORD_SIZE * tot_len_words;
  if (!(file_buffer= (uchar *) my_malloc(tot_len_byte, MYF(MY_ZEROFILL))))
    DBUG_RETURN(TRUE);
  engine_array= (file_buffer + PAR_ENGINES_OFFSET);
  name_buffer_ptr= (char*) (engine_array + tot_partition_words * PAR_WORD_SIZE
                            + PAR_WORD_SIZE);
  /*
    Second pass: fill in the engine byte and name for each included
    (sub)partition, walking the same elements in the same order.
  */
  part_it.rewind();
  for (i= 0; i < num_parts; i++)
  {
    part_elem= part_it++;
    if (part_elem->part_state != PART_NORMAL &&
        part_elem->part_state != PART_TO_BE_ADDED &&
        part_elem->part_state != PART_CHANGED)
      continue;
    if (!m_is_sub_partitioned)
    {
      tablename_to_filename(part_elem->partition_name, part_name, FN_REFLEN);
      name_buffer_ptr= strmov(name_buffer_ptr, part_name)+1;
      *engine_array= (uchar) ha_legacy_type(part_elem->engine_type);
      DBUG_PRINT("info", ("engine: %u", *engine_array));
      engine_array++;
    }
    else
    {
      List_iterator_fast <partition_element> sub_it(part_elem->subpartitions);
      for (j= 0; j < m_part_info->num_subparts; j++)
      {
        subpart_elem= sub_it++;
        tablename_to_filename(part_elem->partition_name, part_name,
                              FN_REFLEN);
        tablename_to_filename(subpart_elem->partition_name, subpart_name,
                              FN_REFLEN);
        name_buffer_ptr+= name_add(name_buffer_ptr,
                                   part_name,
                                   subpart_name);
        *engine_array= (uchar) ha_legacy_type(subpart_elem->engine_type);
        DBUG_PRINT("info", ("engine: %u", *engine_array));
        engine_array++;
      }
    }
  }
  /*
    Header fields; the checksum is computed so that XOR-ing all words of
    the finished file yields zero (see read_par_file()).
  */
  chksum= 0;
  int4store(file_buffer, tot_len_words);
  int4store(file_buffer + PAR_NUM_PARTS_OFFSET, tot_parts);
  int4store(file_buffer + PAR_ENGINES_OFFSET +
            (tot_partition_words * PAR_WORD_SIZE),
            tot_name_len);
  for (i= 0; i < tot_len_words; i++)
    chksum^= uint4korr(file_buffer + PAR_WORD_SIZE * i);
  int4store(file_buffer + PAR_CHECKSUM_OFFSET, chksum);
  /*
    Add .par extension to the file name.
    Create and write and close file
    to be used at open, delete_table and rename_table
  */
  fn_format(file_name, name, "", ha_par_ext, MY_APPEND_EXT);
  if ((file= mysql_file_create(key_file_partition,
                               file_name, CREATE_MODE, O_RDWR | O_TRUNC,
                               MYF(MY_WME))) >= 0)
  {
    result= mysql_file_write(file, (uchar *) file_buffer, tot_len_byte,
                             MYF(MY_WME | MY_NABP)) != 0;

    /* Write connection information (for federatedx engine) */
    part_it.rewind();
    for (i= 0; i < num_parts && !result; i++)
    {
      uchar buffer[4];
      part_elem= part_it++;
      size_t length= part_elem->connect_string.length;
      int4store(buffer, length);
      if (my_write(file, buffer, 4, MYF(MY_WME | MY_NABP)) ||
          my_write(file, (uchar *) part_elem->connect_string.str, length,
                   MYF(MY_WME | MY_NABP)))
      {
        result= TRUE;
        break;
      }
    }
    (void) mysql_file_close(file, MYF(0));
  }
  else
    result= TRUE;
  my_free(file_buffer);
  DBUG_RETURN(result);
}
2837
2838
2839/**
2840 Clear handler variables and free some memory
2841*/
2842
void ha_partition::clear_handler_file()
{
  /* Release the plugin references taken in setup_engine_array() */
  if (m_engine_array)
    plugin_unlock_list(NULL, m_engine_array, m_tot_parts);
  /* All three buffers below live on m_mem_root; one free reclaims them */
  free_root(&m_mem_root, MYF(MY_KEEP_PREALLOC));
  m_file_buffer= NULL;
  m_engine_array= NULL;
  m_connect_string= NULL;
}
2852
2853
2854/**
2855 Create underlying handler objects
2856
2857 @param mem_root Allocate memory through this
2858
2859 @return Operation status
2860 @retval TRUE Error
2861 @retval FALSE Success
2862*/
2863
2864bool ha_partition::create_handlers(MEM_ROOT *mem_root)
2865{
2866 uint i;
2867 uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
2868 handlerton *hton0;
2869 DBUG_ENTER("create_handlers");
2870
2871 if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
2872 DBUG_RETURN(TRUE);
2873 m_file_tot_parts= m_tot_parts;
2874 bzero((char*) m_file, alloc_len);
2875 for (i= 0; i < m_tot_parts; i++)
2876 {
2877 handlerton *hton= plugin_data(m_engine_array[i], handlerton*);
2878 if (!(m_file[i]= get_new_handler(table_share, mem_root, hton)))
2879 DBUG_RETURN(TRUE);
2880 DBUG_PRINT("info", ("engine_type: %u", hton->db_type));
2881 }
2882 /* For the moment we only support partition over the same table engine */
2883 hton0= plugin_data(m_engine_array[0], handlerton*);
2884 if (hton0 == myisam_hton)
2885 {
2886 DBUG_PRINT("info", ("MyISAM"));
2887 m_myisam= TRUE;
2888 }
2889 /* INNODB may not be compiled in... */
2890 else if (ha_legacy_type(hton0) == DB_TYPE_INNODB)
2891 {
2892 DBUG_PRINT("info", ("InnoDB"));
2893 m_innodb= TRUE;
2894 }
2895 DBUG_RETURN(FALSE);
2896}
2897
2898
2899/*
2900 Create underlying handler objects from partition info
2901
2902 SYNOPSIS
2903 new_handlers_from_part_info()
2904 mem_root Allocate memory through this
2905
2906 RETURN VALUE
2907 TRUE Error
2908 FALSE Success
2909*/
2910
2911bool ha_partition::new_handlers_from_part_info(MEM_ROOT *mem_root)
2912{
2913 uint i, j, part_count;
2914 partition_element *part_elem;
2915 uint alloc_len= (m_tot_parts + 1) * sizeof(handler*);
2916 List_iterator_fast <partition_element> part_it(m_part_info->partitions);
2917 DBUG_ENTER("ha_partition::new_handlers_from_part_info");
2918
2919 if (!(m_file= (handler **) alloc_root(mem_root, alloc_len)))
2920 goto error;
2921
2922 m_file_tot_parts= m_tot_parts;
2923 bzero((char*) m_file, alloc_len);
2924 DBUG_ASSERT(m_part_info->num_parts > 0);
2925
2926 i= 0;
2927 part_count= 0;
2928 /*
2929 Don't know the size of the underlying storage engine, invent a number of
2930 bytes allocated for error message if allocation fails
2931 */
2932 do
2933 {
2934 part_elem= part_it++;
2935 if (m_is_sub_partitioned)
2936 {
2937 for (j= 0; j < m_part_info->num_subparts; j++)
2938 {
2939 if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
2940 part_elem->engine_type)))
2941 goto error;
2942 DBUG_PRINT("info", ("engine_type: %u",
2943 (uint) ha_legacy_type(part_elem->engine_type)));
2944 }
2945 }
2946 else
2947 {
2948 if (!(m_file[part_count++]= get_new_handler(table_share, mem_root,
2949 part_elem->engine_type)))
2950 goto error;
2951 DBUG_PRINT("info", ("engine_type: %u",
2952 (uint) ha_legacy_type(part_elem->engine_type)));
2953 }
2954 } while (++i < m_part_info->num_parts);
2955 if (part_elem->engine_type == myisam_hton)
2956 {
2957 DBUG_PRINT("info", ("MyISAM"));
2958 m_myisam= TRUE;
2959 }
2960 DBUG_RETURN(FALSE);
2961error:
2962 DBUG_RETURN(TRUE);
2963}
2964
2965
2966/**
2967 Read the .par file to get the partitions engines and names
2968
  @param name  Name of table file (without extension)
2970
2971 @return Operation status
2972 @retval true Failure
2973 @retval false Success
2974
2975 @note On success, m_file_buffer is allocated and must be
2976 freed by the caller. m_name_buffer_ptr and m_tot_parts is also set.
2977*/
2978
2979bool ha_partition::read_par_file(const char *name)
2980{
2981 char buff[FN_REFLEN];
2982 uchar *tot_name_len_offset;
2983 File file;
2984 uchar *file_buffer;
2985 uint i, len_bytes, len_words, tot_partition_words, tot_name_words, chksum;
2986 DBUG_ENTER("ha_partition::read_par_file");
2987 DBUG_PRINT("enter", ("table name: '%s'", name));
2988
2989 if (m_file_buffer)
2990 DBUG_RETURN(false);
2991 fn_format(buff, name, "", ha_par_ext, MY_APPEND_EXT);
2992
2993 /* Following could be done with mysql_file_stat to read in whole file */
2994 if ((file= mysql_file_open(key_file_partition,
2995 buff, O_RDONLY | O_SHARE, MYF(0))) < 0)
2996 DBUG_RETURN(TRUE);
2997 if (mysql_file_read(file, (uchar *) &buff[0], PAR_WORD_SIZE, MYF(MY_NABP)))
2998 goto err1;
2999 len_words= uint4korr(buff);
3000 len_bytes= PAR_WORD_SIZE * len_words;
3001 if (mysql_file_seek(file, 0, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR)
3002 goto err1;
3003 if (!(file_buffer= (uchar*) alloc_root(&m_mem_root, len_bytes)))
3004 goto err1;
3005 if (mysql_file_read(file, file_buffer, len_bytes, MYF(MY_NABP)))
3006 goto err2;
3007
3008 chksum= 0;
3009 for (i= 0; i < len_words; i++)
3010 chksum ^= uint4korr((file_buffer) + PAR_WORD_SIZE * i);
3011 if (chksum)
3012 goto err2;
3013 m_tot_parts= uint4korr((file_buffer) + PAR_NUM_PARTS_OFFSET);
3014 DBUG_PRINT("info", ("No of parts: %u", m_tot_parts));
3015 tot_partition_words= (m_tot_parts + PAR_WORD_SIZE - 1) / PAR_WORD_SIZE;
3016
3017 tot_name_len_offset= file_buffer + PAR_ENGINES_OFFSET +
3018 PAR_WORD_SIZE * tot_partition_words;
3019 tot_name_words= (uint4korr(tot_name_len_offset) + PAR_WORD_SIZE - 1) /
3020 PAR_WORD_SIZE;
3021 /*
3022 Verify the total length = tot size word, checksum word, num parts word +
3023 engines array + name length word + name array.
3024 */
3025 if (len_words != (tot_partition_words + tot_name_words + 4))
3026 goto err2;
3027 m_file_buffer= file_buffer; // Will be freed in clear_handler_file()
3028 m_name_buffer_ptr= (char*) (tot_name_len_offset + PAR_WORD_SIZE);
3029
3030 if (!(m_connect_string= (LEX_CSTRING*)
3031 alloc_root(&m_mem_root, m_tot_parts * sizeof(LEX_CSTRING))))
3032 goto err2;
3033 bzero(m_connect_string, m_tot_parts * sizeof(LEX_CSTRING));
3034
3035 /* Read connection arguments (for federated X engine) */
3036 for (i= 0; i < m_tot_parts; i++)
3037 {
3038 LEX_CSTRING connect_string;
3039 uchar buffer[4];
3040 char *tmp;
3041 if (my_read(file, buffer, 4, MYF(MY_NABP)))
3042 {
3043 /* No extra options; Probably not a federatedx engine */
3044 break;
3045 }
3046 connect_string.length= uint4korr(buffer);
3047 connect_string.str= tmp= (char*) alloc_root(&m_mem_root,
3048 connect_string.length+1);
3049 if (my_read(file, (uchar*) connect_string.str, connect_string.length,
3050 MYF(MY_NABP)))
3051 break;
3052 tmp[connect_string.length]= 0;
3053 m_connect_string[i]= connect_string;
3054 }
3055
3056 (void) mysql_file_close(file, MYF(0));
3057 DBUG_RETURN(false);
3058
3059err2:
3060err1:
3061 (void) mysql_file_close(file, MYF(0));
3062 DBUG_RETURN(true);
3063}
3064
3065
3066/**
3067 Setup m_engine_array
3068
3069 @param mem_root MEM_ROOT to use for allocating new handlers
3070
3071 @return Operation status
3072 @retval false Success
3073 @retval true Failure
3074*/
3075
3076bool ha_partition::setup_engine_array(MEM_ROOT *mem_root)
3077{
3078 uint i;
3079 uchar *buff;
3080 handlerton **engine_array, *first_engine;
3081 enum legacy_db_type db_type, first_db_type;
3082
3083 DBUG_ASSERT(!m_file);
3084 DBUG_ENTER("ha_partition::setup_engine_array");
3085 engine_array= (handlerton **) my_alloca(m_tot_parts * sizeof(handlerton*));
3086 if (!engine_array)
3087 DBUG_RETURN(true);
3088
3089 buff= (uchar *) (m_file_buffer + PAR_ENGINES_OFFSET);
3090 first_db_type= (enum legacy_db_type) buff[0];
3091 first_engine= ha_resolve_by_legacy_type(ha_thd(), first_db_type);
3092 if (!first_engine)
3093 goto err;
3094
3095 if (!(m_engine_array= (plugin_ref*)
3096 alloc_root(&m_mem_root, m_tot_parts * sizeof(plugin_ref))))
3097 goto err;
3098
3099 for (i= 0; i < m_tot_parts; i++)
3100 {
3101 db_type= (enum legacy_db_type) buff[i];
3102 if (db_type != first_db_type)
3103 {
3104 DBUG_PRINT("error", ("partition %u engine %d is not same as "
3105 "first partition %d", i, db_type,
3106 (int) first_db_type));
3107 DBUG_ASSERT(0);
3108 clear_handler_file();
3109 goto err;
3110 }
3111 m_engine_array[i]= ha_lock_engine(NULL, first_engine);
3112 if (!m_engine_array[i])
3113 {
3114 clear_handler_file();
3115 goto err;
3116 }
3117 }
3118
3119 my_afree(engine_array);
3120
3121 if (create_handlers(mem_root))
3122 {
3123 clear_handler_file();
3124 DBUG_RETURN(true);
3125 }
3126
3127 DBUG_RETURN(false);
3128
3129err:
3130 my_afree(engine_array);
3131 DBUG_RETURN(true);
3132}
3133
3134
3135/**
3136 Get info about partition engines and their names from the .par file
3137
3138 @param name Full path of table name
3139 @param mem_root Allocate memory through this
3140 @param is_clone If it is a clone, don't create new handlers
3141
3142 @return Operation status
3143 @retval true Error
3144 @retval false Success
3145
3146 @note Open handler file to get partition names, engine types and number of
3147 partitions.
3148*/
3149
3150bool ha_partition::get_from_handler_file(const char *name, MEM_ROOT *mem_root,
3151 bool is_clone)
3152{
3153 DBUG_ENTER("ha_partition::get_from_handler_file");
3154 DBUG_PRINT("enter", ("table name: '%s'", name));
3155
3156 if (m_file_buffer)
3157 DBUG_RETURN(false);
3158
3159 if (read_par_file(name))
3160 DBUG_RETURN(true);
3161
3162 if (!is_clone && setup_engine_array(mem_root))
3163 DBUG_RETURN(true);
3164
3165 DBUG_RETURN(false);
3166}
3167
3168
3169/****************************************************************************
3170 MODULE open/close object
3171****************************************************************************/
3172
3173/**
3174 Get the partition name.
3175
3176 @param part Struct containing name and length
3177 @param[out] length Length of the name
3178
3179 @return Partition name
3180*/
3181
/* HASH key-extraction callback for partition_name_hash */
static uchar *get_part_name(PART_NAME_DEF *part, size_t *length,
                            my_bool not_used __attribute__((unused)))
{
  /* Length was precomputed in insert_partition_name_in_hash() */
  *length= part->length;
  return part->partition_name;
}
3188
3189
3190/**
3191 Insert a partition name in the partition_name_hash.
3192
3193 @param name Name of partition
3194 @param part_id Partition id (number)
3195 @param is_subpart Set if the name belongs to a subpartition
3196
3197 @return Operation status
3198 @retval true Failure
  @retval false Success
3200*/
3201
bool ha_partition::insert_partition_name_in_hash(const char *name, uint part_id,
                                                 bool is_subpart)
{
  PART_NAME_DEF *part_def;
  uchar *part_name;
  size_t part_name_length;
  DBUG_ENTER("ha_partition::insert_partition_name_in_hash");
  /*
    Calculate and store the length here, to avoid doing it when
    searching the hash.
  */
  part_name_length= strlen(name);
  /*
    Must use memory that lives as long as table_share.
    Freed in the Partition_share destructor.
    Since we use my_multi_malloc, then my_free(part_def) will also free
    part_name, as a part of my_hash_free.
  */
  if (!my_multi_malloc(MY_WME,
                       &part_def, sizeof(PART_NAME_DEF),
                       &part_name, part_name_length + 1,
                       NULL))
    DBUG_RETURN(true);
  /* Copy including the terminating NUL */
  memcpy(part_name, name, part_name_length + 1);
  part_def->partition_name= part_name;
  part_def->length= (uint)part_name_length;
  part_def->part_id= part_id;
  part_def->is_subpart= is_subpart;
  /* HASH_UNIQUE: insertion fails on duplicate names as well as on OOM */
  if (my_hash_insert(&part_share->partition_name_hash, (uchar *) part_def))
  {
    my_free(part_def);
    DBUG_RETURN(true);
  }
  DBUG_RETURN(false);
}
3237
3238
3239/**
3240 Populate the partition_name_hash in part_share.
3241*/
3242
bool ha_partition::populate_partition_name_hash()
{
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  /* Without subpartitioning, each partition occupies one id "slot". */
  uint num_subparts= m_is_sub_partitioned ? m_part_info->num_subparts : 1;
  uint tot_names;
  uint i= 0;
  DBUG_ASSERT(part_share);

  DBUG_ENTER("ha_partition::populate_partition_name_hash");

  /*
    partition_name_hash is only set once and never changed
    -> OK to check without locking.
  */

  if (part_share->partition_name_hash_initialized)
    DBUG_RETURN(false);
  lock_shared_ha_data();
  /* Re-check under the lock: another handler instance may have won the race. */
  if (part_share->partition_name_hash_initialized)
  {
    unlock_shared_ha_data();
    DBUG_RETURN(false);
  }
  /* Subpartitioned tables store both partition and subpartition names. */
  tot_names= m_is_sub_partitioned ? m_tot_parts + num_parts : num_parts;
  if (my_hash_init(&part_share->partition_name_hash,
                   system_charset_info, tot_names, 0, 0,
                   (my_hash_get_key) get_part_name,
                   my_free, HASH_UNIQUE))
  {
    unlock_shared_ha_data();
    DBUG_RETURN(TRUE);
  }

  do
  {
    partition_element *part_elem= part_it++;
    DBUG_ASSERT(part_elem->part_state == PART_NORMAL);
    if (part_elem->part_state == PART_NORMAL)
    {
      /* A partition name maps to the id of its first (sub)partition. */
      if (insert_partition_name_in_hash(part_elem->partition_name,
                                        i * num_subparts, false))
        goto err;
      if (m_is_sub_partitioned)
      {
        List_iterator<partition_element>
                                    subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0;
        do
        {
          sub_elem= subpart_it++;
          /* Subpartition ids are consecutive within their partition. */
          if (insert_partition_name_in_hash(sub_elem->partition_name,
                                            i * num_subparts + j, true))
            goto err;

        } while (++j < num_subparts);
      }
    }
  } while (++i < num_parts);

  part_share->partition_name_hash_initialized= true;
  unlock_shared_ha_data();

  DBUG_RETURN(FALSE);
err:
  /* my_hash_free() calls my_free on each entry (freeing name too). */
  my_hash_free(&part_share->partition_name_hash);
  unlock_shared_ha_data();

  DBUG_RETURN(TRUE);
}
3314
3315
3316/**
3317 Set Handler_share pointer and allocate Handler_share pointers
3318 for each partition and set those.
3319
3320 @param ha_share_arg Where to store/retrieve the Partitioning_share pointer
3321 to be shared by all instances of the same table.
3322
3323 @return Operation status
3324 @retval true Failure
  @retval false Success
3326*/
3327
3328bool ha_partition::set_ha_share_ref(Handler_share **ha_share_arg)
3329{
3330 Handler_share **ha_shares;
3331 uint i;
3332 DBUG_ENTER("ha_partition::set_ha_share_ref");
3333
3334 DBUG_ASSERT(!part_share);
3335 DBUG_ASSERT(table_share);
3336 DBUG_ASSERT(!m_is_clone_of);
3337 DBUG_ASSERT(m_tot_parts);
3338 if (handler::set_ha_share_ref(ha_share_arg))
3339 DBUG_RETURN(true);
3340 if (!(part_share= get_share()))
3341 DBUG_RETURN(true);
3342 DBUG_ASSERT(part_share->partitions_share_refs.num_parts >= m_tot_parts);
3343 ha_shares= part_share->partitions_share_refs.ha_shares;
3344 for (i= 0; i < m_tot_parts; i++)
3345 {
3346 if (m_file[i]->set_ha_share_ref(&ha_shares[i]))
3347 DBUG_RETURN(true);
3348 }
3349 DBUG_RETURN(false);
3350}
3351
3352
3353/**
3354 Get the PARTITION_SHARE for the table.
3355
3356 @return Operation status
3357 @retval true Error
3358 @retval false Success
3359
3360 @note Gets or initializes the Partition_share object used by partitioning.
3361 The Partition_share is used for handling the auto_increment etc.
3362*/
3363
3364Partition_share *ha_partition::get_share()
3365{
3366 Partition_share *tmp_share;
3367 DBUG_ENTER("ha_partition::get_share");
3368 DBUG_ASSERT(table_share);
3369
3370 lock_shared_ha_data();
3371 if (!(tmp_share= static_cast<Partition_share*>(get_ha_share_ptr())))
3372 {
3373 tmp_share= new Partition_share;
3374 if (!tmp_share)
3375 goto err;
3376 if (tmp_share->init(m_tot_parts))
3377 {
3378 delete tmp_share;
3379 tmp_share= NULL;
3380 goto err;
3381 }
3382 set_ha_share_ptr(static_cast<Handler_share*>(tmp_share));
3383 }
3384err:
3385 unlock_shared_ha_data();
3386 DBUG_RETURN(tmp_share);
3387}
3388
3389
3390
3391/**
3392 Helper function for freeing all internal bitmaps.
3393*/
3394
void ha_partition::free_partition_bitmaps()
{
  /* Free the bitmaps allocated in init_partition_bitmaps(). */
  my_bitmap_free(&m_bulk_insert_started);
  my_bitmap_free(&m_locked_partitions);
  my_bitmap_free(&m_partitions_to_reset);
  my_bitmap_free(&m_key_not_found_partitions);
  my_bitmap_free(&m_opened_partitions);
  my_bitmap_free(&m_mrr_used_partitions);
}
3405
3406
3407/**
3408 Helper function for initializing all internal bitmaps.
3409
3410 Note:
3411 All bitmaps, including partially allocated, are freed in
    free_partition_bitmaps()
3413*/
3414
3415bool ha_partition::init_partition_bitmaps()
3416{
3417 DBUG_ENTER("ha_partition::init_partition_bitmaps");
3418
3419 /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
3420 if (my_bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
3421 DBUG_RETURN(true);
3422
3423 /* Initialize the bitmap we use to keep track of locked partitions */
3424 if (my_bitmap_init(&m_locked_partitions, NULL, m_tot_parts, FALSE))
3425 DBUG_RETURN(true);
3426
3427 /*
3428 Initialize the bitmap we use to keep track of partitions which may have
3429 something to reset in ha_reset().
3430 */
3431 if (my_bitmap_init(&m_partitions_to_reset, NULL, m_tot_parts, FALSE))
3432 DBUG_RETURN(true);
3433
3434 /*
3435 Initialize the bitmap we use to keep track of partitions which returned
3436 HA_ERR_KEY_NOT_FOUND from index_read_map.
3437 */
3438 if (my_bitmap_init(&m_key_not_found_partitions, NULL, m_tot_parts, FALSE))
3439 DBUG_RETURN(true);
3440
3441 if (bitmap_init(&m_mrr_used_partitions, NULL, m_tot_parts, TRUE))
3442 DBUG_RETURN(true);
3443
3444 if (my_bitmap_init(&m_opened_partitions, NULL, m_tot_parts, FALSE))
3445 DBUG_RETURN(true);
3446
3447 m_file_sample= NULL;
3448
3449 /* Initialize the bitmap for read/lock_partitions */
3450 if (!m_is_clone_of)
3451 {
3452 DBUG_ASSERT(!m_clone_mem_root);
3453 if (m_part_info->set_partition_bitmaps(NULL))
3454 DBUG_RETURN(true);
3455 }
3456 DBUG_RETURN(false);
3457}
3458
3459
3460/*
3461 Open handler object
3462
3463 SYNOPSIS
3464 open()
3465 name Full path of table name
3466 mode Open mode flags
3467 test_if_locked ?
3468
3469 RETURN VALUE
3470 >0 Error
3471 0 Success
3472
3473 DESCRIPTION
3474 Used for opening tables. The name will be the name of the file.
3475 A table is opened when it needs to be opened. For instance
3476 when a request comes in for a select on the table (tables are not
3477 open and closed for each request, they are cached).
3478
3479 Called from handler.cc by handler::ha_open(). The server opens all tables
3480 by calling ha_open() which then calls the handler specific open().
3481*/
3482
int ha_partition::open(const char *name, int mode, uint test_if_locked)
{
  int error= HA_ERR_INITIALIZATION;
  handler **file;
  char name_buff[FN_REFLEN + 1];
  ulonglong check_table_flags;
  DBUG_ENTER("ha_partition::open");

  DBUG_ASSERT(table->s == table_share);
  ref_length= 0;
  m_mode= mode;
  m_open_test_lock= test_if_locked;
  m_part_field_array= m_part_info->full_part_field_array;
  /* Read partition definitions from the .par file. */
  if (get_from_handler_file(name, &table->mem_root, MY_TEST(m_is_clone_of)))
    DBUG_RETURN(error);
  if (populate_partition_name_hash())
  {
    DBUG_RETURN(HA_ERR_INITIALIZATION);
  }
  m_start_key.length= 0;
  m_rec0= table->record[0];
  m_rec_length= table_share->reclength;
  if (!m_part_ids_sorted_by_num_of_records)
  {
    if (!(m_part_ids_sorted_by_num_of_records=
            (uint32*) my_malloc(m_tot_parts * sizeof(uint32), MYF(MY_WME))))
      DBUG_RETURN(error);
    uint32 i;
    /* Initialize it with all partition ids. */
    for (i= 0; i < m_tot_parts; i++)
      m_part_ids_sorted_by_num_of_records[i]= i;
  }

  if (init_partition_bitmaps())
    goto err_alloc;

  if (unlikely((error=
                  m_part_info->set_partition_bitmaps(m_partitions_to_open))))
    goto err_alloc;

  /* Allocate memory used with MRR (multi range read). */
  if (!(m_range_info= (void **)
        my_multi_malloc(MYF(MY_WME),
                        &m_range_info, sizeof(range_id_t) * m_tot_parts,
                        &m_stock_range_seq, sizeof(uint) * m_tot_parts,
                        &m_mrr_buffer, sizeof(HANDLER_BUFFER) * m_tot_parts,
                        &m_mrr_buffer_size, sizeof(uint) * m_tot_parts,
                        &m_part_mrr_range_length, sizeof(uint) * m_tot_parts,
                        &m_part_mrr_range_first,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts,
                        &m_part_mrr_range_current,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts,
                        &m_partition_part_key_multi_range_hld,
                        sizeof(PARTITION_PART_KEY_MULTI_RANGE_HLD) *
                        m_tot_parts,
                        NullS)))
    goto err_alloc;

  bzero(m_mrr_buffer, m_tot_parts * sizeof(HANDLER_BUFFER));
  bzero(m_part_mrr_range_first,
        sizeof(PARTITION_PART_KEY_MULTI_RANGE *) * m_tot_parts);

  if (m_is_clone_of)
  {
    uint i, alloc_len;
    char *name_buffer_ptr;
    DBUG_ASSERT(m_clone_mem_root);
    /* Allocate an array of handler pointers for the partitions handlers. */
    alloc_len= (m_tot_parts + 1) * sizeof(handler*);
    if (!(m_file= (handler **) alloc_root(m_clone_mem_root, alloc_len)))
    {
      error= HA_ERR_INITIALIZATION;
      goto err_alloc;
    }
    memset(m_file, 0, alloc_len);
    name_buffer_ptr= m_name_buffer_ptr;
    /*
      Populate them by cloning the original partitions. This also opens them.
      Note that file->ref is allocated too.
    */
    file= m_is_clone_of->m_file;
    for (i= 0; i < m_tot_parts; i++)
    {
      /* Only clone the partitions the original actually has open. */
      if (!bitmap_is_set(&m_is_clone_of->m_opened_partitions, i))
        continue;

      if (unlikely((error= create_partition_name(name_buff, sizeof(name_buff),
                                                 name, name_buffer_ptr,
                                                 NORMAL_PART_NAME, FALSE))))
        goto err_handler;
      /* ::clone() will also set ha_share from the original. */
      if (!(m_file[i]= file[i]->clone(name_buff, m_clone_mem_root)))
      {
        error= HA_ERR_INITIALIZATION;
        /*
          NOTE(review): 'file' is reassigned at err_handler below, so this
          store appears redundant — verify before relying on it.
        */
        file= &m_file[i];
        goto err_handler;
      }
      if (!m_file_sample)
        m_file_sample= m_file[i];
      name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
      bitmap_set_bit(&m_opened_partitions, i);
    }
  }
  else
  {
    if (unlikely((error= open_read_partitions(name_buff, sizeof(name_buff)))))
      goto err_handler;
    m_num_locks= m_file_sample->lock_count();
  }
  /*
    We want to know the upper bound for locks, to allocate enough memory.
    There is no performance lost if we simply return in lock_count() the
    maximum number locks needed, only some minor over allocation of memory
    in get_lock_data().
  */
  m_num_locks*= m_tot_parts;

  file= m_file;
  ref_length= get_open_file_sample()->ref_length;
  check_table_flags= ((get_open_file_sample()->ha_table_flags() &
                       ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                      (PARTITION_ENABLED_TABLE_FLAGS));
  while (*(++file))
  {
    if (!bitmap_is_set(&m_opened_partitions, (uint)(file - m_file)))
      continue;
    /* MyISAM can have smaller ref_length for partitions with MAX_ROWS set */
    set_if_bigger(ref_length, ((*file)->ref_length));
    /*
      Verify that all partitions have the same set of table flags.
      Mask all flags that partitioning enables/disables.
    */
    if (check_table_flags != (((*file)->ha_table_flags() &
                               ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                              (PARTITION_ENABLED_TABLE_FLAGS)))
    {
      error= HA_ERR_INITIALIZATION;
      /* set file to last handler, so all of them are closed */
      file= &m_file[m_tot_parts - 1];
      goto err_handler;
    }
  }
  key_used_on_scan= get_open_file_sample()->key_used_on_scan;
  implicit_emptied= get_open_file_sample()->implicit_emptied;
  /*
    Add 2 bytes for partition id in position ref length.
    ref_length=max_in_all_partitions(ref_length) + PARTITION_BYTES_IN_POS
  */
  ref_length+= PARTITION_BYTES_IN_POS;
  m_ref_length= ref_length;

  /*
    Release buffer read from .par file. It will not be reused again after
    being opened once.
  */
  clear_handler_file();

  /*
    Some handlers update statistics as part of the open call. This will in
    some cases corrupt the statistics of the partition handler and thus
    to ensure we have correct statistics we call info from open after
    calling open on all individual handlers.
  */
  m_handler_status= handler_opened;
  if (m_part_info->part_expr)
    m_part_func_monotonicity_info=
                            m_part_info->part_expr->get_monotonicity_info();
  else if (m_part_info->list_of_part_fields)
    m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
  info(HA_STATUS_VARIABLE | HA_STATUS_CONST | HA_STATUS_OPEN);
  DBUG_RETURN(0);

err_handler:
  DEBUG_SYNC(ha_thd(), "partition_open_error");
  /* Close every partition handler that was opened before the failure. */
  file= &m_file[m_tot_parts - 1];
  while (file-- != m_file)
  {
    if (bitmap_is_set(&m_opened_partitions, (uint)(file - m_file)))
      (*file)->ha_close();
  }
err_alloc:
  free_partition_bitmaps();
  my_free(m_range_info);
  m_range_info= 0;

  DBUG_RETURN(error);
}
3670
3671
3672/*
3673 Disabled since it is not possible to prune yet.
  without pruning, it needs to rebind/unbind every partition in every
3675 statement which uses a table from the table cache. Will also use
3676 as many PSI_tables as there are partitions.
3677*/
3678#ifdef HAVE_M_PSI_PER_PARTITION
3679void ha_partition::unbind_psi()
3680{
3681 uint i;
3682
3683 DBUG_ENTER("ha_partition::unbind_psi");
3684 handler::unbind_psi();
3685 for (i= 0; i < m_tot_parts; i++)
3686 {
3687 DBUG_ASSERT(m_file[i] != NULL);
3688 m_file[i]->unbind_psi();
3689 }
3690 DBUG_VOID_RETURN;
3691}
3692
3693void ha_partition::rebind_psi()
3694{
3695 uint i;
3696
3697 DBUG_ENTER("ha_partition::rebind_psi");
3698 handler::rebind_psi();
3699 for (i= 0; i < m_tot_parts; i++)
3700 {
3701 DBUG_ASSERT(m_file[i] != NULL);
3702 m_file[i]->rebind_psi();
3703 }
3704 DBUG_VOID_RETURN;
3705}
3706#endif /* HAVE_M_PSI_PER_PARTITION */
3707
3708
3709/**
3710 Clone the open and locked partitioning handler.
3711
3712 @param mem_root MEM_ROOT to use.
3713
3714 @return Pointer to the successfully created clone or NULL
3715
3716 @details
3717 This function creates a new ha_partition handler as a clone/copy. The
3718 original (this) must already be opened and locked. The clone will use
3719 the originals m_part_info.
3720 It also allocates memory for ref + ref_dup.
3721 In ha_partition::open() it will clone its original handlers partitions
3722 which will allocate then on the correct MEM_ROOT and also open them.
3723*/
3724
3725handler *ha_partition::clone(const char *name, MEM_ROOT *mem_root)
3726{
3727 ha_partition *new_handler;
3728
3729 DBUG_ENTER("ha_partition::clone");
3730 new_handler= new (mem_root) ha_partition(ht, table_share, m_part_info,
3731 this, mem_root);
3732 if (!new_handler)
3733 DBUG_RETURN(NULL);
3734
3735 /*
3736 We will not clone each partition's handler here, it will be done in
3737 ha_partition::open() for clones. Also set_ha_share_ref is not needed
3738 here, since 1) ha_share is copied in the constructor used above
3739 2) each partition's cloned handler will set it from its original.
3740 */
3741
3742 /*
3743 Allocate new_handler->ref here because otherwise ha_open will allocate it
3744 on this->table->mem_root and we will not be able to reclaim that memory
3745 when the clone handler object is destroyed.
3746 */
3747 if (!(new_handler->ref= (uchar*) alloc_root(mem_root,
3748 ALIGN_SIZE(m_ref_length)*2)))
3749 goto err;
3750
3751 if (new_handler->ha_open(table, name,
3752 table->db_stat,
3753 HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_NO_PSI_CALL))
3754 goto err;
3755
3756 DBUG_RETURN((handler*) new_handler);
3757
3758err:
3759 delete new_handler;
3760 DBUG_RETURN(NULL);
3761}
3762
3763
3764/*
3765 Close handler object
3766
3767 SYNOPSIS
3768 close()
3769
3770 RETURN VALUE
3771 >0 Error code
3772 0 Success
3773
3774 DESCRIPTION
3775 Called from sql_base.cc, sql_select.cc, and table.cc.
3776 In sql_select.cc it is only used to close up temporary tables or during
3777 the process where a temporary table is converted over to being a
3778 myisam table.
3779 For sql_base.cc look at close_data_tables().
3780*/
3781
int ha_partition::close(void)
{
  bool first= TRUE;
  handler **file;
  uint i;
  st_partition_ft_info *tmp_ft_info;
  DBUG_ENTER("ha_partition::close");
  DBUG_ASSERT(table->s == table_share);
  DBUG_ASSERT(m_part_info);

  destroy_record_priority_queue();

  /* Free the linked list of full-text search info objects. */
  for (; ft_first ; ft_first= tmp_ft_info)
  {
    tmp_ft_info= ft_first->next;
    my_free(ft_first);
  }

  /* Free active mrr_ranges */
  for (i= 0; i < m_tot_parts; i++)
  {
    if (m_part_mrr_range_first[i])
    {
      PARTITION_PART_KEY_MULTI_RANGE *tmp_mrr_range_first=
        m_part_mrr_range_first[i];
      do
      {
        PARTITION_PART_KEY_MULTI_RANGE *tmp_mrr_range_current;
        tmp_mrr_range_current= tmp_mrr_range_first;
        tmp_mrr_range_first= tmp_mrr_range_first->next;
        my_free(tmp_mrr_range_current);
      } while (tmp_mrr_range_first);
    }
  }
  /* Free the global MRR range list, including any copied key buffers. */
  if (m_mrr_range_first)
  {
    do
    {
      m_mrr_range_current= m_mrr_range_first;
      m_mrr_range_first= m_mrr_range_first->next;
      if (m_mrr_range_current->key[0])
        my_free(m_mrr_range_current->key[0]);
      if (m_mrr_range_current->key[1])
        my_free(m_mrr_range_current->key[1]);
      my_free(m_mrr_range_current);
    } while (m_mrr_range_first);
  }
  my_free(m_range_info);
  m_range_info= NULL;                          // Safety

  if (m_mrr_full_buffer)
  {
    my_free(m_mrr_full_buffer);
    m_mrr_full_buffer= NULL;
    m_mrr_full_buffer_size= 0;
  }
  file= m_file;

repeat:
  /*
    First pass closes the opened partitions in m_file (only those marked
    in m_opened_partitions). If m_added_file holds handlers (e.g. from an
    ongoing ALTER), a second pass closes all of those unconditionally.
  */
  do
  {
    if (!first || bitmap_is_set(&m_opened_partitions, (uint)(file - m_file)))
      (*file)->ha_close();
  } while (*(++file));

  /*
    NOTE(review): with m_added_file set this is reached twice; presumably
    my_bitmap_free is safe to call on an already-freed bitmap — verify.
  */
  free_partition_bitmaps();

  if (first && m_added_file && m_added_file[0])
  {
    file= m_added_file;
    first= FALSE;
    goto repeat;
  }

  m_handler_status= handler_closed;
  DBUG_RETURN(0);
}
3859
3860/****************************************************************************
3861 MODULE start/end statement
3862****************************************************************************/
3863/*
3864 A number of methods to define various constants for the handler. In
3865 the case of the partition handler we need to use some max and min
3866 of the underlying handlers in most cases.
3867*/
3868
3869/*
3870 Set external locks on table
3871
3872 SYNOPSIS
3873 external_lock()
3874 thd Thread object
3875 lock_type Type of external lock
3876
3877 RETURN VALUE
3878 >0 Error code
3879 0 Success
3880
3881 DESCRIPTION
3882 First you should go read the section "locking functions for mysql" in
3883 lock.cc to understand this.
3884 This create a lock on the table. If you are implementing a storage engine
3885 that can handle transactions look at ha_berkeley.cc to see how you will
3886 want to go about doing this. Otherwise you should consider calling
3887 flock() here.
3888 Originally this method was used to set locks on file level to enable
3889 several MySQL Servers to work on the same data. For transactional
3890 engines it has been "abused" to also mean start and end of statements
3891 to enable proper rollback of statements and transactions. When LOCK
3892 TABLES has been issued the start_stmt method takes over the role of
3893 indicating start of statement but in this case there is no end of
3894 statement indicator(?).
3895
3896 Called from lock.cc by lock_external() and unlock_external(). Also called
3897 from sql_table.cc by copy_data_between_tables().
3898*/
3899
int ha_partition::external_lock(THD *thd, int lock_type)
{
  int error;
  uint i, first_used_partition;
  MY_BITMAP *used_partitions;
  DBUG_ENTER("ha_partition::external_lock");

  DBUG_ASSERT(!auto_increment_lock && !auto_increment_safe_stmt_log_lock);

  /*
    On unlock, visit exactly the partitions that were locked earlier;
    on lock, visit the partitions chosen by pruning (lock_partitions).
  */
  if (lock_type == F_UNLCK)
    used_partitions= &m_locked_partitions;
  else
    used_partitions= &(m_part_info->lock_partitions);

  first_used_partition= bitmap_get_first_set(used_partitions);

  for (i= first_used_partition;
       i < m_tot_parts;
       i= bitmap_get_next_set(used_partitions, i))
  {
    DBUG_PRINT("info", ("external_lock(thd, %d) part %u", lock_type, i));
    if (unlikely((error= m_file[i]->ha_external_lock(thd, lock_type))))
    {
      /* Errors during unlock are ignored so remaining parts get unlocked. */
      if (lock_type != F_UNLCK)
        goto err_handler;
    }
    DBUG_PRINT("info", ("external_lock part %u lock %d", i, lock_type));
    if (lock_type != F_UNLCK)
      bitmap_set_bit(&m_locked_partitions, i);
  }
  if (lock_type == F_UNLCK)
  {
    bitmap_clear_all(used_partitions);
  }
  else
  {
    /* Add touched partitions to be included in reset(). */
    bitmap_union(&m_partitions_to_reset, used_partitions);
  }

  /* Handlers in m_added_file (e.g. from ALTER) are only ever unlocked here. */
  if (m_added_file && m_added_file[0])
  {
    handler **file= m_added_file;
    DBUG_ASSERT(lock_type == F_UNLCK);
    do
    {
      (void) (*file)->ha_external_lock(thd, lock_type);
    } while (*(++file));
  }
  if (lock_type == F_WRLCK)
  {
    /* Register the partitioning expression's fields in the read map. */
    if (m_part_info->part_expr)
      m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0);
    if (m_part_info->part_type == VERSIONING_PARTITION)
      m_part_info->vers_set_hist_part(thd);
  }
  DBUG_RETURN(0);

err_handler:
  /* Roll back: unlock every partition locked before the failing one. */
  uint j;
  for (j= first_used_partition;
       j < i;
       j= bitmap_get_next_set(&m_locked_partitions, j))
  {
    (void) m_file[j]->ha_external_lock(thd, F_UNLCK);
  }
  bitmap_clear_all(&m_locked_partitions);
  DBUG_RETURN(error);
}
3969
3970
3971/*
3972 Get the lock(s) for the table and perform conversion of locks if needed
3973
3974 SYNOPSIS
3975 store_lock()
3976 thd Thread object
3977 to Lock object array
3978 lock_type Table lock type
3979
3980 RETURN VALUE
3981 >0 Error code
3982 0 Success
3983
3984 DESCRIPTION
3985 The idea with handler::store_lock() is the following:
3986
3987 The statement decided which locks we should need for the table
3988 for updates/deletes/inserts we get WRITE locks, for SELECT... we get
3989 read locks.
3990
3991 Before adding the lock into the table lock handler (see thr_lock.c)
3992 mysqld calls store lock with the requested locks. Store lock can now
3993 modify a write lock to a read lock (or some other lock), ignore the
3994 lock (if we don't want to use MySQL table locks at all) or add locks
3995 for many tables (like we do when we are using a MERGE handler).
3996
  Berkeley DB for partition changes all WRITE locks to TL_WRITE_ALLOW_WRITE
  (which signals that we are doing WRITES, but we are still allowing other
  readers and writers).
4000
4001 When releasing locks, store_lock() is also called. In this case one
4002 usually doesn't have to do anything.
4003
4004 store_lock is called when holding a global mutex to ensure that only
4005 one thread at a time changes the locking information of tables.
4006
4007 In some exceptional cases MySQL may send a request for a TL_IGNORE;
4008 This means that we are requesting the same lock as last time and this
4009 should also be ignored. (This may happen when someone does a flush
4010 table when we have opened a part of the tables, in which case mysqld
4011 closes and reopens the tables and tries to get the same locks as last
4012 time). In the future we will probably try to remove this.
4013
4014 Called from lock.cc by get_lock_data().
4015*/
4016
4017THR_LOCK_DATA **ha_partition::store_lock(THD *thd,
4018 THR_LOCK_DATA **to,
4019 enum thr_lock_type lock_type)
4020{
4021 uint i;
4022 DBUG_ENTER("ha_partition::store_lock");
4023 DBUG_ASSERT(thd == current_thd);
4024
4025 /*
4026 This can be called from get_lock_data() in mysql_lock_abort_for_thread(),
4027 even when thd != table->in_use. In that case don't use partition pruning,
4028 but use all partitions instead to avoid using another threads structures.
4029 */
4030 if (thd != table->in_use)
4031 {
4032 for (i= 0; i < m_tot_parts; i++)
4033 to= m_file[i]->store_lock(thd, to, lock_type);
4034 }
4035 else
4036 {
4037 for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
4038 i < m_tot_parts;
4039 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
4040 {
4041 DBUG_PRINT("info", ("store lock %u iteration", i));
4042 to= m_file[i]->store_lock(thd, to, lock_type);
4043 }
4044 }
4045 DBUG_RETURN(to);
4046}
4047
4048/*
4049 Start a statement when table is locked
4050
4051 SYNOPSIS
4052 start_stmt()
4053 thd Thread object
4054 lock_type Type of external lock
4055
4056 RETURN VALUE
4057 >0 Error code
4058 0 Success
4059
4060 DESCRIPTION
4061 This method is called instead of external lock when the table is locked
4062 before the statement is executed.
4063*/
4064
4065int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type)
4066{
4067 int error= 0;
4068 uint i;
4069 /* Assert that read_partitions is included in lock_partitions */
4070 DBUG_ASSERT(bitmap_is_subset(&m_part_info->read_partitions,
4071 &m_part_info->lock_partitions));
4072 /*
4073 m_locked_partitions is set in previous external_lock/LOCK TABLES.
4074 Current statement's lock requests must not include any partitions
4075 not previously locked.
4076 */
4077 DBUG_ASSERT(bitmap_is_subset(&m_part_info->lock_partitions,
4078 &m_locked_partitions));
4079 DBUG_ENTER("ha_partition::start_stmt");
4080
4081 for (i= bitmap_get_first_set(&(m_part_info->lock_partitions));
4082 i < m_tot_parts;
4083 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
4084 {
4085 if (unlikely((error= m_file[i]->start_stmt(thd, lock_type))))
4086 break;
4087 /* Add partition to be called in reset(). */
4088 bitmap_set_bit(&m_partitions_to_reset, i);
4089 }
4090 if (lock_type == F_WRLCK && m_part_info->part_expr)
4091 m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0);
4092 DBUG_RETURN(error);
4093}
4094
4095
4096/**
4097 Get number of lock objects returned in store_lock
4098
4099 @returns Number of locks returned in call to store_lock
4100
4101 @desc
    Returns the maximum possible number of store locks needed in call to
4103 store lock.
4104*/
4105
uint ha_partition::lock_count() const
{
  DBUG_ENTER("ha_partition::lock_count");
  /* Upper bound computed in open(): per-partition lock count * m_tot_parts. */
  DBUG_RETURN(m_num_locks);
}
4111
4112
4113/*
4114 Unlock last accessed row
4115
4116 SYNOPSIS
4117 unlock_row()
4118
4119 RETURN VALUE
4120 NONE
4121
4122 DESCRIPTION
4123 Record currently processed was not in the result set of the statement
4124 and is thus unlocked. Used for UPDATE and DELETE queries.
4125*/
4126
void ha_partition::unlock_row()
{
  DBUG_ENTER("ha_partition::unlock_row");
  /* Forward to the partition the last accessed row belongs to. */
  m_file[m_last_part]->unlock_row();
  DBUG_VOID_RETURN;
}
4133
4134/**
4135 Check if semi consistent read was used
4136
4137 SYNOPSIS
4138 was_semi_consistent_read()
4139
4140 RETURN VALUE
4141 TRUE Previous read was a semi consistent read
4142 FALSE Previous read was not a semi consistent read
4143
4144 DESCRIPTION
4145 See handler.h:
4146 In an UPDATE or DELETE, if the row under the cursor was locked by another
4147 transaction, and the engine used an optimistic read of the last
4148 committed row value under the cursor, then the engine returns 1 from this
4149 function. MySQL must NOT try to update this optimistic value. If the
4150 optimistic value does not match the WHERE condition, MySQL can decide to
4151 skip over this row. Currently only works for InnoDB. This can be used to
4152 avoid unnecessary lock waits.
4153
4154 If this method returns nonzero, it will also signal the storage
4155 engine that the next read will be a locking re-read of the row.
4156*/
bool ha_partition::was_semi_consistent_read()
{
  DBUG_ENTER("ha_partition::was_semi_consistent_read");
  /* The last accessed partition must be valid and not pruned away. */
  DBUG_ASSERT(m_last_part < m_tot_parts &&
              bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  /* Forward to the partition that served the previous read. */
  DBUG_RETURN(m_file[m_last_part]->was_semi_consistent_read());
}
4164
4165/**
4166 Use semi consistent read if possible
4167
4168 SYNOPSIS
4169 try_semi_consistent_read()
4170 yes Turn on semi consistent read
4171
4172 RETURN VALUE
4173 NONE
4174
4175 DESCRIPTION
4176 See handler.h:
4177 Tell the engine whether it should avoid unnecessary lock waits.
4178 If yes, in an UPDATE or DELETE, if the row under the cursor was locked
4179 by another transaction, the engine may try an optimistic read of
4180 the last committed row value under the cursor.
4181 Note: prune_partitions are already called before this call, so using
4182 pruning is OK.
4183*/
4184void ha_partition::try_semi_consistent_read(bool yes)
4185{
4186 uint i;
4187 DBUG_ENTER("ha_partition::try_semi_consistent_read");
4188
4189 i= bitmap_get_first_set(&(m_part_info->read_partitions));
4190 DBUG_ASSERT(i != MY_BIT_NONE);
4191 for (;
4192 i < m_tot_parts;
4193 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4194 {
4195 m_file[i]->try_semi_consistent_read(yes);
4196 }
4197 DBUG_VOID_RETURN;
4198}
4199
4200
4201/****************************************************************************
4202 MODULE change record
4203****************************************************************************/
4204
4205/*
4206 Insert a row to the table
4207
4208 SYNOPSIS
4209 write_row()
4210 buf The row in MySQL Row Format
4211
4212 RETURN VALUE
4213 >0 Error code
4214 0 Success
4215
4216 DESCRIPTION
4217 write_row() inserts a row. buf() is a byte array of data, normally
4218 record[0].
4219
4220 You can use the field information to extract the data from the native byte
4221 array type.
4222
4223 Example of this would be:
4224 for (Field **field=table->field ; *field ; field++)
4225 {
4226 ...
4227 }
4228
4229 See ha_tina.cc for a variant of extracting all of the data as strings.
4230 ha_berkeley.cc has a variant of how to store it intact by "packing" it
4231 for ha_berkeley's own native storage type.
4232
4233 Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
4234 sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
4235
4236 ADDITIONAL INFO:
4237
4238 We have to set auto_increment fields, because those may be used in
4239 determining which partition the row should be written to.
4240*/
4241
int ha_partition::write_row(uchar * buf)
{
  uint32 part_id;
  int error;
  longlong func_value;
  /* Auto-increment handling only applies when writing through record[0]. */
  bool have_auto_increment= table->next_number_field && buf == table->record[0];
  my_bitmap_map *old_map;
  THD *thd= ha_thd();
  /* Both may be modified below and are restored at 'exit'. */
  sql_mode_t saved_sql_mode= thd->variables.sql_mode;
  bool saved_auto_inc_field_not_null= table->auto_increment_field_not_null;
  DBUG_ENTER("ha_partition::write_row");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /*
    If we have an auto_increment column and we are writing a changed row
    or a new row, then update the auto_increment value in the record.
  */
  if (have_auto_increment)
  {
    if (!table_share->next_number_keypart)
      update_next_auto_inc_val();
    error= update_auto_increment();

    /*
      If we have failed to set the auto-increment value for this row,
      it is highly likely that we will not be able to insert it into
      the correct partition. We must check and fail if necessary.
    */
    if (unlikely(error))
      goto exit;

    /*
      Don't allow the partitions' handlers to generate the auto_increment
      value themselves. If a partition's handler would change the value,
      then it might not match the partition any longer.
      This can occur with 'SET INSERT_ID = 0; INSERT (NULL)',
      so allow it by adding 'MODE_NO_AUTO_VALUE_ON_ZERO' to sql_mode.
      The partitions handler::next_insert_id must always be 0. Otherwise
      we need to forward release_auto_increment, or reset it for all
      partitions.
    */
    if (table->next_number_field->val_int() == 0)
    {
      table->auto_increment_field_not_null= TRUE;
      thd->variables.sql_mode|= MODE_NO_AUTO_VALUE_ON_ZERO;
    }
  }

  /* The partitioning function may read any column of the row. */
  old_map= dbug_tmp_use_all_columns(table, table->read_set);
  error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value);
  dbug_tmp_restore_column_map(table->read_set, old_map);
  if (unlikely(error))
  {
    m_part_info->err_value= func_value;
    goto exit;
  }
  /* The row must map to a partition this statement has locked. */
  if (!bitmap_is_set(&(m_part_info->lock_partitions), part_id))
  {
    DBUG_PRINT("info", ("Write to non-locked partition %u (func_value: %ld)",
                        part_id, (long) func_value));
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }
  m_last_part= part_id;
  DBUG_PRINT("info", ("Insert in partition %u", part_id));
  start_part_bulk_insert(thd, part_id);

  tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
  error= m_file[part_id]->ha_write_row(buf);
  if (have_auto_increment && !table->s->next_number_keypart)
    set_auto_increment_if_higher(table->next_number_field);
  reenable_binlog(thd);

exit:
  /* Restore session state possibly changed above. */
  thd->variables.sql_mode= saved_sql_mode;
  table->auto_increment_field_not_null= saved_auto_inc_field_not_null;
  DBUG_RETURN(error);
}
4320
4321
4322/*
4323 Update an existing row
4324
4325 SYNOPSIS
4326 update_row()
4327 old_data Old record in MySQL Row Format
4328 new_data New record in MySQL Row Format
4329
4330 RETURN VALUE
4331 >0 Error code
4332 0 Success
4333
4334 DESCRIPTION
4335 Yes, update_row() does what you expect, it updates a row. old_data will
4336 have the previous row record in it, while new_data will have the newest
4337 data in it.
4338 Keep in mind that the server can do updates based on ordering if an
    ORDER BY clause was used. Consecutive ordering is not guaranteed.
4340
4341 Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
4342 new_data is always record[0]
4343 old_data is always record[1]
4344*/
4345
int ha_partition::update_row(const uchar *old_data, const uchar *new_data)
{
  THD *thd= ha_thd();
  uint32 new_part_id, old_part_id= m_last_part;
  int error= 0;
  DBUG_ENTER("ha_partition::update_row");
  m_err_rec= NULL;

  // Need to read partition-related columns, to locate the row's partition:
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
#ifndef DBUG_OFF
  /*
    The protocol for updating a row is:
    1) position the handler (cursor) on the row to be updated,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call update_row with both old and new full records as arguments.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of correcting m_last_part, to make the user aware of the
    problem!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.
  */
  error= get_part_for_buf(old_data, m_rec0, m_part_info, &old_part_id);
  DBUG_ASSERT(!error);
  DBUG_ASSERT(old_part_id == m_last_part);
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), old_part_id));
#endif

  /* Compute the partition the updated row belongs to. */
  if (unlikely((error= get_part_for_buf(new_data, m_rec0, m_part_info,
                                        &new_part_id))))
    goto exit;
  if (unlikely(!bitmap_is_set(&(m_part_info->lock_partitions), new_part_id)))
  {
    error= HA_ERR_NOT_IN_LOCK_PARTITIONS;
    goto exit;
  }


  m_last_part= new_part_id;
  start_part_bulk_insert(thd, new_part_id);
  if (new_part_id == old_part_id)
  {
    /* Row stays in its partition: a plain in-place update. */
    DBUG_PRINT("info", ("Update in partition %u", (uint) new_part_id));
    tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
    error= m_file[new_part_id]->ha_update_row(old_data, new_data);
    reenable_binlog(thd);
    goto exit;
  }
  else
  {
    /*
      Row moves to another partition: insert into the new partition first,
      then delete from the old one.
    */
    Field *saved_next_number_field= table->next_number_field;
    /*
      Don't allow generation of auto_increment value for update.
      table->next_number_field is never set on UPDATE.
      But is set for INSERT ... ON DUPLICATE KEY UPDATE,
      and since update_row() does not generate or update an auto_inc value,
      we cannot have next_number_field set when moving a row
      to another partition with write_row(), since that could
      generate/update the auto_inc value.
      This gives the same behavior for partitioned vs non partitioned tables.
    */
    table->next_number_field= NULL;
    DBUG_PRINT("info", ("Update from partition %u to partition %u",
                        (uint) old_part_id, (uint) new_part_id));
    tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
    error= m_file[new_part_id]->ha_write_row((uchar*) new_data);
    reenable_binlog(thd);
    table->next_number_field= saved_next_number_field;
    if (unlikely(error))
      goto exit;

    tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
    error= m_file[old_part_id]->ha_delete_row(old_data);
    reenable_binlog(thd);
    if (unlikely(error))
      goto exit;
  }

exit:
  /*
    if updating an auto_increment column, update
    part_share->next_auto_inc_val if needed.
    (not to be used if auto_increment on secondary field in a multi-column
    index)
    mysql_update does not set table->next_number_field, so we use
    table->found_next_number_field instead.
    Also checking that the field is marked in the write set.
  */
  if (table->found_next_number_field &&
      new_data == table->record[0] &&
      !table->s->next_number_keypart &&
      bitmap_is_set(table->write_set,
                    table->found_next_number_field->field_index))
  {
    update_next_auto_inc_val();
    /*
      The following call is safe as part_share->auto_inc_initialized
      (tested in the call) is guaranteed to be set for update statements.
    */
    set_auto_increment_if_higher(table->found_next_number_field);
  }
  DBUG_RETURN(error);
}
4456
4457
4458/*
4459 Remove an existing row
4460
4461 SYNOPSIS
4462 delete_row
4463 buf Deleted row in MySQL Row Format
4464
4465 RETURN VALUE
4466 >0 Error Code
4467 0 Success
4468
4469 DESCRIPTION
4470 This will delete a row. buf will contain a copy of the row to be deleted.
4471 The server will call this right after the current row has been read
4472 (from either a previous rnd_xxx() or index_xxx() call).
4473 If you keep a pointer to the last row or can access a primary key it will
4474 make doing the deletion quite a bit easier.
    Keep in mind that the server does not guarantee consecutive deletions.
4476 ORDER BY clauses can be used.
4477
4478 Called in sql_acl.cc and sql_udf.cc to manage internal table information.
4479 Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select
4480 it is used for removing duplicates while in insert it is used for REPLACE
4481 calls.
4482
4483 buf is either record[0] or record[1]
4484*/
4485
int ha_partition::delete_row(const uchar *buf)
{
  int error;
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::delete_row");
  m_err_rec= NULL;

  /* All partitioning fields must be readable to verify the row's partition. */
  DBUG_ASSERT(bitmap_is_subset(&m_part_info->full_part_field_set,
                               table->read_set));
#ifndef DBUG_OFF
  /*
    The protocol for deleting a row is:
    1) position the handler (cursor) on the row to be deleted,
       either through the last read row (rnd or index) or by rnd_pos.
    2) call delete_row with the full record as argument.

    This means that m_last_part should already be set to actual partition
    where the row was read from. And if that is not the same as the
    calculated part_id we found a misplaced row, we return an error to
    notify the user that something is broken in the row distribution
    between partitions! Since we don't check all rows on read, we return an
    error instead of forwarding the delete to the correct (m_last_part)
    partition!

    Notice that HA_READ_BEFORE_WRITE_REMOVAL does not require this protocol,
    so this is not supported for this engine.

    For partitions by system_time, get_part_for_buf() is always either current
    or last historical partition, but DELETE HISTORY can delete from any
    historical partition. So, skip the check in this case.
  */
  if (!thd->lex->vers_conditions.is_set()) // if not DELETE HISTORY
  {
    uint32 part_id;
    error= get_part_for_buf(buf, m_rec0, m_part_info, &part_id);
    DBUG_ASSERT(!error);
    DBUG_ASSERT(part_id == m_last_part);
  }
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
  DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), m_last_part));
#endif

  /* The row's partition must be locked for this statement. */
  if (!bitmap_is_set(&(m_part_info->lock_partitions), m_last_part))
    DBUG_RETURN(HA_ERR_NOT_IN_LOCK_PARTITIONS);

  tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
  error= m_file[m_last_part]->ha_delete_row(buf);
  reenable_binlog(thd);
  DBUG_RETURN(error);
}
4536
4537
4538/*
4539 Delete all rows in a table
4540
4541 SYNOPSIS
4542 delete_all_rows()
4543
4544 RETURN VALUE
4545 >0 Error Code
4546 0 Success
4547
4548 DESCRIPTION
4549 Used to delete all rows in a table. Both for cases of truncate and
4550 for cases where the optimizer realizes that all rows will be
4551 removed as a result of a SQL statement.
4552
4553 Called from item_sum.cc by Item_func_group_concat::clear(),
4554 Item_sum_count::clear(), and Item_func_group_concat::clear().
4555 Called from sql_delete.cc by mysql_delete().
4556 Called from sql_select.cc by JOIN::reset().
4557 Called from sql_union.cc by st_select_lex_unit::exec().
4558*/
4559
4560int ha_partition::delete_all_rows()
4561{
4562 int error;
4563 uint i;
4564 DBUG_ENTER("ha_partition::delete_all_rows");
4565
4566 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
4567 i < m_tot_parts;
4568 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4569 {
4570 /* Can be pruned, like DELETE FROM t PARTITION (pX) */
4571 if (unlikely((error= m_file[i]->ha_delete_all_rows())))
4572 DBUG_RETURN(error);
4573 }
4574 DBUG_RETURN(0);
4575}
4576
4577
4578/**
4579 Manually truncate the table.
4580
4581 @retval 0 Success.
4582 @retval > 0 Error code.
4583*/
4584
4585int ha_partition::truncate()
4586{
4587 int error;
4588 handler **file;
4589 DBUG_ENTER("ha_partition::truncate");
4590
4591 /*
4592 TRUNCATE also means resetting auto_increment. Hence, reset
4593 it so that it will be initialized again at the next use.
4594 */
4595 lock_auto_increment();
4596 part_share->next_auto_inc_val= 0;
4597 part_share->auto_inc_initialized= false;
4598 unlock_auto_increment();
4599
4600 file= m_file;
4601 do
4602 {
4603 if (unlikely((error= (*file)->ha_truncate())))
4604 DBUG_RETURN(error);
4605 } while (*(++file));
4606 DBUG_RETURN(0);
4607}
4608
4609
4610/**
4611 Truncate a set of specific partitions.
4612
4613 @remark Auto increment value will be truncated in that partition as well!
4614
4615 ALTER TABLE t TRUNCATE PARTITION ...
4616*/
4617
int ha_partition::truncate_partition(Alter_info *alter_info, bool *binlog_stmt)
{
  int error= 0;
  List_iterator<partition_element> part_it(m_part_info->partitions);
  uint num_parts= m_part_info->num_parts;
  uint num_subparts= m_part_info->num_subparts;
  uint i= 0;
  DBUG_ENTER("ha_partition::truncate_partition");

  /* Only binlog when it starts any call to the partitions handlers */
  *binlog_stmt= false;

  /* Mark the named partitions with PART_ADMIN; fail if none matched. */
  if (set_part_state(alter_info, m_part_info, PART_ADMIN))
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  /*
    TRUNCATE also means resetting auto_increment. Hence, reset
    it so that it will be initialized again at the next use.
  */
  lock_auto_increment();
  part_share->next_auto_inc_val= 0;
  part_share->auto_inc_initialized= FALSE;
  unlock_auto_increment();

  *binlog_stmt= true;

  /* Walk all partitions, truncating those flagged PART_ADMIN above. */
  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_ADMIN)
    {
      if (m_is_sub_partitioned)
      {
        /* Truncate each sub-partition of this partition. */
        List_iterator<partition_element>
                                    subpart_it(part_elem->subpartitions);
        partition_element *sub_elem;
        uint j= 0, part;
        do
        {
          sub_elem= subpart_it++;
          /* m_file index is (partition * num_subparts + subpartition). */
          part= i * num_subparts + j;
          DBUG_PRINT("info", ("truncate subpartition %u (%s)",
                              part, sub_elem->partition_name));
          if (unlikely((error= m_file[part]->ha_truncate())))
            break;
          sub_elem->part_state= PART_NORMAL;
        } while (++j < num_subparts);
      }
      else
      {
        DBUG_PRINT("info", ("truncate partition %u (%s)", i,
                            part_elem->partition_name));
        error= m_file[i]->ha_truncate();
      }
      /* Restore normal state even on error, so DDL state stays consistent. */
      part_elem->part_state= PART_NORMAL;
    }
  } while (!error && (++i < num_parts));
  DBUG_RETURN(error);
}
4677
4678
4679/*
4680 Start a large batch of insert rows
4681
4682 SYNOPSIS
4683 start_bulk_insert()
4684 rows Number of rows to insert
4685 flags Flags to control index creation
4686
4687 RETURN VALUE
4688 NONE
4689
4690 DESCRIPTION
4691 rows == 0 means we will probably insert many rows
4692*/
4693void ha_partition::start_bulk_insert(ha_rows rows, uint flags)
4694{
4695 DBUG_ENTER("ha_partition::start_bulk_insert");
4696
4697 m_bulk_inserted_rows= 0;
4698 bitmap_clear_all(&m_bulk_insert_started);
4699 /* use the last bit for marking if bulk_insert_started was called */
4700 bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
4701 DBUG_VOID_RETURN;
4702}
4703
4704
4705/*
4706 Check if start_bulk_insert has been called for this partition,
4707 if not, call it and mark it called
4708*/
4709void ha_partition::start_part_bulk_insert(THD *thd, uint part_id)
4710{
4711 long old_buffer_size;
4712 if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
4713 bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
4714 {
4715 DBUG_ASSERT(bitmap_is_set(&(m_part_info->lock_partitions), part_id));
4716 old_buffer_size= thd->variables.read_buff_size;
4717 /* Update read_buffer_size for this partition */
4718 thd->variables.read_buff_size= estimate_read_buffer_size(old_buffer_size);
4719 m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
4720 bitmap_set_bit(&m_bulk_insert_started, part_id);
4721 thd->variables.read_buff_size= old_buffer_size;
4722 }
4723 m_bulk_inserted_rows++;
4724}
4725
4726/*
4727 Estimate the read buffer size for each partition.
4728 SYNOPSIS
4729 ha_partition::estimate_read_buffer_size()
4730 original_size read buffer size originally set for the server
4731 RETURN VALUE
4732 estimated buffer size.
4733 DESCRIPTION
4734 If the estimated number of rows to insert is less than 10 (but not 0)
4735 the new buffer size is same as original buffer size.
4736 In case of first partition of when partition function is monotonic
4737 new buffer size is same as the original buffer size.
4738 For rest of the partition total buffer of 10*original_size is divided
4739 equally if number of partition is more than 10 other wise each partition
4740 will be allowed to use original buffer size.
4741*/
4742long ha_partition::estimate_read_buffer_size(long original_size)
4743{
4744 /*
4745 If number of rows to insert is less than 10, but not 0,
4746 return original buffer size.
4747 */
4748 if (estimation_rows_to_insert && (estimation_rows_to_insert < 10))
4749 return (original_size);
4750 /*
4751 If first insert/partition and monotonic partition function,
4752 allow using buffer size originally set.
4753 */
4754 if (!m_bulk_inserted_rows &&
4755 m_part_func_monotonicity_info != NON_MONOTONIC &&
4756 m_tot_parts > 1)
4757 return original_size;
4758 /*
4759 Allow total buffer used in all partition to go up to 10*read_buffer_size.
4760 11*read_buffer_size in case of monotonic partition function.
4761 */
4762
4763 if (m_tot_parts < 10)
4764 return original_size;
4765 return (original_size * 10 / m_tot_parts);
4766}
4767
4768/*
4769 Try to predict the number of inserts into this partition.
4770
4771 If less than 10 rows (including 0 which means Unknown)
4772 just give that as a guess
4773 If monotonic partitioning function was used
4774 guess that 50 % of the inserts goes to the first partition
4775 For all other cases, guess on equal distribution between the partitions
4776*/
4777ha_rows ha_partition::guess_bulk_insert_rows()
4778{
4779 DBUG_ENTER("guess_bulk_insert_rows");
4780
4781 if (estimation_rows_to_insert < 10)
4782 DBUG_RETURN(estimation_rows_to_insert);
4783
4784 /* If first insert/partition and monotonic partition function, guess 50%. */
4785 if (!m_bulk_inserted_rows &&
4786 m_part_func_monotonicity_info != NON_MONOTONIC &&
4787 m_tot_parts > 1)
4788 DBUG_RETURN(estimation_rows_to_insert / 2);
4789
4790 /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
4791 if (m_bulk_inserted_rows < estimation_rows_to_insert)
4792 DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
4793 / m_tot_parts) + 1);
4794 /* The estimation was wrong, must say 'Unknown' */
4795 DBUG_RETURN(0);
4796}
4797
4798
4799/*
4800 Finish a large batch of insert rows
4801
4802 SYNOPSIS
4803 end_bulk_insert()
4804
4805 RETURN VALUE
4806 >0 Error code
4807 0 Success
4808
4809 Note: end_bulk_insert can be called without start_bulk_insert
4810 being called, see bug#44108.
4811
4812*/
4813
4814int ha_partition::end_bulk_insert()
4815{
4816 int error= 0;
4817 uint i;
4818 DBUG_ENTER("ha_partition::end_bulk_insert");
4819
4820 if (!bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
4821 DBUG_RETURN(error);
4822
4823 for (i= bitmap_get_first_set(&m_bulk_insert_started);
4824 i < m_tot_parts;
4825 i= bitmap_get_next_set(&m_bulk_insert_started, i))
4826 {
4827 int tmp;
4828 if ((tmp= m_file[i]->ha_end_bulk_insert()))
4829 error= tmp;
4830 }
4831 bitmap_clear_all(&m_bulk_insert_started);
4832 DBUG_RETURN(error);
4833}
4834
4835
4836/****************************************************************************
4837 MODULE full table scan
4838****************************************************************************/
4839/*
4840 Initialize engine for random reads
4841
4842 SYNOPSIS
4843 ha_partition::rnd_init()
4844 scan 0 Initialize for random reads through rnd_pos()
4845 1 Initialize for random scan through rnd_next()
4846
4847 RETURN VALUE
4848 >0 Error code
4849 0 Success
4850
4851 DESCRIPTION
4852 rnd_init() is called when the server wants the storage engine to do a
4853 table scan or when the server wants to access data through rnd_pos.
4854
4855 When scan is used we will scan one handler partition at a time.
4856 When preparing for rnd_pos we will init all handler partitions.
    No extra cache handling is needed when scanning is not performed.
4858
4859 Before initialising we will call rnd_end to ensure that we clean up from
4860 any previous incarnation of a table scan.
4861 Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
4862 sql_table.cc, and sql_update.cc.
4863*/
4864
int ha_partition::rnd_init(bool scan)
{
  int error;
  uint i= 0;
  uint32 part_id;
  DBUG_ENTER("ha_partition::rnd_init");

  /*
    For operations that may need to change data, we may need to extend
    read_set.
  */
  if (get_lock_type() == F_WRLCK)
  {
    /*
      If write_set contains any of the fields used in partition and
      subpartition expression, we need to set all bits in read_set because
      the row may need to be inserted in a different [sub]partition. In
      other words update_row() can be converted into write_row(), which
      requires a complete record.
    */
    if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                              table->write_set))
    {
      DBUG_PRINT("info", ("partition set full bitmap"));
      bitmap_set_all(table->read_set);
    }
    else
    {
      /*
        Some handlers only read fields as specified by the bitmap for the
        read set. For partitioned handlers we always require that the
        fields of the partition functions are read such that we can
        calculate the partition id to place updated and deleted records.
      */
      DBUG_PRINT("info", ("partition set part_field bitmap"));
      bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    }
  }

  /* Now we see what the index of our first important partition is */
  DBUG_PRINT("info", ("m_part_info->read_partitions: %p",
                      m_part_info->read_partitions.bitmap));
  part_id= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_PRINT("info", ("m_part_spec.start_part: %u", (uint) part_id));

  /* No partitions to read (all pruned away): succeed with nothing to do. */
  if (part_id == MY_BIT_NONE)
  {
    error= 0;
    goto err1;
  }

  /*
    We have a partition and we are scanning with rnd_next
    so we bump our cache
  */
  DBUG_PRINT("info", ("rnd_init on partition: %u", (uint) part_id));
  if (scan)
  {
    /*
      rnd_end() is needed for partitioning to reset internal data if scan
      is already in use
    */
    rnd_end();
    late_extra_cache(part_id);

    m_index_scan_type= partition_no_index_scan;
  }

  /* Init every partition in the read set; unwind on failure (see err:). */
  for (i= part_id;
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (unlikely((error= m_file[i]->ha_rnd_init(scan))))
      goto err;
  }

  m_scan_value= scan;
  m_part_spec.start_part= part_id;
  m_part_spec.end_part= m_tot_parts - 1;
  m_rnd_init_and_first= TRUE;
  DBUG_PRINT("info", ("m_scan_value: %u", m_scan_value));
  DBUG_RETURN(0);

err:
  if (scan)
    late_extra_no_cache(part_id);

  /* Call rnd_end for all previously inited partitions. */
  for (;
       part_id < i;
       part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id))
  {
    m_file[part_id]->ha_rnd_end();
  }
err1:
  /* m_scan_value == 2 marks "not initialized / error" for rnd_end(). */
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(error);
}
4964
4965
4966/*
4967 End of a table scan
4968
4969 SYNOPSIS
4970 rnd_end()
4971
4972 RETURN VALUE
4973 >0 Error code
4974 0 Success
4975*/
4976
4977int ha_partition::rnd_end()
4978{
4979 DBUG_ENTER("ha_partition::rnd_end");
4980 switch (m_scan_value) {
4981 case 2: // Error
4982 break;
4983 case 1: // Table scan
4984 if (m_part_spec.start_part != NO_CURRENT_PART_ID)
4985 late_extra_no_cache(m_part_spec.start_part);
4986 /* fall through */
4987 case 0:
4988 uint i;
4989 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
4990 i < m_tot_parts;
4991 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
4992 {
4993 m_file[i]->ha_rnd_end();
4994 }
4995 break;
4996 }
4997 m_scan_value= 2;
4998 m_part_spec.start_part= NO_CURRENT_PART_ID;
4999 DBUG_RETURN(0);
5000}
5001
5002
5003/*
5004 read next row during full table scan (scan in random row order)
5005
5006 SYNOPSIS
5007 rnd_next()
5008 buf buffer that should be filled with data
5009
5010 RETURN VALUE
5011 >0 Error code
5012 0 Success
5013
5014 DESCRIPTION
5015 This is called for each row of the table scan. When you run out of records
5016 you should return HA_ERR_END_OF_FILE.
5017 The Field structure for the table is the key to getting data into buf
5018 in a manner that will allow the server to understand it.
5019
5020 Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc,
5021 sql_table.cc, and sql_update.cc.
5022*/
5023
int ha_partition::rnd_next(uchar *buf)
{
  handler *file;
  int result= HA_ERR_END_OF_FILE, error;
  uint part_id= m_part_spec.start_part;
  DBUG_ENTER("ha_partition::rnd_next");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /* upper level will increment this once again at end of call */
  decrement_statistics(&SSV::ha_read_rnd_next_count);

  if (part_id == NO_CURRENT_PART_ID)
  {
    /*
      The original set of partitions to scan was empty and thus we report
      the result here.
    */
    goto end;
  }

  DBUG_ASSERT(m_scan_value == 1);

  /* One-time work on the first rnd_next() after rnd_init(). */
  if (m_rnd_init_and_first)
  {
    m_rnd_init_and_first= FALSE;
    error= handle_pre_scan(FALSE, check_parallel_search());
    if (m_pre_calling || error)
      DBUG_RETURN(error);
  }

  file= m_file[part_id];

  /* Read from the current partition; on EOF advance to the next one. */
  while (TRUE)
  {
    result= file->ha_rnd_next(buf);
    if (!result)
    {
      m_last_part= part_id;
      DBUG_PRINT("info", ("partition m_last_part: %u", (uint) m_last_part));
      m_part_spec.start_part= part_id;
      table->status= 0;
      DBUG_RETURN(0);
    }

    /*
      if we get here, then the current partition ha_rnd_next returned failure
    */
    if (result != HA_ERR_END_OF_FILE)
      goto end_dont_reset_start_part;         // Return error

    /* End current partition */
    late_extra_no_cache(part_id);
    /* Shift to next partition */
    part_id= bitmap_get_next_set(&m_part_info->read_partitions, part_id);
    if (part_id >= m_tot_parts)
    {
      /* All partitions exhausted. */
      result= HA_ERR_END_OF_FILE;
      break;
    }
    m_last_part= part_id;
    DBUG_PRINT("info", ("partition m_last_part: %u", (uint) m_last_part));
    m_part_spec.start_part= part_id;
    file= m_file[part_id];
    late_extra_cache(part_id);
  }

end:
  DBUG_PRINT("exit", ("reset start_part"));
  m_part_spec.start_part= NO_CURRENT_PART_ID;
end_dont_reset_start_part:
  DBUG_RETURN(result);
}
5096
5097
5098/*
5099 Save position of current row
5100
5101 SYNOPSIS
5102 position()
5103 record Current record in MySQL Row Format
5104
5105 RETURN VALUE
5106 NONE
5107
5108 DESCRIPTION
5109 position() is called after each call to rnd_next() if the data needs
5110 to be ordered. You can do something like the following to store
5111 the position:
5112 ha_store_ptr(ref, ref_length, current_position);
5113
5114 The server uses ref to store data. ref_length in the above case is
5115 the size needed to store current_position. ref is just a byte array
5116 that the server will maintain. If you are using offsets to mark rows, then
5117 current_position should be the offset. If it is a primary key like in
5118 BDB, then it needs to be a primary key.
5119
5120 Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
5121*/
5122
5123void ha_partition::position(const uchar *record)
5124{
5125 handler *file= m_file[m_last_part];
5126 size_t pad_length;
5127 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), m_last_part));
5128 DBUG_ENTER("ha_partition::position");
5129
5130 file->position(record);
5131 int2store(ref, m_last_part);
5132 memcpy((ref + PARTITION_BYTES_IN_POS), file->ref, file->ref_length);
5133 pad_length= m_ref_length - PARTITION_BYTES_IN_POS - file->ref_length;
5134 if (pad_length)
5135 memset((ref + PARTITION_BYTES_IN_POS + file->ref_length), 0, pad_length);
5136
5137 DBUG_VOID_RETURN;
5138}
5139
5140
5141/*
5142 Read row using position
5143
5144 SYNOPSIS
5145 rnd_pos()
5146 out:buf Row read in MySQL Row Format
5147 position Position of read row
5148
5149 RETURN VALUE
5150 >0 Error code
5151 0 Success
5152
5153 DESCRIPTION
5154 This is like rnd_next, but you are given a position to use
5155 to determine the row. The position will be of the type that you stored in
5156 ref. You can use ha_get_ptr(pos,ref_length) to retrieve whatever key
5157 or position you saved when position() was called.
5158 Called from filesort.cc records.cc sql_insert.cc sql_select.cc
5159 sql_update.cc.
5160*/
5161
5162int ha_partition::rnd_pos(uchar * buf, uchar *pos)
5163{
5164 uint part_id;
5165 handler *file;
5166 DBUG_ENTER("ha_partition::rnd_pos");
5167 decrement_statistics(&SSV::ha_read_rnd_count);
5168
5169 part_id= uint2korr((const uchar *) pos);
5170 DBUG_ASSERT(part_id < m_tot_parts);
5171 file= m_file[part_id];
5172 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
5173 m_last_part= part_id;
5174 DBUG_RETURN(file->ha_rnd_pos(buf, (pos + PARTITION_BYTES_IN_POS)));
5175}
5176
5177
5178/*
5179 Read row using position using given record to find
5180
5181 SYNOPSIS
5182 rnd_pos_by_record()
5183 record Current record in MySQL Row Format
5184
5185 RETURN VALUE
5186 >0 Error code
5187 0 Success
5188
5189 DESCRIPTION
5190 this works as position()+rnd_pos() functions, but does some extra work,
5191 calculating m_last_part - the partition to where the 'record'
5192 should go.
5193
5194 called from replication (log_event.cc)
5195*/
5196
5197int ha_partition::rnd_pos_by_record(uchar *record)
5198{
5199 DBUG_ENTER("ha_partition::rnd_pos_by_record");
5200
5201 if (unlikely(get_part_for_buf(record, m_rec0, m_part_info, &m_last_part)))
5202 DBUG_RETURN(1);
5203
5204 DBUG_RETURN(handler::rnd_pos_by_record(record));
5205}
5206
5207
5208/****************************************************************************
5209 MODULE index scan
5210****************************************************************************/
5211/*
5212 Positions an index cursor to the index specified in the handle. Fetches the
5213 row if available. If the key value is null, begin at the first key of the
5214 index.
5215
5216 There are loads of optimisations possible here for the partition handler.
5217 The same optimisations can also be checked for full table scan although
5218 only through conditions and not from index ranges.
5219 Phase one optimisations:
5220 Check if the fields of the partition function are bound. If so only use
5221 the single partition it becomes bound to.
5222 Phase two optimisations:
5223 If it can be deducted through range or list partitioning that only a
5224 subset of the partitions are used, then only use those partitions.
5225*/
5226
5227
5228/**
5229 Setup the ordered record buffer and the priority queue.
5230*/
5231
bool ha_partition::init_record_priority_queue()
{
  DBUG_ENTER("ha_partition::init_record_priority_queue");
  DBUG_ASSERT(!m_ordered_rec_buffer);
  /*
    Initialize the ordered record buffer.
    (The check mirrors the assert above as a release-build safeguard.)
  */
  if (!m_ordered_rec_buffer)
  {
    size_t alloc_len;
    uint used_parts= bitmap_bits_set(&m_part_info->read_partitions);
    DBUG_ASSERT(used_parts > 0);
    /* Allocate record buffer for each used partition. */
    m_priority_queue_rec_len= m_rec_length + PARTITION_BYTES_IN_POS;
    if (!m_using_extended_keys)
      m_priority_queue_rec_len += get_open_file_sample()->ref_length;
    alloc_len= used_parts * m_priority_queue_rec_len;
    /* Allocate a key for temporary use when setting up the scan. */
    alloc_len+= table_share->max_key_length;

    if (!(m_ordered_rec_buffer= (uchar*)my_malloc(alloc_len, MYF(MY_WME))))
      DBUG_RETURN(true);

    /*
      We set-up one record per partition and each record has 2 bytes in
      front where the partition id is written. This is used by ordered
      index_read.
      We also set-up a reference to the first record for temporary use in
      setting up the scan.
    */
    char *ptr= (char*) m_ordered_rec_buffer;
    uint i;
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      DBUG_PRINT("info", ("init rec-buf for part %u", i));
      int2store(ptr, i);
      ptr+= m_priority_queue_rec_len;
    }
    /* ptr now points at the spare key area appended after the records. */
    m_start_key.key= (const uchar*)ptr;

    /* Initialize priority queue, initialized to reading forward. */
    int (*cmp_func)(void *, uchar *, uchar *);
    void *cmp_arg= (void*) this;
    /* Choose the comparator: key+rowid when refs are cheap to compare. */
    if (!m_using_extended_keys && !(table_flags() & HA_CMP_REF_IS_EXPENSIVE))
      cmp_func= cmp_key_rowid_part_id;
    else
      cmp_func= cmp_key_part_id;
    DBUG_PRINT("info", ("partition queue_init(1) used_parts: %u", used_parts));
    if (init_queue(&m_queue, used_parts, 0, 0, cmp_func, cmp_arg, 0, 0))
    {
      /* Queue init failed: release the buffer so state stays consistent. */
      my_free(m_ordered_rec_buffer);
      m_ordered_rec_buffer= NULL;
      DBUG_RETURN(true);
    }
  }
  DBUG_RETURN(false);
}
5291
5292
5293/**
5294 Destroy the ordered record buffer and the priority queue.
5295*/
5296
5297void ha_partition::destroy_record_priority_queue()
5298{
5299 DBUG_ENTER("ha_partition::destroy_record_priority_queue");
5300 if (m_ordered_rec_buffer)
5301 {
5302 delete_queue(&m_queue);
5303 my_free(m_ordered_rec_buffer);
5304 m_ordered_rec_buffer= NULL;
5305 }
5306 DBUG_VOID_RETURN;
5307}
5308
5309
5310/*
5311 Initialize handler before start of index scan
5312
5313 SYNOPSIS
5314 index_init()
5315 inx Index number
5316 sorted Is rows to be returned in sorted order
5317
5318 RETURN VALUE
5319 >0 Error code
5320 0 Success
5321
5322 DESCRIPTION
5323 index_init is always called before starting index scans (except when
5324 starting through index_read_idx and using read_range variants).
5325*/
5326
int ha_partition::index_init(uint inx, bool sorted)
{
  int error= 0;
  uint i;
  DBUG_ENTER("ha_partition::index_init");
  DBUG_PRINT("enter", ("partition this: %p inx: %u sorted: %u", this, inx, sorted));

  active_index= inx;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  m_start_key.length= 0;
  m_ordered= sorted;
  m_ordered_scan_ongoing= FALSE;
  m_curr_key_info[0]= table->key_info+inx;
  if (m_pkey_is_clustered && table->s->primary_key != MAX_KEY)
  {
    /*
      if PK is clustered, then the key cmp must use the pk to
      differentiate between equal key in given index.
    */
    DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
    m_curr_key_info[1]= table->key_info+table->s->primary_key;
    m_curr_key_info[2]= NULL;
    m_using_extended_keys= TRUE;
  }
  else
  {
    m_curr_key_info[1]= NULL;
    m_using_extended_keys= FALSE;
  }

  /* Allocate the merge buffer/queue used by ordered index scans. */
  if (init_record_priority_queue())
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);

  /*
    Some handlers only read fields as specified by the bitmap for the
    read set. For partitioned handlers we always require that the
    fields of the partition functions are read such that we can
    calculate the partition id to place updated and deleted records.
    But this is required for operations that may need to change data only.
  */
  if (get_lock_type() == F_WRLCK)
  {
    DBUG_PRINT("info", ("partition set part_field bitmap"));
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
  }
  if (sorted)
  {
    /*
      An ordered scan is requested. We must make sure all fields of the
      used index are in the read set, as partitioning requires them for
      sorting (see ha_partition::handle_ordered_index_scan).

      The SQL layer may request an ordered index scan without having index
      fields in the read set when
      - it needs to do an ordered scan over an index prefix.
      - it evaluates ORDER BY with SELECT COUNT(*) FROM t1.

      TODO: handle COUNT(*) queries via unordered scan.
    */
    KEY **key_info= m_curr_key_info;
    do
    {
      for (i= 0; i < (*key_info)->user_defined_key_parts; i++)
        bitmap_set_bit(table->read_set,
                       (*key_info)->key_part[i].field->field_index);
    } while (*(++key_info));
  }
  /* Initialize the index on every partition that will be read. */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (unlikely((error= m_file[i]->ha_index_init(inx, sorted))))
      goto err;

    /*
      Error injection: partition i was initialized successfully above, so
      i is incremented to make the cleanup loop (j < i) close it as well.
    */
    DBUG_EXECUTE_IF("ha_partition_fail_index_init", {
      i++;
      error= HA_ERR_NO_PARTITION_FOUND;
      goto err;
    });
  }
err:
  /* Falls through here with error == 0 when all partitions succeeded. */
  if (unlikely(error))
  {
    /* End the previously initialized indexes. */
    uint j;
    for (j= bitmap_get_first_set(&m_part_info->read_partitions);
         j < i;
         j= bitmap_get_next_set(&m_part_info->read_partitions, j))
    {
      (void) m_file[j]->ha_index_end();
    }
    destroy_record_priority_queue();
  }
  DBUG_RETURN(error);
}
5422
5423
5424/*
5425 End of index scan
5426
5427 SYNOPSIS
5428 index_end()
5429
5430 RETURN VALUE
5431 >0 Error code
5432 0 Success
5433
5434 DESCRIPTION
    index_end is called at the end of an index scan to clean up
    anything that needs cleaning up.
5437*/
5438
5439int ha_partition::index_end()
5440{
5441 int error= 0;
5442 handler **file;
5443 DBUG_ENTER("ha_partition::index_end");
5444
5445 active_index= MAX_KEY;
5446 m_part_spec.start_part= NO_CURRENT_PART_ID;
5447 file= m_file;
5448 do
5449 {
5450 if ((*file)->inited == INDEX)
5451 {
5452 int tmp;
5453 if ((tmp= (*file)->ha_index_end()))
5454 error= tmp;
5455 }
5456 } while (*(++file));
5457 destroy_record_priority_queue();
5458 DBUG_RETURN(error);
5459}
5460
5461
5462/*
5463 Read one record in an index scan and start an index scan
5464
5465 SYNOPSIS
5466 index_read_map()
5467 buf Read row in MySQL Row Format
5468 key Key parts in consecutive order
5469 keypart_map Which part of key is used
5470 find_flag What type of key condition is used
5471
5472 RETURN VALUE
5473 >0 Error code
5474 0 Success
5475
5476 DESCRIPTION
5477 index_read_map starts a new index scan using a start key. The MySQL Server
5478 will check the end key on its own. Thus to function properly the
5479 partitioned handler need to ensure that it delivers records in the sort
5480 order of the MySQL Server.
5481 index_read_map can be restarted without calling index_end on the previous
5482 index scan and without calling index_init. In this case the index_read_map
5483 is on the same index as the previous index_scan. This is particularly
    used in conjunction with multi read ranges.
5485*/
5486
5487int ha_partition::index_read_map(uchar *buf, const uchar *key,
5488 key_part_map keypart_map,
5489 enum ha_rkey_function find_flag)
5490{
5491 DBUG_ENTER("ha_partition::index_read_map");
5492 decrement_statistics(&SSV::ha_read_key_count);
5493 end_range= 0;
5494 m_index_scan_type= partition_index_read;
5495 m_start_key.key= key;
5496 m_start_key.keypart_map= keypart_map;
5497 m_start_key.flag= find_flag;
5498 DBUG_RETURN(common_index_read(buf, TRUE));
5499}
5500
5501
5502/* Compare two part_no partition numbers */
5503static int cmp_part_ids(uchar *ref1, uchar *ref2)
5504{
5505 uint32 diff2= uint2korr(ref2);
5506 uint32 diff1= uint2korr(ref1);
5507 if (diff2 > diff1)
5508 return -1;
5509 if (diff2 < diff1)
5510 return 1;
5511 return 0;
5512}
5513
5514
5515/*
5516 @brief
5517 Provide ordering by (key_value, part_no).
5518*/
5519
5520extern "C" int cmp_key_part_id(void *ptr, uchar *ref1, uchar *ref2)
5521{
5522 ha_partition *file= (ha_partition*)ptr;
5523 int res;
5524 if ((res= key_rec_cmp(file->m_curr_key_info, ref1 + PARTITION_BYTES_IN_POS,
5525 ref2 + PARTITION_BYTES_IN_POS)))
5526 {
5527 return res;
5528 }
5529 return cmp_part_ids(ref1, ref2);
5530}
5531
5532/*
5533 @brief
    Provide ordering by (key_value, underlying_table_rowid, part_no).
5535*/
5536extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2)
5537{
5538 ha_partition *file= (ha_partition*)ptr;
5539 int res;
5540
5541 if ((res= key_rec_cmp(file->m_curr_key_info, ref1 + PARTITION_BYTES_IN_POS,
5542 ref2 + PARTITION_BYTES_IN_POS)))
5543 {
5544 return res;
5545 }
5546 if ((res= file->m_file[0]->cmp_ref(ref1 + PARTITION_BYTES_IN_POS + file->m_rec_length,
5547 ref2 + PARTITION_BYTES_IN_POS + file->m_rec_length)))
5548 {
5549 return res;
5550 }
5551 return cmp_part_ids(ref1, ref2);
5552}
5553
5554
5555/**
5556 Common routine for a number of index_read variants
5557
5558 @param buf Buffer where the record should be returned.
5559 @param have_start_key TRUE <=> the left endpoint is available, i.e.
5560 we're in index_read call or in read_range_first
5561 call and the range has left endpoint.
5562 FALSE <=> there is no left endpoint (we're in
5563 read_range_first() call and the range has no left
5564 endpoint).
5565
5566 @return Operation status
5567 @retval 0 OK
5568 @retval HA_ERR_END_OF_FILE Whole index scanned, without finding the record.
5569 @retval HA_ERR_KEY_NOT_FOUND Record not found, but index cursor positioned.
5570 @retval other error code.
5571
5572 @details
5573 Start scanning the range (when invoked from read_range_first()) or doing
5574 an index lookup (when invoked from index_read_XXX):
5575 - If possible, perform partition selection
5576 - Find the set of partitions we're going to use
5577 - Depending on whether we need ordering:
5578 NO: Get the first record from first used partition (see
5579 handle_unordered_scan_next_partition)
5580 YES: Fill the priority queue and get the record that is the first in
5581 the ordering
5582*/
5583
int ha_partition::common_index_read(uchar *buf, bool have_start_key)
{
  int error;
  uint UNINIT_VAR(key_len); /* used if have_start_key==TRUE */
  bool reverse_order= FALSE;
  DBUG_ENTER("ha_partition::common_index_read");

  DBUG_PRINT("info", ("m_ordered %u m_ordered_scan_ong %u",
                      m_ordered, m_ordered_scan_ongoing));

  if (have_start_key)
  {
    /* Compute the byte length of the used key parts of the start key. */
    m_start_key.length= key_len= calculate_key_len(table, active_index,
                                                   m_start_key.key,
                                                   m_start_key.keypart_map);
    DBUG_PRINT("info", ("have_start_key map %lu find_flag %u len %u",
                        m_start_key.keypart_map, m_start_key.flag, key_len));
    DBUG_ASSERT(key_len);
  }
  /* Prune partitions and decide ordered vs. unordered scan mode. */
  if (unlikely((error= partition_scan_set_up(buf, have_start_key))))
  {
    DBUG_RETURN(error);
  }

  /* These find flags scan backwards, which requires the ordered scan. */
  if (have_start_key &&
      (m_start_key.flag == HA_READ_PREFIX_LAST ||
       m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
       m_start_key.flag == HA_READ_BEFORE_KEY))
  {
    reverse_order= TRUE;
    m_ordered_scan_ongoing= TRUE;
  }
  DBUG_PRINT("info", ("m_ordered %u m_o_scan_ong %u have_start_key %u",
                      m_ordered, m_ordered_scan_ongoing, have_start_key));
  if (!m_ordered_scan_ongoing)
  {
    /*
      We use unordered index scan when read_range is used and flag
      is set to not use ordered.
      We also use an unordered index scan when the number of partitions to
      scan is only one.
      The unordered index scan will use the partition set created.
    */
    DBUG_PRINT("info", ("doing unordered scan"));
    error= handle_pre_scan(FALSE, FALSE);
    if (likely(!error))
      error= handle_unordered_scan_next_partition(buf);
  }
  else
  {
    /*
      In all other cases we will use the ordered index scan. This will use
      the partition set created by the get_partition_set method.
    */
    error= handle_ordered_index_scan(buf, reverse_order);
  }
  DBUG_RETURN(error);
}
5642
5643
5644/*
5645 Start an index scan from leftmost record and return first record
5646
5647 SYNOPSIS
5648 index_first()
5649 buf Read row in MySQL Row Format
5650
5651 RETURN VALUE
5652 >0 Error code
5653 0 Success
5654
5655 DESCRIPTION
5656 index_first() asks for the first key in the index.
5657 This is similar to index_read except that there is no start key since
5658 the scan starts from the leftmost entry and proceeds forward with
5659 index_next.
5660
5661 Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5662 and sql_select.cc.
5663*/
5664
5665int ha_partition::index_first(uchar * buf)
5666{
5667 DBUG_ENTER("ha_partition::index_first");
5668 decrement_statistics(&SSV::ha_read_first_count);
5669
5670 end_range= 0;
5671 m_index_scan_type= partition_index_first;
5672 DBUG_RETURN(common_first_last(buf));
5673}
5674
5675
5676/*
5677 Start an index scan from rightmost record and return first record
5678
5679 SYNOPSIS
5680 index_last()
5681 buf Read row in MySQL Row Format
5682
5683 RETURN VALUE
5684 >0 Error code
5685 0 Success
5686
5687 DESCRIPTION
5688 index_last() asks for the last key in the index.
5689 This is similar to index_read except that there is no start key since
5690 the scan starts from the rightmost entry and proceeds forward with
5691 index_prev.
5692
5693 Called from opt_range.cc, opt_sum.cc, sql_handler.cc,
5694 and sql_select.cc.
5695*/
5696
5697int ha_partition::index_last(uchar * buf)
5698{
5699 DBUG_ENTER("ha_partition::index_last");
5700 decrement_statistics(&SSV::ha_read_last_count);
5701
5702 m_index_scan_type= partition_index_last;
5703 DBUG_RETURN(common_first_last(buf));
5704}
5705
5706/*
5707 Common routine for index_first/index_last
5708
5709 SYNOPSIS
5710 ha_partition::common_first_last()
5711
5712 see index_first for rest
5713*/
5714
5715int ha_partition::common_first_last(uchar *buf)
5716{
5717 int error;
5718
5719 if (unlikely((error= partition_scan_set_up(buf, FALSE))))
5720 return error;
5721 if (!m_ordered_scan_ongoing &&
5722 m_index_scan_type != partition_index_last)
5723 {
5724 if (unlikely((error= handle_pre_scan(FALSE, check_parallel_search()))))
5725 return error;
5726 return handle_unordered_scan_next_partition(buf);
5727 }
5728 return handle_ordered_index_scan(buf, FALSE);
5729}
5730
5731
5732/*
5733 Optimization of the default implementation to take advantage of dynamic
5734 partition pruning.
5735*/
int ha_partition::index_read_idx_map(uchar *buf, uint index,
                                     const uchar *key,
                                     key_part_map keypart_map,
                                     enum ha_rkey_function find_flag)
{
  int error= HA_ERR_KEY_NOT_FOUND;
  DBUG_ENTER("ha_partition::index_read_idx_map");

  if (find_flag == HA_READ_KEY_EXACT)
  {
    uint part;
    m_start_key.key= key;
    m_start_key.keypart_map= keypart_map;
    m_start_key.flag= find_flag;
    m_start_key.length= calculate_key_len(table, index, m_start_key.key,
                                          m_start_key.keypart_map);

    /* Prune the set of partitions that can contain the exact key. */
    get_partition_set(table, buf, index, &m_start_key, &m_part_spec);

    /*
      We have either found exactly 1 partition
      (in which case start_part == end_part)
      or no matching partitions (start_part > end_part)
    */
    DBUG_ASSERT(m_part_spec.start_part >= m_part_spec.end_part);
    /* The start part must be marked as used. */
    DBUG_ASSERT(m_part_spec.start_part > m_part_spec.end_part ||
                bitmap_is_set(&(m_part_info->read_partitions),
                              m_part_spec.start_part));

    for (part= m_part_spec.start_part;
         part <= m_part_spec.end_part;
         part= bitmap_get_next_set(&m_part_info->read_partitions, part))
    {
      error= m_file[part]->ha_index_read_idx_map(buf, index, key,
                                                 keypart_map, find_flag);
      /* Stop on a hit or a real error; "not found" tries the next part. */
      if (likely(error != HA_ERR_KEY_NOT_FOUND &&
                 error != HA_ERR_END_OF_FILE))
        break;
    }
    /* Remember which partition delivered (or was last probed for) the row. */
    if (part <= m_part_spec.end_part)
      m_last_part= part;
  }
  else
  {
    /*
      If not only used with READ_EXACT, we should investigate if possible
      to optimize for other find_flag's as well.
    */
    DBUG_ASSERT(0);
    /* fall back on the default implementation */
    error= handler::index_read_idx_map(buf, index, key, keypart_map, find_flag);
  }
  DBUG_RETURN(error);
}
5791
5792
5793/*
5794 Read next record in a forward index scan
5795
5796 SYNOPSIS
5797 index_next()
5798 buf Read row in MySQL Row Format
5799
5800 RETURN VALUE
5801 >0 Error code
5802 0 Success
5803
5804 DESCRIPTION
5805 Used to read forward through the index.
5806*/
5807
5808int ha_partition::index_next(uchar * buf)
5809{
5810 DBUG_ENTER("ha_partition::index_next");
5811 decrement_statistics(&SSV::ha_read_next_count);
5812
5813 /*
5814 TODO(low priority):
5815 If we want partition to work with the HANDLER commands, we
5816 must be able to do index_last() -> index_prev() -> index_next()
5817 and if direction changes, we must step back those partitions in
5818 the record queue so we don't return a value from the wrong direction.
5819 */
5820 if (m_index_scan_type == partition_index_last)
5821 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
5822 if (!m_ordered_scan_ongoing)
5823 {
5824 DBUG_RETURN(handle_unordered_next(buf, FALSE));
5825 }
5826 DBUG_RETURN(handle_ordered_next(buf, FALSE));
5827}
5828
5829
5830/*
5831 Read next record special
5832
5833 SYNOPSIS
5834 index_next_same()
5835 buf Read row in MySQL Row Format
5836 key Key
5837 keylen Length of key
5838
5839 RETURN VALUE
5840 >0 Error code
5841 0 Success
5842
5843 DESCRIPTION
5844 This routine is used to read the next but only if the key is the same
5845 as supplied in the call.
5846*/
5847
5848int ha_partition::index_next_same(uchar *buf, const uchar *key, uint keylen)
5849{
5850 DBUG_ENTER("ha_partition::index_next_same");
5851 decrement_statistics(&SSV::ha_read_next_count);
5852
5853 DBUG_ASSERT(keylen == m_start_key.length);
5854 if (m_index_scan_type == partition_index_last)
5855 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
5856 if (!m_ordered_scan_ongoing)
5857 DBUG_RETURN(handle_unordered_next(buf, TRUE));
5858 DBUG_RETURN(handle_ordered_next(buf, TRUE));
5859}
5860
5861
5862int ha_partition::index_read_last_map(uchar *buf,
5863 const uchar *key,
5864 key_part_map keypart_map)
5865{
5866 DBUG_ENTER("ha_partition::index_read_last_map");
5867
5868 m_ordered= true; // Safety measure
5869 end_range= NULL;
5870 m_index_scan_type= partition_index_read_last;
5871 m_start_key.key= key;
5872 m_start_key.keypart_map= keypart_map;
5873 m_start_key.flag= HA_READ_PREFIX_LAST;
5874 DBUG_RETURN(common_index_read(buf, true));
5875}
5876
5877
5878/*
5879 Read next record when performing index scan backwards
5880
5881 SYNOPSIS
5882 index_prev()
5883 buf Read row in MySQL Row Format
5884
5885 RETURN VALUE
5886 >0 Error code
5887 0 Success
5888
5889 DESCRIPTION
5890 Used to read backwards through the index.
5891*/
5892
5893int ha_partition::index_prev(uchar * buf)
5894{
5895 DBUG_ENTER("ha_partition::index_prev");
5896 decrement_statistics(&SSV::ha_read_prev_count);
5897
5898 /* TODO: read comment in index_next */
5899 if (m_index_scan_type == partition_index_first)
5900 DBUG_RETURN(HA_ERR_WRONG_COMMAND);
5901 DBUG_RETURN(handle_ordered_prev(buf));
5902}
5903
5904
5905/*
5906 Start a read of one range with start and end key
5907
5908 SYNOPSIS
5909 read_range_first()
5910 start_key Specification of start key
5911 end_key Specification of end key
5912 eq_range_arg Is it equal range
5913 sorted Should records be returned in sorted order
5914
5915 RETURN VALUE
5916 >0 Error code
5917 0 Success
5918
5919 DESCRIPTION
5920 We reimplement read_range_first since we don't want the compare_key
5921 check at the end. This is already performed in the partition handler.
5922 read_range_next is very much different due to that we need to scan
5923 all underlying handlers.
5924*/
5925
5926int ha_partition::read_range_first(const key_range *start_key,
5927 const key_range *end_key,
5928 bool eq_range_arg, bool sorted)
5929{
5930 int error;
5931 DBUG_ENTER("ha_partition::read_range_first");
5932
5933 m_ordered= sorted;
5934 eq_range= eq_range_arg;
5935 set_end_range(end_key);
5936
5937 range_key_part= m_curr_key_info[0]->key_part;
5938 if (start_key)
5939 m_start_key= *start_key;
5940 else
5941 m_start_key.key= NULL;
5942
5943 m_index_scan_type= partition_read_range;
5944 error= common_index_read(m_rec0, MY_TEST(start_key));
5945 DBUG_RETURN(error);
5946}
5947
5948
5949/*
5950 Read next record in read of a range with start and end key
5951
5952 SYNOPSIS
5953 read_range_next()
5954
5955 RETURN VALUE
5956 >0 Error code
5957 0 Success
5958*/
5959
5960int ha_partition::read_range_next()
5961{
5962 DBUG_ENTER("ha_partition::read_range_next");
5963
5964 if (m_ordered_scan_ongoing)
5965 {
5966 DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
5967 }
5968 DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
5969}
5970
5971/**
5972 Create a copy of all keys used by multi_range_read()
5973
5974 @retval 0 ok
5975 @retval HA_ERR_END_OF_FILE no keys in range
5976 @retval other value: error
5977
5978 TODO to save memory:
5979 - If (mrr_mode & HA_MRR_MATERIALIZED_KEYS) is set then the keys data is
5980 stable and we don't have to copy the keys, only store a pointer to the
5981 key.
5982 - When allocating key data, store things in a MEM_ROOT buffer instead of
5983 a malloc() per key. This will simplify and speed up the current code
5984 and use less memory.
5985*/
5986
int ha_partition::multi_range_key_create_key(RANGE_SEQ_IF *seq,
                                             range_seq_t seq_it)
{
  uint i, length;
  key_range *start_key, *end_key;
  KEY_MULTI_RANGE *range;
  DBUG_ENTER("ha_partition::multi_range_key_create_key");

  bitmap_clear_all(&m_mrr_used_partitions);
  m_mrr_range_length= 0;
  bzero(m_part_mrr_range_length,
        sizeof(*m_part_mrr_range_length) * m_tot_parts);
  /* Reuse a previously allocated range chain when possible. */
  if (!m_mrr_range_first)
  {
    if (!(m_mrr_range_first= (PARTITION_KEY_MULTI_RANGE *)
          my_multi_malloc(MYF(MY_WME),
                          &m_mrr_range_current,
                          sizeof(PARTITION_KEY_MULTI_RANGE),
                          NullS)))
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);

    m_mrr_range_first->id= 1;
    m_mrr_range_first->key[0]= NULL;
    m_mrr_range_first->key[1]= NULL;
    m_mrr_range_first->next= NULL;
  }
  else
    m_mrr_range_current= m_mrr_range_first;

  /* Rewind (or allocate) the per-partition range chains as well. */
  for (i= 0; i < m_tot_parts; i++)
  {
    if (!m_part_mrr_range_first[i])
    {
      if (!(m_part_mrr_range_first[i]= (PARTITION_PART_KEY_MULTI_RANGE *)
            my_multi_malloc(MYF(MY_WME | MY_ZEROFILL),
                            &m_part_mrr_range_current[i],
                            sizeof(PARTITION_PART_KEY_MULTI_RANGE),
                            NullS)))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }
    else
    {
      m_part_mrr_range_current[i]= m_part_mrr_range_first[i];
      m_part_mrr_range_current[i]->partition_key_multi_range= NULL;
    }
  }
  m_mrr_range_current->key_multi_range.start_key.key= NULL;
  m_mrr_range_current->key_multi_range.end_key.key= NULL;

  /* Pull every range from the caller's sequence and copy its keys. */
  while (!seq->next(seq_it, &m_mrr_range_current->key_multi_range))
  {
    m_mrr_range_length++;
    range= &m_mrr_range_current->key_multi_range;

    /* Copy start key */
    start_key= &range->start_key;
    DBUG_PRINT("info",("partition range->range_flag: %u", range->range_flag));
    DBUG_PRINT("info",("partition start_key->key: %p", start_key->key));
    DBUG_PRINT("info",("partition start_key->length: %u", start_key->length));
    DBUG_PRINT("info",("partition start_key->keypart_map: %lu",
                       start_key->keypart_map));
    DBUG_PRINT("info",("partition start_key->flag: %u", start_key->flag));

    if (start_key->key)
    {
      length= start_key->length;
      /* Grow the cached key buffer only when it is too small. */
      if (!m_mrr_range_current->key[0] ||
          m_mrr_range_current->length[0] < length)
      {
        if (m_mrr_range_current->key[0])
          my_free(m_mrr_range_current->key[0]);
        if (!(m_mrr_range_current->key[0]=
              (uchar *) my_malloc(length, MYF(MY_WME))))
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        m_mrr_range_current->length[0]= length;
      }
      memcpy(m_mrr_range_current->key[0], start_key->key, length);
      start_key->key= m_mrr_range_current->key[0];
    }

    /* Copy end key */
    end_key= &range->end_key;
    DBUG_PRINT("info",("partition end_key->key: %p", end_key->key));
    DBUG_PRINT("info",("partition end_key->length: %u", end_key->length));
    DBUG_PRINT("info",("partition end_key->keypart_map: %lu",
                       end_key->keypart_map));
    DBUG_PRINT("info",("partition end_key->flag: %u", end_key->flag));
    if (end_key->key)
    {
      length= end_key->length;
      if (!m_mrr_range_current->key[1] ||
          m_mrr_range_current->length[1] < length)
      {
        if (m_mrr_range_current->key[1])
          my_free(m_mrr_range_current->key[1]);
        if (!(m_mrr_range_current->key[1]=
              (uchar *) my_malloc(length, MYF(MY_WME))))
          DBUG_RETURN(HA_ERR_OUT_OF_MEM);
        m_mrr_range_current->length[1]= length;
      }
      memcpy(m_mrr_range_current->key[1], end_key->key, length);
      end_key->key= m_mrr_range_current->key[1];
    }

    /* Save the caller's ptr and substitute a pointer to this element. */
    m_mrr_range_current->ptr= m_mrr_range_current->key_multi_range.ptr;
    m_mrr_range_current->key_multi_range.ptr= m_mrr_range_current;

    /* Exact-match start keys allow partition pruning for this range. */
    if (start_key->key && (start_key->flag & HA_READ_KEY_EXACT))
      get_partition_set(table, table->record[0], active_index,
                        start_key, &m_part_spec);
    else
    {
      m_part_spec.start_part= 0;
      m_part_spec.end_part= m_tot_parts - 1;
    }

    /* Copy key to those partitions that needs it */
    for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
    {
      if (bitmap_is_set(&(m_part_info->read_partitions), i))
      {
        bitmap_set_bit(&m_mrr_used_partitions, i);
        m_part_mrr_range_length[i]++;
        m_part_mrr_range_current[i]->partition_key_multi_range=
          m_mrr_range_current;

        /* Extend this partition's chain if the cached one is exhausted. */
        if (!m_part_mrr_range_current[i]->next)
        {
          PARTITION_PART_KEY_MULTI_RANGE *tmp_part_mrr_range;
          if (!(tmp_part_mrr_range= (PARTITION_PART_KEY_MULTI_RANGE *)
                my_malloc(sizeof(PARTITION_PART_KEY_MULTI_RANGE),
                          MYF(MY_WME | MY_ZEROFILL))))
            DBUG_RETURN(HA_ERR_OUT_OF_MEM);

          m_part_mrr_range_current[i]->next= tmp_part_mrr_range;
          m_part_mrr_range_current[i]= tmp_part_mrr_range;
        }
        else
        {
          m_part_mrr_range_current[i]= m_part_mrr_range_current[i]->next;
          m_part_mrr_range_current[i]->partition_key_multi_range= NULL;
        }
      }
    }

    if (!m_mrr_range_current->next)
    {
      /* Add end of range sentinel */
      PARTITION_KEY_MULTI_RANGE *tmp_mrr_range;
      if (!(tmp_mrr_range= (PARTITION_KEY_MULTI_RANGE *)
            my_malloc(sizeof(PARTITION_KEY_MULTI_RANGE), MYF(MY_WME))))
        DBUG_RETURN(HA_ERR_OUT_OF_MEM);

      tmp_mrr_range->id= m_mrr_range_current->id + 1;
      tmp_mrr_range->key[0]= NULL;
      tmp_mrr_range->key[1]= NULL;
      tmp_mrr_range->next= NULL;
      m_mrr_range_current->next= tmp_mrr_range;
    }
    m_mrr_range_current= m_mrr_range_current->next;
  }

  if (!m_mrr_range_length)
  {
    DBUG_PRINT("Warning",("No keys to use for mrr"));
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }

  /* set start and end part */
  m_part_spec.start_part= bitmap_get_first_set(&m_mrr_used_partitions);

  /* end_part is the highest partition id with any range assigned. */
  for (i= m_tot_parts; i-- > 0;)
  {
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      m_part_spec.end_part= i;
      break;
    }
  }
  /* Set up the per-partition sequence state handed to child handlers. */
  for (i= 0; i < m_tot_parts; i++)
  {
    m_partition_part_key_multi_range_hld[i].partition= this;
    m_partition_part_key_multi_range_hld[i].part_id= i;
    m_partition_part_key_multi_range_hld[i].partition_part_key_multi_range=
      m_part_mrr_range_first[i];
  }
  DBUG_PRINT("return",("OK"));
  DBUG_RETURN(0);
}
6176
6177
6178static void partition_multi_range_key_get_key_info(void *init_params,
6179 uint *length,
6180 key_part_map *map)
6181{
6182 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6183 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)init_params;
6184 ha_partition *partition= hld->partition;
6185 key_range *start_key= (&partition->m_mrr_range_first->
6186 key_multi_range.start_key);
6187 DBUG_ENTER("partition_multi_range_key_get_key_info");
6188 *length= start_key->length;
6189 *map= start_key->keypart_map;
6190 DBUG_VOID_RETURN;
6191}
6192
6193
6194static range_seq_t partition_multi_range_key_init(void *init_params,
6195 uint n_ranges,
6196 uint flags)
6197{
6198 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6199 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)init_params;
6200 ha_partition *partition= hld->partition;
6201 uint i= hld->part_id;
6202 DBUG_ENTER("partition_multi_range_key_init");
6203 partition->m_mrr_range_init_flags= flags;
6204 hld->partition_part_key_multi_range= partition->m_part_mrr_range_first[i];
6205 DBUG_RETURN(init_params);
6206}
6207
6208
6209static bool partition_multi_range_key_next(range_seq_t seq,
6210 KEY_MULTI_RANGE *range)
6211{
6212 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6213 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq;
6214 PARTITION_KEY_MULTI_RANGE *partition_key_multi_range=
6215 hld->partition_part_key_multi_range->partition_key_multi_range;
6216 DBUG_ENTER("partition_multi_range_key_next");
6217 if (!partition_key_multi_range)
6218 DBUG_RETURN(TRUE);
6219 *range= partition_key_multi_range->key_multi_range;
6220 hld->partition_part_key_multi_range=
6221 hld->partition_part_key_multi_range->next;
6222 DBUG_RETURN(FALSE);
6223}
6224
6225
6226static bool partition_multi_range_key_skip_record(range_seq_t seq,
6227 range_id_t range_info,
6228 uchar *rowid)
6229{
6230 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6231 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq;
6232 DBUG_ENTER("partition_multi_range_key_skip_record");
6233 DBUG_RETURN(hld->partition->m_seq_if->skip_record(hld->partition->m_seq,
6234 range_info, rowid));
6235}
6236
6237
6238static bool partition_multi_range_key_skip_index_tuple(range_seq_t seq,
6239 range_id_t range_info)
6240{
6241 PARTITION_PART_KEY_MULTI_RANGE_HLD *hld=
6242 (PARTITION_PART_KEY_MULTI_RANGE_HLD *)seq;
6243 DBUG_ENTER("partition_multi_range_key_skip_index_tuple");
6244 DBUG_RETURN(hld->partition->m_seq_if->skip_index_tuple(hld->partition->m_seq,
6245 range_info));
6246}
6247
/**
  Estimate the number of rows and the cost of a multi-range read.

  Copies the caller's ranges per partition, then sums each used
  partition's own estimate. m_part_spec is saved and restored around the
  call. Returns HA_POS_ERROR when MRR cannot be used and the caller must
  fall back to normal reads.
*/
ha_rows ha_partition::multi_range_read_info_const(uint keyno,
                                                  RANGE_SEQ_IF *seq,
                                                  void *seq_init_param,
                                                  uint n_ranges, uint *bufsz,
                                                  uint *mrr_mode,
                                                  Cost_estimate *cost)
{
  int error;
  uint i;
  handler **file;
  ha_rows rows= 0;
  uint ret_mrr_mode= 0;
  range_seq_t seq_it;
  part_id_range save_part_spec;
  DBUG_ENTER("ha_partition::multi_range_read_info_const");
  DBUG_PRINT("enter", ("partition this: %p", this));

  m_mrr_new_full_buffer_size= 0;
  save_part_spec= m_part_spec;

  seq_it= seq->init(seq_init_param, n_ranges, *mrr_mode);
  if (unlikely((error= multi_range_key_create_key(seq, seq_it))))
  {
    if (likely(error == HA_ERR_END_OF_FILE)) // No keys in range
    {
      rows= 0;
      goto calc_cost;
    }
    /*
      This error means that we can't do multi_range_read for the moment
      (probably running out of memory) and we need to fallback to
      normal reads
    */
    m_part_spec= save_part_spec;
    DBUG_RETURN(HA_POS_ERROR);
  }
  /* Wire up the per-partition sequence callbacks for the children. */
  m_part_seq_if.get_key_info=
    seq->get_key_info ? partition_multi_range_key_get_key_info : NULL;
  m_part_seq_if.init= partition_multi_range_key_init;
  m_part_seq_if.next= partition_multi_range_key_next;
  m_part_seq_if.skip_record= (seq->skip_record ?
                              partition_multi_range_key_skip_record : NULL);
  m_part_seq_if.skip_index_tuple= (seq->skip_index_tuple ?
                                   partition_multi_range_key_skip_index_tuple :
                                   NULL);
  file= m_file;
  do
  {
    i= (uint)(file - m_file);
    DBUG_PRINT("info",("partition part_id: %u", i));
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      ha_rows tmp_rows;
      uint tmp_mrr_mode;
      m_mrr_buffer_size[i]= 0;
      tmp_mrr_mode= *mrr_mode;
      tmp_rows= (*file)->
        multi_range_read_info_const(keyno, &m_part_seq_if,
                                    &m_partition_part_key_multi_range_hld[i],
                                    m_part_mrr_range_length[i],
                                    &m_mrr_buffer_size[i],
                                    &tmp_mrr_mode, cost);
      if (tmp_rows == HA_POS_ERROR)
      {
        m_part_spec= save_part_spec;
        DBUG_RETURN(HA_POS_ERROR);
      }
      rows+= tmp_rows;
      /* OR together the modes the children actually chose. */
      ret_mrr_mode|= tmp_mrr_mode;
      m_mrr_new_full_buffer_size+= m_mrr_buffer_size[i];
    }
  } while (*(++file));
  *mrr_mode= ret_mrr_mode;

calc_cost:
  m_part_spec= save_part_spec;
  cost->reset();
  cost->avg_io_cost= 1;
  if ((*mrr_mode & HA_MRR_INDEX_ONLY) && rows > 2)
    cost->io_count= keyread_time(keyno, n_ranges, (uint) rows);
  else
    cost->io_count= read_time(keyno, n_ranges, rows);
  cost->cpu_cost= (double) rows / TIME_FOR_COMPARE + 0.01;
  DBUG_RETURN(rows);
}
6333
6334
6335ha_rows ha_partition::multi_range_read_info(uint keyno, uint n_ranges,
6336 uint keys,
6337 uint key_parts, uint *bufsz,
6338 uint *mrr_mode,
6339 Cost_estimate *cost)
6340{
6341 uint i;
6342 handler **file;
6343 ha_rows rows;
6344 DBUG_ENTER("ha_partition::multi_range_read_info");
6345 DBUG_PRINT("enter", ("partition this: %p", this));
6346
6347 m_mrr_new_full_buffer_size= 0;
6348 file= m_file;
6349 do
6350 {
6351 i= (uint)(file - m_file);
6352 if (bitmap_is_set(&(m_part_info->read_partitions), (i)))
6353 {
6354 m_mrr_buffer_size[i]= 0;
6355 if ((rows= (*file)->multi_range_read_info(keyno, n_ranges, keys,
6356 key_parts,
6357 &m_mrr_buffer_size[i],
6358 mrr_mode, cost)))
6359 DBUG_RETURN(rows);
6360 m_mrr_new_full_buffer_size+= m_mrr_buffer_size[i];
6361 }
6362 } while (*(++file));
6363
6364 cost->reset();
6365 cost->avg_io_cost= 1;
6366 if (*mrr_mode & HA_MRR_INDEX_ONLY)
6367 cost->io_count= keyread_time(keyno, n_ranges, (uint) rows);
6368 else
6369 cost->io_count= read_time(keyno, n_ranges, rows);
6370 DBUG_RETURN(0);
6371}
6372
6373
/**
  Initialize a multi range read scan over all used partitions.

  Splits the caller's range sequence into per-partition sequences
  (multi_range_key_create_key), carves the shared full buffer into one
  slice per used partition, and calls multi_range_read_init() on each
  child handler with the partition-local sequence interface.

  @param seq             Caller's range sequence interface
  @param seq_init_param  Opaque parameter for seq->init()
  @param n_ranges        Number of ranges in the sequence
  @param mrr_mode        MRR mode flags
  @param buf             Caller-supplied buffer, used directly when no
                         full buffer was pre-sized

  @return 0 on success, error code otherwise
*/

int ha_partition::multi_range_read_init(RANGE_SEQ_IF *seq,
                                        void *seq_init_param,
                                        uint n_ranges, uint mrr_mode,
                                        HANDLER_BUFFER *buf)
{
  int error;
  uint i;
  handler **file;
  uchar *tmp_buffer;
  DBUG_ENTER("ha_partition::multi_range_read_init");
  DBUG_PRINT("enter", ("partition this: %p", this));

  m_seq_if= seq;
  m_seq= seq->init(seq_init_param, n_ranges, mrr_mode);
  /*
    NOTE(review): when multi_range_key_create_key() fails this returns 0
    (success) instead of the error code -- confirm this is intentional
    (e.g. for an empty range sequence); a genuine failure such as OOM
    would be swallowed here as well.
  */
  if (unlikely((error= multi_range_key_create_key(seq, m_seq))))
    DBUG_RETURN(0);

  /*
    Install the per-partition wrappers around the caller's sequence;
    optional callbacks are only forwarded when the caller provides them.
  */
  m_part_seq_if.get_key_info= (seq->get_key_info ?
                               partition_multi_range_key_get_key_info :
                               NULL);
  m_part_seq_if.init= partition_multi_range_key_init;
  m_part_seq_if.next= partition_multi_range_key_next;
  m_part_seq_if.skip_record= (seq->skip_record ?
                              partition_multi_range_key_skip_record :
                              NULL);
  m_part_seq_if.skip_index_tuple= (seq->skip_index_tuple ?
                                   partition_multi_range_key_skip_index_tuple :
                                   NULL);

  /* m_mrr_new_full_buffer_size was calculated in multi_range_read_info */
  if (m_mrr_full_buffer_size < m_mrr_new_full_buffer_size)
  {
    /* Grow the shared buffer; the old one (if any) is released first. */
    if (m_mrr_full_buffer)
      my_free(m_mrr_full_buffer);
    if (!(m_mrr_full_buffer=
          (uchar *) my_malloc(m_mrr_new_full_buffer_size, MYF(MY_WME))))
    {
      m_mrr_full_buffer_size= 0;
      error= HA_ERR_OUT_OF_MEM;
      goto error;
    }
    m_mrr_full_buffer_size= m_mrr_new_full_buffer_size;
  }

  tmp_buffer= m_mrr_full_buffer;
  file= m_file;
  do
  {
    i= (uint)(file - m_file);
    DBUG_PRINT("info",("partition part_id: %u", i));
    if (bitmap_is_set(&m_mrr_used_partitions, i))
    {
      if (m_mrr_new_full_buffer_size)
      {
        /* Hand each used partition its slice of the shared buffer. */
        if (m_mrr_buffer_size[i])
        {
          m_mrr_buffer[i].buffer= tmp_buffer;
          m_mrr_buffer[i].end_of_used_area= tmp_buffer;
          tmp_buffer+= m_mrr_buffer_size[i];
          m_mrr_buffer[i].buffer_end= tmp_buffer;
        }
      }
      else
        m_mrr_buffer[i]= *buf;           // Fall back to the caller's buffer

      if (unlikely((error= (*file)->
                    multi_range_read_init(&m_part_seq_if,
                                          &m_partition_part_key_multi_range_hld[i],
                                          m_part_mrr_range_length[i],
                                          mrr_mode,
                                          &m_mrr_buffer[i]))))
        goto error;
      m_stock_range_seq[i]= 0;
    }
  } while (*(++file));

  m_multi_range_read_first= TRUE;
  m_mrr_range_current= m_mrr_range_first;
  m_index_scan_type= partition_read_multi_range;
  m_mrr_mode= mrr_mode;
  m_mrr_n_ranges= n_ranges;
  DBUG_RETURN(0);

error:
  DBUG_RETURN(error);
}
6460
6461
6462int ha_partition::multi_range_read_next(range_id_t *range_info)
6463{
6464 int error;
6465 DBUG_ENTER("ha_partition::multi_range_read_next");
6466 DBUG_PRINT("enter", ("partition this: %p partition m_mrr_mode: %u",
6467 this, m_mrr_mode));
6468
6469 if ((m_mrr_mode & HA_MRR_SORTED))
6470 {
6471 if (m_multi_range_read_first)
6472 {
6473 if (unlikely((error= handle_ordered_index_scan(table->record[0],
6474 FALSE))))
6475 DBUG_RETURN(error);
6476 if (!m_pre_calling)
6477 m_multi_range_read_first= FALSE;
6478 }
6479 else if (unlikely((error= handle_ordered_next(table->record[0],
6480 eq_range))))
6481 DBUG_RETURN(error);
6482 *range_info= m_mrr_range_current->ptr;
6483 }
6484 else
6485 {
6486 if (unlikely(m_multi_range_read_first))
6487 {
6488 if (unlikely((error=
6489 handle_unordered_scan_next_partition(table->record[0]))))
6490 DBUG_RETURN(error);
6491 if (!m_pre_calling)
6492 m_multi_range_read_first= FALSE;
6493 }
6494 else if (unlikely((error= handle_unordered_next(table->record[0], FALSE))))
6495 DBUG_RETURN(error);
6496
6497 *range_info=
6498 ((PARTITION_KEY_MULTI_RANGE *) m_range_info[m_last_part])->ptr;
6499 }
6500 DBUG_RETURN(0);
6501}
6502
6503
6504int ha_partition::multi_range_read_explain_info(uint mrr_mode, char *str,
6505 size_t size)
6506{
6507 DBUG_ENTER("ha_partition::multi_range_read_explain_info");
6508 DBUG_RETURN(get_open_file_sample()->
6509 multi_range_read_explain_info(mrr_mode, str, size));
6510}
6511
6512
6513/**
6514 Find and retrieve the Full Text Search relevance ranking for a search string
6515 in a full text index.
6516
6517 @param handler Full Text Search handler
6518 @param record Search string
6519 @param length Length of the search string
6520
6521 @retval Relevance value
6522*/
6523
6524float partition_ft_find_relevance(FT_INFO *handler,
6525 uchar *record, uint length)
6526{
6527 st_partition_ft_info *info= (st_partition_ft_info *)handler;
6528 uint m_last_part= ((ha_partition*) info->file)->last_part();
6529 FT_INFO *m_handler= info->part_ft_info[m_last_part];
6530 DBUG_ENTER("partition_ft_find_relevance");
6531 if (!m_handler)
6532 DBUG_RETURN((float)-1.0);
6533 DBUG_RETURN(m_handler->please->find_relevance(m_handler, record, length));
6534}
6535
6536
6537/**
6538 Retrieve the Full Text Search relevance ranking for the current
6539 full text search.
6540
6541 @param handler Full Text Search handler
6542
6543 @retval Relevance value
6544*/
6545
6546float partition_ft_get_relevance(FT_INFO *handler)
6547{
6548 st_partition_ft_info *info= (st_partition_ft_info *)handler;
6549 uint m_last_part= ((ha_partition*) info->file)->last_part();
6550 FT_INFO *m_handler= info->part_ft_info[m_last_part];
6551 DBUG_ENTER("partition_ft_get_relevance");
6552 if (!m_handler)
6553 DBUG_RETURN((float)-1.0);
6554 DBUG_RETURN(m_handler->please->get_relevance(m_handler));
6555}
6556
6557
6558/**
6559 Free the memory for a full text search handler.
6560
6561 @param handler Full Text Search handler
6562*/
6563
6564void partition_ft_close_search(FT_INFO *handler)
6565{
6566 st_partition_ft_info *info= (st_partition_ft_info *)handler;
6567 info->file->ft_close_search(handler);
6568}
6569
6570
6571/**
6572 Free the memory for a full text search handler.
6573
6574 @param handler Full Text Search handler
6575*/
6576
6577void ha_partition::ft_close_search(FT_INFO *handler)
6578{
6579 uint i;
6580 st_partition_ft_info *info= (st_partition_ft_info *)handler;
6581 DBUG_ENTER("ha_partition::ft_close_search");
6582
6583 for (i= 0; i < m_tot_parts; i++)
6584 {
6585 FT_INFO *m_handler= info->part_ft_info[i];
6586 DBUG_ASSERT(!m_handler ||
6587 (m_handler->please && m_handler->please->close_search));
6588 if (m_handler &&
6589 m_handler->please &&
6590 m_handler->please->close_search)
6591 m_handler->please->close_search(m_handler);
6592 }
6593 DBUG_VOID_RETURN;
6594}
6595
6596
/*
  Partition Full Text search function table: the vtable installed into
  st_partition_ft_info::please by ha_partition::ft_init_ext().
*/
_ft_vft partition_ft_vft =
{
  NULL,                        // partition_ft_read_next (not implemented)
  partition_ft_find_relevance,
  partition_ft_close_search,
  partition_ft_get_relevance,
  NULL                         // partition_ft_reinit_search (not implemented)
};
6606
6607
6608/**
6609 Initialize a full text search.
6610*/
6611
int ha_partition::ft_init()
{
  int error;
  uint i= 0;
  uint32 part_id;
  DBUG_ENTER("ha_partition::ft_init");
  DBUG_PRINT("info", ("partition this: %p", this));

  /*
    For operations that may need to change data, we may need to extend
    read_set.
  */
  if (get_lock_type() == F_WRLCK)
  {
    /*
      If write_set contains any of the fields used in partition and
      subpartition expression, we need to set all bits in read_set because
      the row may need to be inserted in a different [sub]partition. In
      other words update_row() can be converted into write_row(), which
      requires a complete record.
    */
    if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                              table->write_set))
      bitmap_set_all(table->read_set);
    else
    {
      /*
        Some handlers only read fields as specified by the bitmap for the
        read set. For partitioned handlers we always require that the
        fields of the partition functions are read such that we can
        calculate the partition id to place updated and deleted records.
      */
      bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    }
  }

  /* Now we see what the index of our first important partition is */
  DBUG_PRINT("info", ("m_part_info->read_partitions: %p",
                      (void *) m_part_info->read_partitions.bitmap));
  part_id= bitmap_get_first_set(&(m_part_info->read_partitions));
  DBUG_PRINT("info", ("m_part_spec.start_part %u", (uint) part_id));

  if (part_id == MY_BIT_NONE)
  {
    /* No partition marked for reading: nothing to init, return success. */
    error= 0;
    goto err1;
  }

  DBUG_PRINT("info", ("ft_init on partition %u", (uint) part_id));
  /*
    ft_end() is needed for partitioning to reset internal data if scan
    is already in use
  */
  if (m_pre_calling)
  {
    if (unlikely((error= pre_ft_end())))
      goto err1;
  }
  else
    ft_end();
  m_index_scan_type= partition_ft_read;
  /* Initialize FT scans on all used partitions from part_id onwards. */
  for (i= part_id; i < m_tot_parts; i++)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i))
    {
      error= m_pre_calling ? m_file[i]->pre_ft_init() : m_file[i]->ft_init();
      if (unlikely(error))
        goto err2;
    }
  }
  m_scan_value= 1;
  m_part_spec.start_part= part_id;
  m_part_spec.end_part= m_tot_parts - 1;
  m_ft_init_and_first= TRUE;
  DBUG_PRINT("info", ("m_scan_value: %u", m_scan_value));
  DBUG_RETURN(0);

err2:
  /* Unwind: end the FT scans already started, in reverse order. */
  late_extra_no_cache(part_id);
  while ((int)--i >= (int)part_id)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i))
    {
      if (m_pre_calling)
        m_file[i]->pre_ft_end();
      else
        m_file[i]->ft_end();
    }
  }
err1:
  /* m_scan_value == 2 marks "no active scan" (see ft_end()'s switch). */
  m_scan_value= 2;
  m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(error);
}
6706
6707
6708/**
6709 Initialize a full text search during a bulk access request.
6710*/
6711
6712int ha_partition::pre_ft_init()
6713{
6714 bool save_m_pre_calling;
6715 int error;
6716 DBUG_ENTER("ha_partition::pre_ft_init");
6717 save_m_pre_calling= m_pre_calling;
6718 m_pre_calling= TRUE;
6719 error= ft_init();
6720 m_pre_calling= save_m_pre_calling;
6721 DBUG_RETURN(error);
6722}
6723
6724
6725/**
6726 Terminate a full text search.
6727*/
6728
6729void ha_partition::ft_end()
6730{
6731 handler **file;
6732 DBUG_ENTER("ha_partition::ft_end");
6733 DBUG_PRINT("info", ("partition this: %p", this));
6734
6735 switch (m_scan_value) {
6736 case 2: // Error
6737 break;
6738 case 1: // Table scan
6739 if (NO_CURRENT_PART_ID != m_part_spec.start_part)
6740 late_extra_no_cache(m_part_spec.start_part);
6741 file= m_file;
6742 do
6743 {
6744 if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file)))
6745 {
6746 if (m_pre_calling)
6747 (*file)->pre_ft_end();
6748 else
6749 (*file)->ft_end();
6750 }
6751 } while (*(++file));
6752 break;
6753 }
6754 m_scan_value= 2;
6755 m_part_spec.start_part= NO_CURRENT_PART_ID;
6756 ft_current= 0;
6757 DBUG_VOID_RETURN;
6758}
6759
6760
6761/**
6762 Terminate a full text search during a bulk access request.
6763*/
6764
6765int ha_partition::pre_ft_end()
6766{
6767 bool save_m_pre_calling;
6768 DBUG_ENTER("ha_partition::pre_ft_end");
6769 save_m_pre_calling= m_pre_calling;
6770 m_pre_calling= TRUE;
6771 ft_end();
6772 m_pre_calling= save_m_pre_calling;
6773 DBUG_RETURN(0);
6774}
6775
6776
6777/**
6778 Initialize a full text search using the extended API.
6779
6780 @param flags Search flags
6781 @param inx Key number
6782 @param key Key value
6783
6784 @return FT_INFO structure if successful
6785 NULL otherwise
6786*/
6787
FT_INFO *ha_partition::ft_init_ext(uint flags, uint inx, String *key)
{
  FT_INFO *ft_handler;
  handler **file;
  st_partition_ft_info *ft_target, **parent;
  DBUG_ENTER("ha_partition::ft_init_ext");

  /*
    st_partition_ft_info objects are kept in a singly linked list
    (ft_first .. ft_current) and reused between searches; a new node is
    allocated only when the list is exhausted.
  */
  if (ft_current)
    parent= &ft_current->next;
  else
    parent= &ft_first;

  if (!(ft_target= *parent))
  {
    FT_INFO **tmp_ft_info;
    /*
      One allocation holds both the node and its per-partition FT_INFO
      pointer array; my_multi_malloc fills in both pointers.
    */
    if (!(ft_target= (st_partition_ft_info *)
          my_multi_malloc(MYF(MY_WME | MY_ZEROFILL),
                          &ft_target,
                          sizeof(st_partition_ft_info),
                          &tmp_ft_info,
                          sizeof(FT_INFO *) * m_tot_parts,
                          NullS)))
    {
      my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR));
      DBUG_RETURN(NULL);
    }
    ft_target->part_ft_info= tmp_ft_info;
    (*parent)= ft_target;                 // Link the new node into the list
  }

  ft_current= ft_target;
  file= m_file;
  do
  {
    /* Start an FT search on every used partition; clear the rest. */
    if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file)))
    {
      if ((ft_handler= (*file)->ft_init_ext(flags, inx, key)))
        (*file)->ft_handler= ft_handler;
      else
        (*file)->ft_handler= NULL;
      ft_target->part_ft_info[file - m_file]= ft_handler;
    }
    else
    {
      (*file)->ft_handler= NULL;
      ft_target->part_ft_info[file - m_file]= NULL;
    }
  } while (*(++file));

  /* Install the partition FT vtable and back-pointer to this handler. */
  ft_target->please= &partition_ft_vft;
  ft_target->file= this;
  DBUG_RETURN((FT_INFO*)ft_target);
}
6841
6842
6843/**
6844 Return the next record from the FT result set during an ordered index
6845 pre-scan
6846
6847 @param use_parallel Is it a parallel search
6848
6849 @return >0 Error code
6850 0 Success
6851*/
6852
6853int ha_partition::pre_ft_read(bool use_parallel)
6854{
6855 bool save_m_pre_calling;
6856 int error;
6857 DBUG_ENTER("ha_partition::pre_ft_read");
6858 DBUG_PRINT("info", ("partition this: %p", this));
6859 save_m_pre_calling= m_pre_calling;
6860 m_pre_calling= TRUE;
6861 m_pre_call_use_parallel= use_parallel;
6862 error= ft_read(table->record[0]);
6863 m_pre_calling= save_m_pre_calling;
6864 DBUG_RETURN(error);
6865}
6866
6867
6868/**
6869 Return the first or next record in a full text search.
6870
6871 @param buf Buffer where the record should be returned
6872
6873 @return >0 Error code
6874 0 Success
6875*/
6876
int ha_partition::ft_read(uchar *buf)
{
  handler *file;
  int result= HA_ERR_END_OF_FILE, error;
  uint part_id= m_part_spec.start_part;
  DBUG_ENTER("ha_partition::ft_read");
  DBUG_PRINT("info", ("partition this: %p", this));
  DBUG_PRINT("info", ("part_id: %u", part_id));

  if (part_id == NO_CURRENT_PART_ID)
  {
    /*
      The original set of partitions to scan was empty and thus we report
      the result here.
    */
    DBUG_PRINT("info", ("NO_CURRENT_PART_ID"));
    goto end;
  }

  /* ft_init() must have set m_scan_value to 1 (active scan). */
  DBUG_ASSERT(m_scan_value == 1);

  if (m_ft_init_and_first)   // First call to ft_read()
  {
    m_ft_init_and_first= FALSE;
    if (!bulk_access_executing)
    {
      error= handle_pre_scan(FALSE, check_parallel_search());
      if (m_pre_calling || error)
        DBUG_RETURN(error);
    }
    late_extra_cache(part_id);         // Paired with late_extra_no_cache below
  }

  file= m_file[part_id];

  while (TRUE)
  {
    if (!(result= file->ft_read(buf)))
    {
      /* Found row: remember position and return it. */
      m_part_spec.start_part= m_last_part= part_id;
      table->status= 0;
      DBUG_RETURN(0);
    }

    /*
      if we get here, then the current partition ft_next returned failure
    */
    if (result != HA_ERR_END_OF_FILE)
      goto end_dont_reset_start_part;   // Return error

    /* End current partition */
    late_extra_no_cache(part_id);
    DBUG_PRINT("info", ("stopping using partition %u", (uint) part_id));

    /* Shift to next partition: skip unused partitions in the bitmap. */
    while (++part_id < m_tot_parts &&
           !bitmap_is_set(&(m_part_info->read_partitions), part_id))
      ;
    if (part_id >= m_tot_parts)
    {
      result= HA_ERR_END_OF_FILE;
      break;
    }
    m_part_spec.start_part= m_last_part= part_id;
    file= m_file[part_id];
    DBUG_PRINT("info", ("now using partition %u", (uint) part_id));
    late_extra_cache(part_id);
  }

end:
  m_part_spec.start_part= NO_CURRENT_PART_ID;
end_dont_reset_start_part:
  table->status= STATUS_NOT_FOUND;
  DBUG_RETURN(result);
}
6953
6954
6955/*
6956 Common routine to set up index scans
6957
6958 SYNOPSIS
6959 ha_partition::partition_scan_set_up()
6960 buf Buffer to later return record in (this function
                        needs it to calculate partitioning function
6962 values)
6963
6964 idx_read_flag TRUE <=> m_start_key has range start endpoint which
6965 probably can be used to determine the set of partitions
6966 to scan.
6967 FALSE <=> there is no start endpoint.
6968
6969 DESCRIPTION
6970 Find out which partitions we'll need to read when scanning the specified
6971 range.
6972
6973 If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
6974 as we will not need to do merge ordering.
6975
6976 RETURN VALUE
6977 >0 Error code
6978 0 Success
6979*/
6980
6981int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
6982{
6983 DBUG_ENTER("ha_partition::partition_scan_set_up");
6984
6985 if (idx_read_flag)
6986 get_partition_set(table, buf, active_index, &m_start_key, &m_part_spec);
6987 else
6988 {
6989 m_part_spec.start_part= 0;
6990 m_part_spec.end_part= m_tot_parts - 1;
6991 }
6992 if (m_part_spec.start_part > m_part_spec.end_part)
6993 {
6994 /*
6995 We discovered a partition set but the set was empty so we report
6996 key not found.
6997 */
6998 DBUG_PRINT("info", ("scan with no partition to scan"));
6999 DBUG_RETURN(HA_ERR_END_OF_FILE);
7000 }
7001 if (m_part_spec.start_part == m_part_spec.end_part)
7002 {
7003 /*
7004 We discovered a single partition to scan, this never needs to be
7005 performed using the ordered index scan.
7006 */
7007 DBUG_PRINT("info", ("index scan using the single partition %u",
7008 (uint) m_part_spec.start_part));
7009 m_ordered_scan_ongoing= FALSE;
7010 }
7011 else
7012 {
7013 /*
7014 Set m_ordered_scan_ongoing according how the scan should be done
7015 Only exact partitions are discovered atm by get_partition_set.
7016 Verify this, also bitmap must have at least one bit set otherwise
7017 the result from this table is the empty set.
7018 */
7019 uint start_part= bitmap_get_first_set(&(m_part_info->read_partitions));
7020 if (start_part == MY_BIT_NONE)
7021 {
7022 DBUG_PRINT("info", ("scan with no partition to scan"));
7023 DBUG_RETURN(HA_ERR_END_OF_FILE);
7024 }
7025 if (start_part > m_part_spec.start_part)
7026 m_part_spec.start_part= start_part;
7027 DBUG_ASSERT(m_part_spec.start_part < m_tot_parts);
7028 m_ordered_scan_ongoing= m_ordered;
7029 }
7030 DBUG_ASSERT(m_part_spec.start_part < m_tot_parts &&
7031 m_part_spec.end_part < m_tot_parts);
7032 DBUG_RETURN(0);
7033}
7034
7035/**
7036 Check if we can search partitions in parallel
7037
7038 @retval TRUE yes
7039 @retval FALSE no
7040*/
7041
bool ha_partition::check_parallel_search()
{
  TABLE_LIST *table_list= table->pos_in_table_list;
  st_select_lex *select_lex;
  JOIN *join;
  DBUG_ENTER("ha_partition::check_parallel_search");
  if (!table_list)
    goto not_parallel;

  /* Walk up to the outermost table reference. */
  while (table_list->parent_l)
    table_list= table_list->parent_l;

  select_lex= table_list->select_lex;
  DBUG_PRINT("info",("partition select_lex: %p", select_lex));
  if (!select_lex)
    goto not_parallel;
  /* Without an explicit LIMIT the whole result is needed: parallel is fine. */
  if (!select_lex->explicit_limit)
  {
    DBUG_PRINT("info",("partition not using explicit_limit"));
    goto parallel;
  }

  join= select_lex->join;
  DBUG_PRINT("info",("partition join: %p", join));
  if (join && join->skip_sort_order)
  {
    /*
      ORDER BY <first range-partitioning field> LIMIT n can likely be
      satisfied from the first partition alone -- avoid parallel search
      in that case.
    */
    DBUG_PRINT("info",("partition order_list.elements: %u",
                       select_lex->order_list.elements));
    if (select_lex->order_list.elements)
    {
      Item *item= *select_lex->order_list.first->item;
      DBUG_PRINT("info",("partition item: %p", item));
      DBUG_PRINT("info",("partition item->type(): %u", item->type()));
      DBUG_PRINT("info",("partition m_part_info->part_type: %u",
                         m_part_info->part_type));
      DBUG_PRINT("info",("partition m_is_sub_partitioned: %s",
                         m_is_sub_partitioned ? "TRUE" : "FALSE"));
      DBUG_PRINT("info",("partition m_part_info->part_expr: %p",
                         m_part_info->part_expr));
      if (item->type() == Item::FIELD_ITEM &&
          m_part_info->part_type == RANGE_PARTITION &&
          !m_is_sub_partitioned &&
          (!m_part_info->part_expr ||
           m_part_info->part_expr->type() == Item::FIELD_ITEM))
      {
        Field *order_field= ((Item_field *)item)->field;
        DBUG_PRINT("info",("partition order_field: %p", order_field));
        if (order_field && order_field->table == table_list->table)
        {
          Field *part_field= m_part_info->full_part_field_array[0];
          if (set_top_table_fields)
            order_field= top_table_field[order_field->field_index];
          DBUG_PRINT("info",("partition order_field: %p", order_field));
          DBUG_PRINT("info",("partition part_field: %p", part_field));
          if (part_field == order_field)
          {
            /*
              We are using ORDER BY partition_field LIMIT #
              In this case, let's not do things in parallel as it's
              likely that the query can be satisfied from the first
              partition
            */
            DBUG_PRINT("info",("partition with ORDER on partition field"));
            goto not_parallel;
          }
        }
      }
      DBUG_PRINT("info",("partition have order"));
      goto parallel;
    }

    /* Same test for GROUP BY on the first range-partitioning field. */
    DBUG_PRINT("info",("partition group_list.elements: %u",
                       select_lex->group_list.elements));
    if (select_lex->group_list.elements)
    {
      Item *item= *select_lex->group_list.first->item;
      DBUG_PRINT("info",("partition item: %p", item));
      DBUG_PRINT("info",("partition item->type(): %u", item->type()));
      DBUG_PRINT("info",("partition m_part_info->part_type: %u",
                         m_part_info->part_type));
      DBUG_PRINT("info",("partition m_is_sub_partitioned: %s",
                         m_is_sub_partitioned ? "TRUE" : "FALSE"));
      DBUG_PRINT("info",("partition m_part_info->part_expr: %p",
                         m_part_info->part_expr));
      if (item->type() == Item::FIELD_ITEM &&
          m_part_info->part_type == RANGE_PARTITION &&
          !m_is_sub_partitioned &&
          (!m_part_info->part_expr ||
           m_part_info->part_expr->type() == Item::FIELD_ITEM))
      {
        Field *group_field= ((Item_field *)item)->field;
        DBUG_PRINT("info",("partition group_field: %p", group_field));
        if (group_field && group_field->table == table_list->table)
        {
          Field *part_field= m_part_info->full_part_field_array[0];
          if (set_top_table_fields)
            group_field= top_table_field[group_field->field_index];
          DBUG_PRINT("info",("partition group_field: %p", group_field));
          DBUG_PRINT("info",("partition part_field: %p", part_field));
          if (part_field == group_field)
          {
            DBUG_PRINT("info",("partition with GROUP BY on partition field"));
            goto not_parallel;
          }
        }
      }
      DBUG_PRINT("info",("partition with GROUP BY"));
      goto parallel;
    }
  }
  else if (select_lex->order_list.elements ||
           select_lex->group_list.elements)
  {
    /* Sorting/grouping will be done anyway: parallel search is fine. */
    DBUG_PRINT("info",("partition is not skip_order"));
    DBUG_PRINT("info",("partition order_list.elements: %u",
                       select_lex->order_list.elements));
    DBUG_PRINT("info",("partition group_list.elements: %u",
                       select_lex->group_list.elements));
    goto parallel;
  }
  DBUG_PRINT("info",("partition is not skip_order"));

not_parallel:
  DBUG_PRINT("return",("partition FALSE"));
  DBUG_RETURN(FALSE);

parallel:
  DBUG_PRINT("return",("partition TRUE"));
  DBUG_RETURN(TRUE);
}
7172
7173
/**
  Issue the "pre" variant of the current index scan to every used
  partition in [m_part_spec.start_part, m_part_spec.end_part].

  The pre_* call issued depends on m_index_scan_type. A partition
  returning HA_ERR_END_OF_FILE is treated as success; any other error
  aborts the loop and is returned.

  @param reverse_order  Unused in this function
  @param use_parallel   Forwarded to each partition's pre_* call

  @return >0 Error code, 0 Success
*/

int ha_partition::handle_pre_scan(bool reverse_order, bool use_parallel)
{
  uint i;
  DBUG_ENTER("ha_partition::handle_pre_scan");
  DBUG_PRINT("enter",
             ("m_part_spec.start_part: %u  m_part_spec.end_part: %u",
              (uint) m_part_spec.start_part, (uint) m_part_spec.end_part));

  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    if (!(bitmap_is_set(&(m_part_info->read_partitions), i)))
      continue;
    int error;
    handler *file= m_file[i];

    switch (m_index_scan_type) {
    case partition_index_read:
      error= file->pre_index_read_map(m_start_key.key,
                                      m_start_key.keypart_map,
                                      m_start_key.flag,
                                      use_parallel);
      break;
    case partition_index_first:
      error= file->pre_index_first(use_parallel);
      break;
    case partition_index_last:
      error= file->pre_index_last(use_parallel);
      break;
    case partition_index_read_last:
      error= file->pre_index_read_last_map(m_start_key.key,
                                           m_start_key.keypart_map,
                                           use_parallel);
      break;
    case partition_read_range:
      error= file->pre_read_range_first(m_start_key.key? &m_start_key: NULL,
                                        end_range, eq_range, TRUE, use_parallel);
      break;
    case partition_read_multi_range:
      /* MRR only touches the partitions recorded in m_mrr_used_partitions. */
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
        continue;
      error= file->pre_multi_range_read_next(use_parallel);
      break;
    case partition_ft_read:
      error= file->pre_ft_read(use_parallel);
      break;
    case partition_no_index_scan:
      error= file->pre_rnd_next(use_parallel);
      break;
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(0);
    }
    /* EOF from one partition is not an error for the pre-scan. */
    if (error == HA_ERR_END_OF_FILE)
      error= 0;
    if (unlikely(error))
      DBUG_RETURN(error);
  }
  table->status= 0;
  DBUG_RETURN(0);
}
7234
7235
7236/****************************************************************************
7237 Unordered Index Scan Routines
7238****************************************************************************/
7239/*
7240 Common routine to handle index_next with unordered results
7241
7242 SYNOPSIS
7243 handle_unordered_next()
7244 out:buf Read row in MySQL Row Format
7245 next_same Called from index_next_same
7246
7247 RETURN VALUE
7248 HA_ERR_END_OF_FILE End of scan
7249 0 Success
7250 other Error code
7251
7252 DESCRIPTION
7253 These routines are used to scan partitions without considering order.
7254 This is performed in two situations.
7255 1) In read_multi_range this is the normal case
7256 2) When performing any type of index_read, index_first, index_last where
7257 all fields in the partition function is bound. In this case the index
7258 scan is performed on only one partition and thus it isn't necessary to
7259 perform any sort.
7260*/
7261
7262int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
7263{
7264 handler *file;
7265 int error;
7266 DBUG_ENTER("ha_partition::handle_unordered_next");
7267
7268 if (m_part_spec.start_part >= m_tot_parts)
7269 {
7270 /* Should never happen! */
7271 DBUG_ASSERT(0);
7272 DBUG_RETURN(HA_ERR_END_OF_FILE);
7273 }
7274 file= m_file[m_part_spec.start_part];
7275
7276 /*
7277 We should consider if this should be split into three functions as
7278 partition_read_range is_next_same are always local constants
7279 */
7280
7281 if (m_index_scan_type == partition_read_multi_range)
7282 {
7283 if (likely(!(error= file->
7284 multi_range_read_next(&m_range_info[m_part_spec.start_part]))))
7285 {
7286 m_last_part= m_part_spec.start_part;
7287 DBUG_RETURN(0);
7288 }
7289 }
7290 else if (m_index_scan_type == partition_read_range)
7291 {
7292 if (likely(!(error= file->read_range_next())))
7293 {
7294 m_last_part= m_part_spec.start_part;
7295 DBUG_RETURN(0);
7296 }
7297 }
7298 else if (is_next_same)
7299 {
7300 if (likely(!(error= file->ha_index_next_same(buf, m_start_key.key,
7301 m_start_key.length))))
7302 {
7303 m_last_part= m_part_spec.start_part;
7304 DBUG_RETURN(0);
7305 }
7306 }
7307 else
7308 {
7309 if (likely(!(error= file->ha_index_next(buf))))
7310 {
7311 m_last_part= m_part_spec.start_part;
7312 DBUG_RETURN(0); // Row was in range
7313 }
7314 }
7315
7316 if (unlikely(error == HA_ERR_END_OF_FILE))
7317 {
7318 m_part_spec.start_part++; // Start using next part
7319 error= handle_unordered_scan_next_partition(buf);
7320 }
7321 DBUG_RETURN(error);
7322}
7323
7324
7325/*
7326 Handle index_next when changing to new partition
7327
7328 SYNOPSIS
7329 handle_unordered_scan_next_partition()
7330 buf Read row in MariaDB Row Format
7331
7332 RETURN VALUE
7333 HA_ERR_END_OF_FILE End of scan
7334 0 Success
7335 other Error code
7336
7337 DESCRIPTION
7338 This routine is used to start the index scan on the next partition.
7339 Both initial start and after completing scan on one partition.
7340*/
7341
int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
{
  uint i= m_part_spec.start_part;
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_unordered_scan_next_partition");

  /* Read next partition that includes start_part */
  if (i)
    i= bitmap_get_next_set(&m_part_info->read_partitions, i - 1);
  else
    i= bitmap_get_first_set(&m_part_info->read_partitions);

  /* Try each used partition in turn until one produces a row. */
  for (;
       i <= m_part_spec.end_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    int error;
    handler *file= m_file[i];
    m_part_spec.start_part= i;

    switch (m_index_scan_type) {
    case partition_read_multi_range:
      /* Skip partitions with no MRR ranges. */
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
        continue;
      DBUG_PRINT("info", ("read_multi_range on partition %u", i));
      error= file->multi_range_read_next(&m_range_info[i]);
      break;
    case partition_read_range:
      DBUG_PRINT("info", ("read_range_first on partition %u", i));
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, FALSE);
      break;
    case partition_index_read:
      DBUG_PRINT("info", ("index_read on partition %u", i));
      error= file->ha_index_read_map(buf, m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      break;
    case partition_index_first:
      DBUG_PRINT("info", ("index_first on partition %u", i));
      error= file->ha_index_first(buf);
      break;
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(1);
    }
    if (likely(!error))
    {
      m_last_part= i;
      DBUG_RETURN(0);
    }
    /* Any error besides "empty partition" ends the scan immediately. */
    if (likely((error != HA_ERR_END_OF_FILE) &&
               (error != HA_ERR_KEY_NOT_FOUND)))
      DBUG_RETURN(error);

    /*
      If HA_ERR_KEY_NOT_FOUND, we must return that error instead of
      HA_ERR_END_OF_FILE, to be able to continue search.
    */
    if (saved_error != HA_ERR_KEY_NOT_FOUND)
      saved_error= error;
    DBUG_PRINT("info", ("END_OF_FILE/KEY_NOT_FOUND on partition %u", i));
  }
  /* Whole scan exhausted: there is no current partition any more. */
  if (saved_error == HA_ERR_END_OF_FILE)
    m_part_spec.start_part= NO_CURRENT_PART_ID;
  DBUG_RETURN(saved_error);
}
7409
7410
7411/**
7412 Common routine to start index scan with ordered results.
7413
7414 @param[out] buf Read row in MariaDB Row Format
7415
7416 @return Operation status
7417 @retval HA_ERR_END_OF_FILE End of scan
    @retval HA_ERR_KEY_NOT_FOUND  End of scan
7419 @retval 0 Success
7420 @retval other Error code
7421
7422 @details
7423 This part contains the logic to handle index scans that require ordered
7424 output. This includes all except those started by read_range_first with
7425 the flag ordered set to FALSE. Thus most direct index_read and all
7426 index_first and index_last.
7427
7428 We implement ordering by keeping one record plus a key buffer for each
7429 partition. Every time a new entry is requested we will fetch a new
7430 entry from the partition that is currently not filled with an entry.
7431 Then the entry is put into its proper sort position.
7432
7433 Returning a record is done by getting the top record, copying the
7434 record to the request buffer and setting the partition as empty on
7435 entries.
7436*/
7437
int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
{
  int error;
  uint i;
  uint j= queue_first_element(&m_queue);   // next free slot in the queue
  uint smallest_range_seq= 0;              // smallest MRR range id seen (0 = none)
  bool found= FALSE;
  uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
  int saved_error= HA_ERR_END_OF_FILE;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan");
  DBUG_PRINT("enter", ("partition this: %p", this));

  /* Prepare the underlying scans; may enable parallel scan of partitions */
  if (m_pre_calling)
    error= handle_pre_scan(reverse_order, m_pre_call_use_parallel);
  else
    error= handle_pre_scan(reverse_order, check_parallel_search());
  if (unlikely(error))
    DBUG_RETURN(error);

  if (m_key_not_found)
  {
    /* m_key_not_found was set in the previous call to this function */
    m_key_not_found= false;
    bitmap_clear_all(&m_key_not_found_partitions);
  }
  m_top_entry= NO_CURRENT_PART_ID;
  DBUG_PRINT("info", ("partition queue_remove_all(1)"));
  queue_remove_all(&m_queue);
  DBUG_ASSERT(bitmap_is_set(&m_part_info->read_partitions,
                            m_part_spec.start_part));

  /*
    Position part_rec_buf_ptr to point to the first used partition >=
    start_part. There may be partitions marked in read_partitions that
    precede start_part. Those partitions have allocated record buffers
    but are dynamically pruned, so their buffers must be skipped.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_part_spec.start_part;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    part_rec_buf_ptr+= m_priority_queue_rec_len;
  }
  DBUG_PRINT("info", ("m_part_spec.start_part %u first_used_part %u",
                      m_part_spec.start_part, i));
  for (/* continue from above */ ;
       i <= m_part_spec.end_part ;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i),
       part_rec_buf_ptr+= m_priority_queue_rec_len)
  {
    DBUG_PRINT("info", ("reading from part %u (scan_type: %u)",
                        i, m_index_scan_type));
    DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr));
    uchar *rec_buf_ptr= part_rec_buf_ptr + PARTITION_BYTES_IN_POS;
    handler *file= m_file[i];

    /* Fetch the first row from this partition according to the scan type */
    switch (m_index_scan_type) {
    case partition_index_read:
      error= file->ha_index_read_map(rec_buf_ptr,
                                     m_start_key.key,
                                     m_start_key.keypart_map,
                                     m_start_key.flag);
      /* Caller has specified reverse_order */
      break;
    case partition_index_first:
      error= file->ha_index_first(rec_buf_ptr);
      reverse_order= FALSE;
      break;
    case partition_index_last:
      error= file->ha_index_last(rec_buf_ptr);
      reverse_order= TRUE;
      break;
    case partition_read_range:
    {
      /*
        This can only read record to table->record[0], as it was set when
        the table was being opened. We have to memcpy data ourselves.
      */
      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
                                    end_range, eq_range, TRUE);
      if (likely(!error))
        memcpy(rec_buf_ptr, table->record[0], m_rec_length);
      reverse_order= FALSE;
      break;
    }
    case partition_read_multi_range:
    {
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
        continue;
      DBUG_PRINT("info", ("partition %u", i));
      error= file->multi_range_read_next(&m_range_info[i]);
      DBUG_PRINT("info", ("error: %d", error));
      if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE)
      {
        /* This partition has no (more) rows for the MRR scan */
        bitmap_clear_bit(&m_mrr_used_partitions, i);
        continue;
      }
      if (likely(!error))
      {
        memcpy(rec_buf_ptr, table->record[0], m_rec_length);
        reverse_order= FALSE;
        m_stock_range_seq[i]= (((PARTITION_KEY_MULTI_RANGE *)
                                m_range_info[i])->id);
        /* Test if the key is in the first key range */
        if (m_stock_range_seq[i] != m_mrr_range_current->id)
        {
          /*
            smallest_range_seq contains the smallest key range we have seen
            so far
          */
          if (!smallest_range_seq || smallest_range_seq > m_stock_range_seq[i])
            smallest_range_seq= m_stock_range_seq[i];
          continue;
        }
      }
      break;
    }
    default:
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    if (likely(!error))
    {
      found= TRUE;
      if (!m_using_extended_keys)
      {
        /* Save the row position after the record for later positioning */
        file->position(rec_buf_ptr);
        memcpy(rec_buf_ptr + m_rec_length, file->ref, file->ref_length);
      }
      /*
        Initialize queue without order first, simply insert
      */
      queue_element(&m_queue, j++)= part_rec_buf_ptr;
    }
    else if (error == HA_ERR_KEY_NOT_FOUND)
    {
      /* Remember this partition so index_next can include it later */
      DBUG_PRINT("info", ("HA_ERR_KEY_NOT_FOUND from partition %u", i));
      bitmap_set_bit(&m_key_not_found_partitions, i);
      m_key_not_found= true;
      saved_error= error;
    }
    else if (error != HA_ERR_END_OF_FILE)
    {
      DBUG_RETURN(error);
    }
  }

  if (!found && smallest_range_seq)
  {
    /* We know that there is an existing row based on code above */
    found= TRUE;
    part_rec_buf_ptr= m_ordered_rec_buffer;

    /*
      No key found in the first key range
      Collect all partitions that has a key in smallest_range_seq
    */
    DBUG_PRINT("info", ("partition !found && smallest_range_seq"));
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i <= m_part_spec.end_part;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      DBUG_PRINT("info", ("partition current_part: %u", i));
      if (i < m_part_spec.start_part)
      {
        part_rec_buf_ptr+= m_priority_queue_rec_len;
        DBUG_PRINT("info", ("partition i < m_part_spec.start_part"));
        continue;
      }
      if (!bitmap_is_set(&m_mrr_used_partitions, i))
      {
        part_rec_buf_ptr+= m_priority_queue_rec_len;
        DBUG_PRINT("info", ("partition !bitmap_is_set(&m_mrr_used_partitions, i)"));
        continue;
      }
      DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr));
      if (smallest_range_seq == m_stock_range_seq[i])
      {
        m_stock_range_seq[i]= 0;
        queue_element(&m_queue, j++)= (uchar *) part_rec_buf_ptr;
        DBUG_PRINT("info", ("partition smallest_range_seq == m_stock_range_seq[i]"));
      }
      part_rec_buf_ptr+= m_priority_queue_rec_len;
    }

    /* Update global m_mrr_range_current to the current range */
    while (m_mrr_range_current->id < smallest_range_seq)
      m_mrr_range_current= m_mrr_range_current->next;
  }
  if (found)
  {
    /*
      We found at least one partition with data, now sort all entries and
      after that read the first entry and copy it to the buffer to return in.
    */
    queue_set_max_at_top(&m_queue, reverse_order);
    queue_set_cmp_arg(&m_queue, (void*) this);
    m_queue.elements= j - queue_first_element(&m_queue);
    queue_fix(&m_queue);
    return_top_record(buf);
    DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
    DBUG_RETURN(0);
  }
  DBUG_RETURN(saved_error);
}
7643
7644
7645/*
7646 Return the top record in sort order
7647
7648 SYNOPSIS
7649 return_top_record()
7650 out:buf Row returned in MySQL Row Format
7651
7652 RETURN VALUE
7653 NONE
7654*/
7655
7656void ha_partition::return_top_record(uchar *buf)
7657{
7658 uint part_id;
7659 uchar *key_buffer= queue_top(&m_queue);
7660 uchar *rec_buffer= key_buffer + PARTITION_BYTES_IN_POS;
7661 DBUG_ENTER("ha_partition::return_top_record");
7662 DBUG_PRINT("enter", ("partition this: %p", this));
7663
7664 part_id= uint2korr(key_buffer);
7665 memcpy(buf, rec_buffer, m_rec_length);
7666 m_last_part= part_id;
7667 DBUG_PRINT("info", ("partition m_last_part: %u", m_last_part));
7668 m_top_entry= part_id;
7669 table->status= 0; // Found an existing row
7670 m_file[part_id]->return_record_by_parent();
7671 DBUG_VOID_RETURN;
7672}
7673
7674/*
7675 This function is only used if the partitioned table has own partitions.
7676 This can happen if the partitioned VP engine is used (part of spider).
7677*/
7678
void ha_partition::return_record_by_parent()
{
  /* Delegate to the handler of the partition that produced the last row */
  m_file[m_last_part]->return_record_by_parent();
  /* Not expected to be reached for a plain partition handler */
  DBUG_ASSERT(0);
}
7684
7685
7686/**
7687 Add index_next/prev from partitions without exact match.
7688
7689 If there where any partitions that returned HA_ERR_KEY_NOT_FOUND when
7690 ha_index_read_map was done, those partitions must be included in the
7691 following index_next/prev call.
7692*/
7693
int ha_partition::handle_ordered_index_scan_key_not_found()
{
  int error;
  uint i, old_elements= m_queue.elements;
  uchar *part_buf= m_ordered_rec_buffer;
  uchar *curr_rec_buf= NULL;
  DBUG_ENTER("ha_partition::handle_ordered_index_scan_key_not_found");
  DBUG_PRINT("enter", ("partition this: %p", this));
  DBUG_ASSERT(m_key_not_found);
  /*
    Loop over all used partitions to get the correct offset
    into m_ordered_rec_buffer.
  */
  for (i= bitmap_get_first_set(&m_part_info->read_partitions);
       i < m_tot_parts;
       i= bitmap_get_next_set(&m_part_info->read_partitions, i))
  {
    if (bitmap_is_set(&m_key_not_found_partitions, i))
    {
      /*
        This partition is used and did return HA_ERR_KEY_NOT_FOUND
        in index_read_map.
      */
      curr_rec_buf= part_buf + PARTITION_BYTES_IN_POS;
      error= m_file[i]->ha_index_next(curr_rec_buf);
      /* HA_ERR_KEY_NOT_FOUND is not allowed from index_next! */
      DBUG_ASSERT(error != HA_ERR_KEY_NOT_FOUND);
      if (likely(!error))
      {
        DBUG_PRINT("info", ("partition queue_insert(1)"));
        queue_insert(&m_queue, part_buf);
      }
      /*
        NOTE(review): HA_ERR_KEY_NOT_FOUND is asserted impossible above yet
        still tolerated here in release builds - presumably defensive.
      */
      else if (error != HA_ERR_END_OF_FILE && error != HA_ERR_KEY_NOT_FOUND)
        DBUG_RETURN(error);
    }
    /* One buffer slot per used partition; advance even when nothing inserted */
    part_buf += m_priority_queue_rec_len;
  }
  DBUG_ASSERT(curr_rec_buf);
  bitmap_clear_all(&m_key_not_found_partitions);
  m_key_not_found= false;

  if (m_queue.elements > old_elements)
  {
    /* Update m_top_entry, which may have changed. */
    uchar *key_buffer= queue_top(&m_queue);
    m_top_entry= uint2korr(key_buffer);
  }
  DBUG_RETURN(0);
}
7743
7744
7745/*
7746 Common routine to handle index_next with ordered results
7747
7748 SYNOPSIS
7749 handle_ordered_next()
7750 out:buf Read row in MySQL Row Format
7751 next_same Called from index_next_same
7752
7753 RETURN VALUE
7754 HA_ERR_END_OF_FILE End of scan
7755 0 Success
7756 other Error code
7757*/
7758
int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
{
  int error;
  DBUG_ENTER("ha_partition::handle_ordered_next");

  /* No partition at the queue top means the ordered scan is exhausted */
  if (m_top_entry == NO_CURRENT_PART_ID)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  uint part_id= m_top_entry;
  uchar *rec_buf= queue_top(&m_queue) + PARTITION_BYTES_IN_POS;
  handler *file;

  if (m_key_not_found)
  {
    if (is_next_same)
    {
      /* Only rows which match the key. */
      m_key_not_found= false;
      bitmap_clear_all(&m_key_not_found_partitions);
    }
    else
    {
      /* There are partitions not included in the index record queue. */
      uint old_elements= m_queue.elements;
      if (unlikely((error= handle_ordered_index_scan_key_not_found())))
        DBUG_RETURN(error);
      /*
        If the queue top changed, i.e. one of the partitions that gave
        HA_ERR_KEY_NOT_FOUND in index_read_map found the next record,
        return it.
        Otherwise replace the old with a call to index_next (fall through).
      */
      if (old_elements != m_queue.elements && part_id != m_top_entry)
      {
        return_top_record(buf);
        DBUG_RETURN(0);
      }
    }
  }
  if (part_id >= m_tot_parts)
  {
    /* This should never happen! */
    DBUG_ASSERT(0);
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }

  file= m_file[part_id];

  if (m_index_scan_type == partition_read_range)
  {
    /* read_range_next can only fill table->record[0]; copy it out */
    error= file->read_range_next();
    memcpy(rec_buf, table->record[0], m_rec_length);
  }
  else if (m_index_scan_type == partition_read_multi_range)
  {
    DBUG_PRINT("info", ("partition_read_multi_range route"));
    DBUG_PRINT("info", ("part_id: %u", part_id));
    bool get_next= FALSE;
    error= file->multi_range_read_next(&m_range_info[part_id]);
    DBUG_PRINT("info", ("error: %d", error));
    if (unlikely(error == HA_ERR_KEY_NOT_FOUND))
      error= HA_ERR_END_OF_FILE;
    if (unlikely(error == HA_ERR_END_OF_FILE))
    {
      /* This partition is done with MRR; serve buffered rows if any */
      bitmap_clear_bit(&m_mrr_used_partitions, part_id);
      DBUG_PRINT("info", ("partition m_queue.elements: %u", m_queue.elements));
      if (m_queue.elements)
      {
        DBUG_PRINT("info", ("partition queue_remove_top(1)"));
        queue_remove_top(&m_queue);
        if (m_queue.elements)
        {
          return_top_record(buf);
          DBUG_PRINT("info", ("Record returned from partition %u (3)",
                              m_top_entry));
          DBUG_RETURN(0);
        }
      }
      get_next= TRUE;
    }
    else if (likely(!error))
    {
      DBUG_PRINT("info", ("m_range_info[%u])->id: %u", part_id,
                          ((PARTITION_KEY_MULTI_RANGE *)
                           m_range_info[part_id])->id));
      DBUG_PRINT("info", ("m_mrr_range_current->id: %u",
                          m_mrr_range_current->id));
      memcpy(rec_buf, table->record[0], m_rec_length);
      if (((PARTITION_KEY_MULTI_RANGE *) m_range_info[part_id])->id !=
          m_mrr_range_current->id)
      {
        /* Row belongs to a later key range; park it for that range */
        m_stock_range_seq[part_id]=
          ((PARTITION_KEY_MULTI_RANGE *) m_range_info[part_id])->id;
        DBUG_PRINT("info", ("partition queue_remove_top(2)"));
        queue_remove_top(&m_queue);
        if (!m_queue.elements)
          get_next= TRUE;
      }
    }
    if (get_next)
    {
      /* Queue drained; rebuild it from the smallest parked key range */
      DBUG_PRINT("info", ("get_next route"));
      uint i, j= 0, smallest_range_seq= UINT_MAX32;
      for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
      {
        if (!(bitmap_is_set(&(m_part_info->read_partitions), i)))
          continue;
        if (!bitmap_is_set(&m_mrr_used_partitions, i))
          continue;
        if (smallest_range_seq > m_stock_range_seq[i])
          smallest_range_seq= m_stock_range_seq[i];
      }

      DBUG_PRINT("info", ("smallest_range_seq: %u", smallest_range_seq));
      if (smallest_range_seq != UINT_MAX32)
      {
        uchar *part_rec_buf_ptr= m_ordered_rec_buffer;
        DBUG_PRINT("info", ("partition queue_remove_all(2)"));
        queue_remove_all(&m_queue);
        DBUG_PRINT("info", ("m_part_spec.start_part: %u",
                            m_part_spec.start_part));

        for (i= bitmap_get_first_set(&m_part_info->read_partitions);
             i <= m_part_spec.end_part;
             i= bitmap_get_next_set(&m_part_info->read_partitions, i),
             part_rec_buf_ptr+= m_priority_queue_rec_len)
        {
          DBUG_PRINT("info",("partition part_id: %u", i));
          if (i < m_part_spec.start_part)
          {
            DBUG_PRINT("info",("partition i < m_part_spec.start_part"));
            continue;
          }
          if (!bitmap_is_set(&m_mrr_used_partitions, i))
          {
            DBUG_PRINT("info",("partition !bitmap_is_set(&m_mrr_used_partitions, i)"));
            continue;
          }
          DBUG_PRINT("info",("partition uint2korr: %u",
                             uint2korr(part_rec_buf_ptr)));
          DBUG_ASSERT(i == uint2korr(part_rec_buf_ptr));
          DBUG_PRINT("info", ("partition m_stock_range_seq[%u]: %u",
                              i, m_stock_range_seq[i]));
          if (smallest_range_seq == m_stock_range_seq[i])
          {
            /* This partition's parked row is in the new current range */
            m_stock_range_seq[i]= 0;
            DBUG_PRINT("info", ("partition queue_insert(2)"));
            queue_insert(&m_queue, part_rec_buf_ptr);
            j++;
          }
        }
        /* Advance the global current range to the one we just collected */
        while (m_mrr_range_current->id < smallest_range_seq)
          m_mrr_range_current= m_mrr_range_current->next;

        DBUG_PRINT("info",("partition m_mrr_range_current: %p",
                           m_mrr_range_current));
        DBUG_PRINT("info",("partition m_mrr_range_current->id: %u",
                           m_mrr_range_current ? m_mrr_range_current->id : 0));
        queue_set_max_at_top(&m_queue, FALSE);
        queue_set_cmp_arg(&m_queue, (void*) this);
        m_queue.elements= j;
        queue_fix(&m_queue);
        return_top_record(buf);
        DBUG_PRINT("info", ("Record returned from partition %u (4)",
                            m_top_entry));
        DBUG_RETURN(0);
      }
    }
  }
  else if (!is_next_same)
    error= file->ha_index_next(rec_buf);
  else
    error= file->ha_index_next_same(rec_buf, m_start_key.key,
                                    m_start_key.length);

  if (unlikely(error))
  {
    if (error == HA_ERR_END_OF_FILE && m_queue.elements)
    {
      /* Return next buffered row */
      DBUG_PRINT("info", ("partition queue_remove_top(3)"));
      queue_remove_top(&m_queue);
      if (m_queue.elements)
      {
        return_top_record(buf);
        DBUG_PRINT("info", ("Record returned from partition %u (2)",
                            m_top_entry));
        error= 0;
      }
    }
    DBUG_RETURN(error);
  }

  if (!m_using_extended_keys)
  {
    /* Save the row position after the record for later positioning */
    file->position(rec_buf);
    memcpy(rec_buf + m_rec_length, file->ref, file->ref_length);
  }

  queue_replace_top(&m_queue);
  return_top_record(buf);
  DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
  DBUG_RETURN(0);
}
7963
7964
7965/*
7966 Common routine to handle index_prev with ordered results
7967
7968 SYNOPSIS
7969 handle_ordered_prev()
7970 out:buf Read row in MySQL Row Format
7971
7972 RETURN VALUE
7973 HA_ERR_END_OF_FILE End of scan
7974 0 Success
7975 other Error code
7976*/
7977
7978int ha_partition::handle_ordered_prev(uchar *buf)
7979{
7980 int error;
7981 DBUG_ENTER("ha_partition::handle_ordered_prev");
7982 DBUG_PRINT("enter", ("partition: %p", this));
7983
7984 if (m_top_entry == NO_CURRENT_PART_ID)
7985 DBUG_RETURN(HA_ERR_END_OF_FILE);
7986
7987 uint part_id= m_top_entry;
7988 uchar *rec_buf= queue_top(&m_queue) + PARTITION_BYTES_IN_POS;
7989 handler *file= m_file[part_id];
7990
7991 if (unlikely((error= file->ha_index_prev(rec_buf))))
7992 {
7993 if (error == HA_ERR_END_OF_FILE && m_queue.elements)
7994 {
7995 DBUG_PRINT("info", ("partition queue_remove_top(4)"));
7996 queue_remove_top(&m_queue);
7997 if (m_queue.elements)
7998 {
7999 return_top_record(buf);
8000 DBUG_PRINT("info", ("Record returned from partition %u (2)",
8001 m_top_entry));
8002 error= 0;
8003 }
8004 }
8005 DBUG_RETURN(error);
8006 }
8007 queue_replace_top(&m_queue);
8008 return_top_record(buf);
8009 DBUG_PRINT("info", ("Record returned from partition %u", m_top_entry));
8010 DBUG_RETURN(0);
8011}
8012
8013
8014/****************************************************************************
8015 MODULE information calls
8016****************************************************************************/
8017
8018/*
8019 These are all first approximations of the extra, info, scan_time
8020 and read_time calls
8021*/
8022
8023/**
8024 Helper function for sorting according to number of rows in descending order.
8025*/
8026
8027int ha_partition::compare_number_of_records(ha_partition *me,
8028 const uint32 *a,
8029 const uint32 *b)
8030{
8031 handler **file= me->m_file;
8032 /* Note: sorting in descending order! */
8033 if (file[*a]->stats.records > file[*b]->stats.records)
8034 return -1;
8035 if (file[*a]->stats.records < file[*b]->stats.records)
8036 return 1;
8037 return 0;
8038}
8039
8040
8041/*
8042 General method to gather info from handler
8043
8044 SYNOPSIS
8045 info()
8046 flag Specifies what info is requested
8047
8048 RETURN VALUE
8049 NONE
8050
8051 DESCRIPTION
8052 ::info() is used to return information to the optimizer.
8053 Currently this table handler doesn't implement most of the fields
8054 really needed. SHOW also makes use of this data
8055 Another note, if your handler doesn't provide exact record count,
8056 you will probably want to have the following in your code:
8057 if (records < 2)
8058 records = 2;
8059 The reason is that the server will optimize for cases of only a single
8060 record. If in a table scan you don't know the number of records
8061 it will probably be better to set records to two so you can return
8062 as many records as you need.
8063
8064 Along with records a few more variables you may wish to set are:
8065 records
8066 deleted
8067 data_file_length
8068 index_file_length
8069 delete_length
8070 check_time
8071 Take a look at the public variables in handler.h for more information.
8072
8073 Called in:
8074 filesort.cc
8075 ha_heap.cc
8076 item_sum.cc
8077 opt_sum.cc
8078 sql_delete.cc
8079 sql_delete.cc
8080 sql_derived.cc
8081 sql_select.cc
8082 sql_select.cc
8083 sql_select.cc
8084 sql_select.cc
8085 sql_select.cc
8086 sql_show.cc
8087 sql_show.cc
8088 sql_show.cc
8089 sql_show.cc
8090 sql_table.cc
8091 sql_union.cc
8092 sql_update.cc
8093
8094 Some flags that are not implemented
8095 HA_STATUS_POS:
8096 This parameter is never used from the MySQL Server. It is checked in a
8097 place in MyISAM so could potentially be used by MyISAM specific
8098 programs.
8099 HA_STATUS_NO_LOCK:
8100 This is declared and often used. It's only used by MyISAM.
8101 It means that MySQL doesn't need the absolute latest statistics
8102 information. This may save the handler from doing internal locks while
8103 retrieving statistics data.
8104*/
8105
int ha_partition::info(uint flag)
{
  uint no_lock_flag= flag & HA_STATUS_NO_LOCK;
  uint extra_var_flag= flag & HA_STATUS_VARIABLE_EXTRA;
  DBUG_ENTER("ha_partition::info");

#ifndef DBUG_OFF
  if (bitmap_is_set_all(&(m_part_info->read_partitions)))
    DBUG_PRINT("info", ("All partitions are used"));
#endif /* DBUG_OFF */
  if (flag & HA_STATUS_AUTO)
  {
    bool auto_inc_is_first_in_idx= (table_share->next_number_keypart == 0);
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (!table->found_next_number_field)
      stats.auto_increment_value= 0;
    else if (part_share->auto_inc_initialized)
    {
      /* Cached value available; read it under the auto_inc mutex */
      lock_auto_increment();
      stats.auto_increment_value= part_share->next_auto_inc_val;
      unlock_auto_increment();
    }
    else
    {
      lock_auto_increment();
      /* to avoid two concurrent initializations, check again when locked */
      if (part_share->auto_inc_initialized)
        stats.auto_increment_value= part_share->next_auto_inc_val;
      else
      {
        /*
          The auto-inc mutex in the table_share is locked, so we do not need
          to have the handlers locked.
          HA_STATUS_NO_LOCK is not checked, since we cannot skip locking
          the mutex, because it is initialized.
        */
        handler *file, **file_array;
        ulonglong auto_increment_value= 0;
        file_array= m_file;
        DBUG_PRINT("info",
                   ("checking all partitions for auto_increment_value"));
        /* The maximum over all partitions is the table's next value */
        do
        {
          file= *file_array;
          file->info(HA_STATUS_AUTO | no_lock_flag);
          set_if_bigger(auto_increment_value,
                        file->stats.auto_increment_value);
        } while (*(++file_array));

        DBUG_ASSERT(auto_increment_value);
        stats.auto_increment_value= auto_increment_value;
        if (auto_inc_is_first_in_idx)
        {
          set_if_bigger(part_share->next_auto_inc_val,
                        auto_increment_value);
          if (can_use_for_auto_inc_init())
            part_share->auto_inc_initialized= true;
          DBUG_PRINT("info", ("initializing next_auto_inc_val to %lu",
                              (ulong) part_share->next_auto_inc_val));
        }
      }
      unlock_auto_increment();
    }
  }
  if (flag & HA_STATUS_VARIABLE)
  {
    uint i;
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    /*
      Calculates statistical variables
      records:           Estimate of number records in table
                         We report sum (always at least 2 if not empty)
      deleted:           Estimate of number holes in the table due to
                         deletes
                         We report sum
      data_file_length:  Length of data file, in principle bytes in table
                         We report sum
      index_file_length: Length of index file, in principle bytes in
                         indexes in the table
                         We report sum
      delete_length:     Length of free space easily used by new records in table
                         We report sum
      mean_record_length:Mean record length in the table
                         We calculate this
      check_time:        Time of last check (only applicable to MyISAM)
                         We report last time of all underlying handlers
    */
    handler *file;
    stats.records= 0;
    stats.deleted= 0;
    stats.data_file_length= 0;
    stats.index_file_length= 0;
    stats.check_time= 0;
    stats.delete_length= 0;
    /* Sum statistics over all used (non-pruned) partitions */
    for (i= bitmap_get_first_set(&m_part_info->read_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->read_partitions, i))
    {
      file= m_file[i];
      file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
      stats.records+= file->stats.records;
      stats.deleted+= file->stats.deleted;
      stats.data_file_length+= file->stats.data_file_length;
      stats.index_file_length+= file->stats.index_file_length;
      stats.delete_length+= file->stats.delete_length;
      if (file->stats.check_time > stats.check_time)
        stats.check_time= file->stats.check_time;
    }
    /* Report at least 2 rows for inexact stats so the optimizer does not
       assume a single-row table */
    if (stats.records && stats.records < 2 &&
        !(m_file[0]->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT))
      stats.records= 2;
    if (stats.records > 0)
      stats.mean_rec_length= (ulong) (stats.data_file_length / stats.records);
    else
      stats.mean_rec_length= 0;
  }
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    /*
      Recalculate loads of constant variables. MyISAM also sets things
      directly on the table share object.

      Check whether this should be fixed since handlers should not
      change things directly on the table object.

      Monty comment: This should NOT be changed! It's the handlers
      responsibility to correct table->s->keys_xxxx information if keys
      have been disabled.

      The most important parameters set here is records per key on
      all indexes. block_size and primary key ref_length.

      For each index there is an array of rec_per_key.
      As an example if we have an index with three attributes a,b and c
      we will have an array of 3 rec_per_key.
      rec_per_key[0] is an estimate of number of records divided by
      number of unique values of the field a.
      rec_per_key[1] is an estimate of the number of records divided
      by the number of unique combinations of the fields a and b.
      rec_per_key[2] is an estimate of the number of records divided
      by the number of unique combinations of the fields a,b and c.

      Many handlers only set the value of rec_per_key when all fields
      are bound (rec_per_key[2] in the example above).

      If the handler doesn't support statistics, it should set all of the
      above to 0.

      We first scan through all partitions to get the one holding most rows.
      We will then allow the handler with the most rows to set
      the rec_per_key and use this as an estimate on the total table.

      max_data_file_length:     Maximum data file length
                                We ignore it, is only used in
                                SHOW TABLE STATUS
      max_index_file_length:    Maximum index file length
                                We ignore it since it is never used
      block_size:               Block size used
                                We set it to the value of the first handler
      ref_length:               We set this to the value calculated
                                and stored in local object
      create_time:              Creation time of table

      So we calculate these constants by using the variables from the
      handler with most rows.
    */
    handler *file, **file_array;
    ulonglong max_records= 0;
    uint32 i= 0;
    uint32 handler_instance= 0;

    file_array= m_file;
    do
    {
      file= *file_array;
      if (bitmap_is_set(&(m_opened_partitions), (uint)(file_array - m_file)))
      {
        /* Get variables if not already done */
        if (!(flag & HA_STATUS_VARIABLE) ||
            !bitmap_is_set(&(m_part_info->read_partitions),
                           (uint) (file_array - m_file)))
          file->info(HA_STATUS_VARIABLE | no_lock_flag | extra_var_flag);
        if (file->stats.records > max_records)
        {
          max_records= file->stats.records;
          handler_instance= i;
        }
      }
      i++;
    } while (*(++file_array));
    /*
      Sort the array of part_ids by number of records
      in descending order.
    */
    my_qsort2((void*) m_part_ids_sorted_by_num_of_records,
              m_tot_parts,
              sizeof(uint32),
              (qsort2_cmp) compare_number_of_records,
              this);

    /* Let the partition with most rows supply the constant statistics */
    file= m_file[handler_instance];
    file->info(HA_STATUS_CONST | no_lock_flag);
    stats.block_size= file->stats.block_size;
    stats.create_time= file->stats.create_time;
    ref_length= m_ref_length;
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    handler *file= m_file[m_last_part];
    DBUG_PRINT("info", ("info: HA_STATUS_ERRKEY"));
    /*
      This flag is used to get index number of the unique index that
      reported duplicate key
      We will report the errkey on the last handler used and ignore the rest
      Note: not all engines support HA_STATUS_ERRKEY, so set errkey first.
    */
    file->errkey= errkey;
    file->info(HA_STATUS_ERRKEY | no_lock_flag);
    errkey= file->errkey;
  }
  if (flag & HA_STATUS_TIME)
  {
    handler *file, **file_array;
    DBUG_PRINT("info", ("info: HA_STATUS_TIME"));
    /*
      This flag is used to set the latest update time of the table.
      Used by SHOW commands
      We will report the maximum of these times
    */
    stats.update_time= 0;
    file_array= m_file;
    do
    {
      file= *file_array;
      file->info(HA_STATUS_TIME | no_lock_flag);
      if (file->stats.update_time > stats.update_time)
        stats.update_time= file->stats.update_time;
    } while (*(++file_array));
  }
  DBUG_RETURN(0);
}
8348
8349
8350void ha_partition::get_dynamic_partition_info(PARTITION_STATS *stat_info,
8351 uint part_id)
8352{
8353 handler *file= m_file[part_id];
8354 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id));
8355 file->info(HA_STATUS_TIME | HA_STATUS_VARIABLE |
8356 HA_STATUS_VARIABLE_EXTRA | HA_STATUS_NO_LOCK);
8357
8358 stat_info->records= file->stats.records;
8359 stat_info->mean_rec_length= file->stats.mean_rec_length;
8360 stat_info->data_file_length= file->stats.data_file_length;
8361 stat_info->max_data_file_length= file->stats.max_data_file_length;
8362 stat_info->index_file_length= file->stats.index_file_length;
8363 stat_info->max_index_file_length= file->stats.max_index_file_length;
8364 stat_info->delete_length= file->stats.delete_length;
8365 stat_info->create_time= file->stats.create_time;
8366 stat_info->update_time= file->stats.update_time;
8367 stat_info->check_time= file->stats.check_time;
8368 stat_info->check_sum= 0;
8369 if (file->ha_table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM))
8370 stat_info->check_sum= file->checksum();
8371 return;
8372}
8373
8374
void ha_partition::set_partitions_to_open(List<String> *partition_names)
{
  /* Remember the explicit partition name list; consulted at open time */
  m_partitions_to_open= partition_names;
}
8379
8380
int ha_partition::open_read_partitions(char *name_buff, size_t name_buff_size)
{
  handler **file;
  char *name_buffer_ptr;
  int error= 0;

  /* name_buffer_ptr walks the concatenated NUL-separated partition names */
  name_buffer_ptr= m_name_buffer_ptr;
  file= m_file;
  m_file_sample= NULL;
  do
  {
    int n_file= (int)(file-m_file);
    int is_open= bitmap_is_set(&m_opened_partitions, n_file);
    int should_be_open= bitmap_is_set(&m_part_info->read_partitions, n_file);

    /*
      TODO: we can close some opened partitions if they're not
      used in the query. It probably should be synchronized with the
      table_open_cache value.

      if (is_open && !should_be_open)
      {
        if (unlikely((error= (*file)->ha_close())))
          goto err_handler;
        bitmap_clear_bit(&m_opened_partitions, n_file);
      }
      else
    */
    if (!is_open && should_be_open)
    {
      /* Temporarily swap in the per-partition connect string for ha_open */
      LEX_CSTRING save_connect_string= table->s->connect_string;
      if (unlikely((error=
                    create_partition_name(name_buff, name_buff_size,
                                          table->s->normalized_path.str,
                                          name_buffer_ptr, NORMAL_PART_NAME,
                                          FALSE))))
        goto err_handler;
      if (!((*file)->ht->flags & HTON_CAN_READ_CONNECT_STRING_IN_PARTITION))
        table->s->connect_string= m_connect_string[(uint)(file-m_file)];
      error= (*file)->ha_open(table, name_buff, m_mode,
                              m_open_test_lock | HA_OPEN_NO_PSI_CALL);
      table->s->connect_string= save_connect_string;
      if (error)
        goto err_handler;
      bitmap_set_bit(&m_opened_partitions, n_file);
      m_last_part= n_file;
    }
    /* Remember the first partition that should be open as a sample handler */
    if (!m_file_sample && should_be_open)
      m_file_sample= *file;
    name_buffer_ptr+= strlen(name_buffer_ptr) + 1;
  } while (*(++file));

err_handler:
  return error;
}
8436
8437
int ha_partition::change_partitions_to_open(List<String> *partition_names)
{
  char name_buff[FN_REFLEN+1];
  int error= 0;

  /* A clone shares the original's open partitions; nothing to do */
  if (m_is_clone_of)
    return 0;

  m_partitions_to_open= partition_names;
  if (unlikely((error= m_part_info->set_partition_bitmaps(partition_names))))
    goto err_handler;

  if (m_lock_type != F_UNLCK)
  {
    /*
      That happens after the LOCK TABLE statement.
      Do nothing in this case.
    */
    return 0;
  }

  /*
    NOTE(review): a non-zero bitmap_cmp() is treated here as "the needed
    partitions are already open" - verify against the bitmap_cmp contract.
  */
  if (bitmap_cmp(&m_opened_partitions, &m_part_info->read_partitions) != 0)
    return 0;

  if (unlikely((error= read_par_file(table->s->normalized_path.str)) ||
               (error= open_read_partitions(name_buff, sizeof(name_buff)))))
    goto err_handler;

  clear_handler_file();

err_handler:
  return error;
}
8471
8472
8473/**
8474 General function to prepare handler for certain behavior.
8475
8476 @param[in] operation operation to execute
8477
8478 @return status
8479 @retval 0 success
8480 @retval >0 error code
8481
8482 @detail
8483
8484 extra() is called whenever the server wishes to send a hint to
8485 the storage engine. The MyISAM engine implements the most hints.
8486
8487 We divide the parameters into the following categories:
8488 1) Operations used by most handlers
8489 2) Operations used by some non-MyISAM handlers
8490 3) Operations used only by MyISAM
8491 4) Operations only used by temporary tables for query processing
8492 5) Operations only used by MyISAM internally
8493 6) Operations not used at all
8494 7) Operations only used by federated tables for query processing
8495 8) Operations only used by NDB
8496 9) Operations only used by MERGE
8497
8498 The partition handler need to handle category 1), 2) and 3).
8499
8500 1) Operations used by most handlers
8501 -----------------------------------
8502 HA_EXTRA_RESET:
8503 This option is used by most handlers and it resets the handler state
8504 to the same state as after an open call. This includes releasing
8505 any READ CACHE or WRITE CACHE or other internal buffer used.
8506
8507 It is called from the reset method in the handler interface. There are
8508 three instances where this is called.
8509 1) After completing a INSERT ... SELECT ... query the handler for the
8510 table inserted into is reset
8511 2) It is called from close_thread_table which in turn is called from
8512 close_thread_tables except in the case where the tables are locked
8513 in which case ha_commit_stmt is called instead.
8514 It is only called from here if refresh_version hasn't changed and the
8515 table is not an old table when calling close_thread_table.
8516 close_thread_tables is called from many places as a general clean up
8517 function after completing a query.
  3) It is called when deleting the QUICK_RANGE_SELECT object if the
     QUICK_RANGE_SELECT object had its own handler object. It is called
     immediately before close of this local handler object.
8521 HA_EXTRA_KEYREAD:
8522 HA_EXTRA_NO_KEYREAD:
8523 These parameters are used to provide an optimisation hint to the handler.
8524 If HA_EXTRA_KEYREAD is set it is enough to read the index fields, for
8525 many handlers this means that the index-only scans can be used and it
8526 is not necessary to use the real records to satisfy this part of the
8527 query. Index-only scans is a very important optimisation for disk-based
8528 indexes. For main-memory indexes most indexes contain a reference to the
8529 record and thus KEYREAD only says that it is enough to read key fields.
8530 HA_EXTRA_NO_KEYREAD disables this for the handler, also HA_EXTRA_RESET
8531 will disable this option.
8532 The handler will set HA_KEYREAD_ONLY in its table flags to indicate this
8533 feature is supported.
8534 HA_EXTRA_FLUSH:
8535 Indication to flush tables to disk, is supposed to be used to
8536 ensure disk based tables are flushed at end of query execution.
8537 Currently is never used.
8538
8539 HA_EXTRA_FORCE_REOPEN:
8540 Only used by MyISAM and Archive, called when altering table,
8541 closing tables to enforce a reopen of the table files.
8542
8543 2) Operations used by some non-MyISAM handlers
8544 ----------------------------------------------
8545 HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
8546 This is a strictly InnoDB feature that is more or less undocumented.
8547 When it is activated InnoDB copies field by field from its fetch
8548 cache instead of all fields in one memcpy. Have no idea what the
8549 purpose of this is.
8550 Cut from include/my_base.h:
8551 When using HA_EXTRA_KEYREAD, overwrite only key member fields and keep
8552 other fields intact. When this is off (by default) InnoDB will use memcpy
8553 to overwrite entire row.
8554 HA_EXTRA_IGNORE_DUP_KEY:
8555 HA_EXTRA_NO_IGNORE_DUP_KEY:
    Informs the handler that we will not stop the transaction if we get
    duplicate key errors during insert/update.
8558 Always called in pair, triggered by INSERT IGNORE and other similar
8559 SQL constructs.
8560 Not used by MyISAM.
8561
8562 3) Operations used only by MyISAM
8563 ---------------------------------
8564 HA_EXTRA_NORMAL:
8565 Only used in MyISAM to reset quick mode, not implemented by any other
8566 handler. Quick mode is also reset in MyISAM by HA_EXTRA_RESET.
8567
8568 It is called after completing a successful DELETE query if the QUICK
8569 option is set.
8570
8571 HA_EXTRA_QUICK:
8572 When the user does DELETE QUICK FROM table where-clause; this extra
8573 option is called before the delete query is performed and
8574 HA_EXTRA_NORMAL is called after the delete query is completed.
8575 Temporary tables used internally in MySQL always set this option
8576
8577 The meaning of quick mode is that when deleting in a B-tree no merging
8578 of leafs is performed. This is a common method and many large DBMS's
8579 actually only support this quick mode since it is very difficult to
8580 merge leaves in a tree used by many threads concurrently.
8581
8582 HA_EXTRA_CACHE:
8583 This flag is usually set with extra_opt along with a cache size.
8584 The size of this buffer is set by the user variable
8585 record_buffer_size. The value of this cache size is the amount of
8586 data read from disk in each fetch when performing a table scan.
8587 This means that before scanning a table it is normal to call
8588 extra with HA_EXTRA_CACHE and when the scan is completed to call
8589 HA_EXTRA_NO_CACHE to release the cache memory.
8590
8591 Some special care is taken when using this extra parameter since there
8592 could be a write ongoing on the table in the same statement. In this
8593 one has to take special care since there might be a WRITE CACHE as
8594 well. HA_EXTRA_CACHE specifies using a READ CACHE and using
8595 READ CACHE and WRITE CACHE at the same time is not possible.
8596
8597 Only MyISAM currently use this option.
8598
8599 It is set when doing full table scans using rr_sequential and
8600 reset when completing such a scan with end_read_record
8601 (resetting means calling extra with HA_EXTRA_NO_CACHE).
8602
8603 It is set in filesort.cc for MyISAM internal tables and it is set in
8604 a multi-update where HA_EXTRA_CACHE is called on a temporary result
8605 table and after that ha_rnd_init(0) on table to be updated
8606 and immediately after that HA_EXTRA_NO_CACHE on table to be updated.
8607
8608 Apart from that it is always used from init_read_record but not when
8609 used from UPDATE statements. It is not used from DELETE statements
8610 with ORDER BY and LIMIT but it is used in normal scan loop in DELETE
    statements. The reason here is that DELETE's in MyISAM doesn't move
    existing data rows.
8613
8614 It is also set in copy_data_between_tables when scanning the old table
8615 to copy over to the new table.
8616 And it is set in join_init_read_record where quick objects are used
8617 to perform a scan on the table. In this case the full table scan can
8618 even be performed multiple times as part of the nested loop join.
8619
8620 For purposes of the partition handler it is obviously necessary to have
8621 special treatment of this extra call. If we would simply pass this
8622 extra call down to each handler we would allocate
8623 cache size * no of partitions amount of memory and this is not
8624 necessary since we will only scan one partition at a time when doing
8625 full table scans.
8626
8627 Thus we treat it by first checking whether we have MyISAM handlers in
8628 the table, if not we simply ignore the call and if we have we will
8629 record the call but will not call any underlying handler yet. Then
8630 when performing the sequential scan we will check this recorded value
8631 and call extra_opt whenever we start scanning a new partition.
8632
8633 HA_EXTRA_NO_CACHE:
8634 When performing a UNION SELECT HA_EXTRA_NO_CACHE is called from the
8635 flush method in the select_union class.
8636 It is used to some extent when insert delayed inserts.
8637 See HA_EXTRA_RESET_STATE for use in conjunction with delete_all_rows().
8638
8639 It should be ok to call HA_EXTRA_NO_CACHE on all underlying handlers
8640 if they are MyISAM handlers. Other handlers we can ignore the call
8641 for. If no cache is in use they will quickly return after finding
8642 this out. And we also ensure that all caches are disabled and no one
8643 is left by mistake.
8644 In the future this call will probably be deleted and we will instead call
8645 ::reset();
8646
8647 HA_EXTRA_WRITE_CACHE:
8648 See above, called from various places. It is mostly used when we
8649 do INSERT ... SELECT
8650 No special handling to save cache space is developed currently.
8651
8652 HA_EXTRA_PREPARE_FOR_UPDATE:
8653 This is called as part of a multi-table update. When the table to be
8654 updated is also scanned then this informs MyISAM handler to drop any
8655 caches if dynamic records are used (fixed size records do not care
8656 about this call). We pass this along to the first partition to scan, and
8657 flag that it is to be called after HA_EXTRA_CACHE when moving to the next
8658 partition to scan.
8659
8660 HA_EXTRA_PREPARE_FOR_DROP:
8661 Only used by MyISAM, called in preparation for a DROP TABLE.
8662 It's used mostly by Windows that cannot handle dropping an open file.
8663 On other platforms it has the same effect as HA_EXTRA_FORCE_REOPEN.
8664
8665 HA_EXTRA_PREPARE_FOR_RENAME:
8666 Informs the handler we are about to attempt a rename of the table.
8667 For handlers that have share open files (MyISAM key-file and
8668 Archive writer) they must close the files before rename is possible
8669 on Windows.
8670
8671 HA_EXTRA_READCHECK:
8672 HA_EXTRA_NO_READCHECK:
8673 Only one call to HA_EXTRA_NO_READCHECK from ha_open where it says that
8674 this is not needed in SQL. The reason for this call is that MyISAM sets
8675 the READ_CHECK_USED in the open call so the call is needed for MyISAM
8676 to reset this feature.
8677 The idea with this parameter was to inform of doing/not doing a read
8678 check before applying an update. Since SQL always performs a read before
8679 applying the update No Read Check is needed in MyISAM as well.
8680
8681 This is a cut from Docs/myisam.txt
8682 Sometimes you might want to force an update without checking whether
8683 another user has changed the record since you last read it. This is
8684 somewhat dangerous, so it should ideally not be used. That can be
8685 accomplished by wrapping the mi_update() call in two calls to mi_extra(),
8686 using these functions:
8687 HA_EXTRA_NO_READCHECK=5 No readcheck on update
8688 HA_EXTRA_READCHECK=6 Use readcheck (def)
8689
8690 4) Operations only used by temporary tables for query processing
8691 ----------------------------------------------------------------
8692 HA_EXTRA_RESET_STATE:
8693 Same as reset() except that buffers are not released. If there is
8694 a READ CACHE it is reinit'ed. A cache is reinit'ed to restart reading
8695 or to change type of cache between READ CACHE and WRITE CACHE.
8696
8697 This extra function is always called immediately before calling
8698 delete_all_rows on the handler for temporary tables.
8699 There are cases however when HA_EXTRA_RESET_STATE isn't called in
8700 a similar case for a temporary table in sql_union.cc and in two other
8701 cases HA_EXTRA_NO_CACHE is called before and HA_EXTRA_WRITE_CACHE
8702 called afterwards.
8703 The case with HA_EXTRA_NO_CACHE and HA_EXTRA_WRITE_CACHE means
8704 disable caching, delete all rows and enable WRITE CACHE. This is
8705 used for temporary tables containing distinct sums and a
8706 functional group.
8707
8708 The only case that delete_all_rows is called on non-temporary tables
8709 is in sql_delete.cc when DELETE FROM table; is called by a user.
8710 In this case no special extra calls are performed before or after this
8711 call.
8712
8713 The partition handler should not need to bother about this one. It
8714 should never be called.
8715
8716 HA_EXTRA_NO_ROWS:
8717 Don't insert rows indication to HEAP and MyISAM, only used by temporary
8718 tables used in query processing.
8719 Not handled by partition handler.
8720
8721 5) Operations only used by MyISAM internally
8722 --------------------------------------------
8723 HA_EXTRA_REINIT_CACHE:
8724 This call reinitializes the READ CACHE described above if there is one
8725 and otherwise the call is ignored.
8726
8727 We can thus safely call it on all underlying handlers if they are
8728 MyISAM handlers. It is however never called so we don't handle it at all.
8729 HA_EXTRA_FLUSH_CACHE:
8730 Flush WRITE CACHE in MyISAM. It is only from one place in the code.
8731 This is in sql_insert.cc where it is called if the table_flags doesn't
8732 contain HA_DUPLICATE_POS. The only handler having the HA_DUPLICATE_POS
8733 set is the MyISAM handler and so the only handler not receiving this
8734 call is MyISAM.
8735 Thus in effect this call is called but never used. Could be removed
8736 from sql_insert.cc
8737 HA_EXTRA_NO_USER_CHANGE:
8738 Only used by MyISAM, never called.
8739 Simulates lock_type as locked.
8740 HA_EXTRA_WAIT_LOCK:
8741 HA_EXTRA_WAIT_NOLOCK:
8742 Only used by MyISAM, called from MyISAM handler but never from server
8743 code on top of the handler.
8744 Sets lock_wait on/off
8745 HA_EXTRA_NO_KEYS:
    Only used by MyISAM, only used internally in MyISAM handler, never called
    from server level.
8748 HA_EXTRA_KEYREAD_CHANGE_POS:
8749 HA_EXTRA_REMEMBER_POS:
8750 HA_EXTRA_RESTORE_POS:
8751 HA_EXTRA_PRELOAD_BUFFER_SIZE:
8752 HA_EXTRA_CHANGE_KEY_TO_DUP:
8753 HA_EXTRA_CHANGE_KEY_TO_UNIQUE:
8754 Only used by MyISAM, never called.
8755
8756 6) Operations not used at all
8757 -----------------------------
8758 HA_EXTRA_KEY_CACHE:
8759 HA_EXTRA_NO_KEY_CACHE:
    These parameters are no longer used and could be removed.
8761
8762 7) Operations only used by federated tables for query processing
8763 ----------------------------------------------------------------
8764 HA_EXTRA_INSERT_WITH_UPDATE:
8765 Inform handler that an "INSERT...ON DUPLICATE KEY UPDATE" will be
8766 executed. This condition is unset by HA_EXTRA_NO_IGNORE_DUP_KEY.
8767
8768 8) Operations only used by NDB
8769 ------------------------------
8770 HA_EXTRA_DELETE_CANNOT_BATCH:
8771 HA_EXTRA_UPDATE_CANNOT_BATCH:
8772 Inform handler that delete_row()/update_row() cannot batch deletes/updates
8773 and should perform them immediately. This may be needed when table has
8774 AFTER DELETE/UPDATE triggers which access to subject table.
8775 These flags are reset by the handler::extra(HA_EXTRA_RESET) call.
8776
8777 9) Operations only used by MERGE
8778 ------------------------------
8779 HA_EXTRA_ADD_CHILDREN_LIST:
8780 HA_EXTRA_ATTACH_CHILDREN:
8781 HA_EXTRA_IS_ATTACHED_CHILDREN:
8782 HA_EXTRA_DETACH_CHILDREN:
8783 Special actions for MERGE tables. Ignore.
8784*/
8785
8786int ha_partition::extra(enum ha_extra_function operation)
8787{
8788 DBUG_ENTER("ha_partition:extra");
8789 DBUG_PRINT("enter", ("operation: %d", (int) operation));
8790
8791 switch (operation) {
8792 /* Category 1), used by most handlers */
8793 case HA_EXTRA_KEYREAD:
8794 case HA_EXTRA_NO_KEYREAD:
8795 case HA_EXTRA_FLUSH:
8796 case HA_EXTRA_PREPARE_FOR_FORCED_CLOSE:
8797 DBUG_RETURN(loop_extra(operation));
8798 case HA_EXTRA_PREPARE_FOR_RENAME:
8799 case HA_EXTRA_FORCE_REOPEN:
8800 DBUG_RETURN(loop_extra_alter(operation));
8801 break;
8802
8803 /* Category 2), used by non-MyISAM handlers */
8804 case HA_EXTRA_IGNORE_DUP_KEY:
8805 case HA_EXTRA_NO_IGNORE_DUP_KEY:
8806 case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
8807 {
8808 if (!m_myisam)
8809 DBUG_RETURN(loop_extra(operation));
8810 }
8811 break;
8812
8813 /* Category 3), used by MyISAM handlers */
8814 case HA_EXTRA_PREPARE_FOR_UPDATE:
8815 /*
8816 Needs to be run on the first partition in the range now, and
8817 later in late_extra_cache, when switching to a new partition to scan.
8818 */
8819 m_extra_prepare_for_update= TRUE;
8820 if (m_part_spec.start_part != NO_CURRENT_PART_ID)
8821 {
8822 if (!m_extra_cache)
8823 m_extra_cache_part_id= m_part_spec.start_part;
8824 DBUG_ASSERT(m_extra_cache_part_id == m_part_spec.start_part);
8825 (void) m_file[m_part_spec.start_part]->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
8826 }
8827 break;
8828 case HA_EXTRA_NORMAL:
8829 case HA_EXTRA_QUICK:
8830 case HA_EXTRA_PREPARE_FOR_DROP:
8831 case HA_EXTRA_FLUSH_CACHE:
8832 case HA_EXTRA_PREPARE_FOR_ALTER_TABLE:
8833 {
8834 DBUG_RETURN(loop_extra(operation));
8835 }
8836 case HA_EXTRA_NO_READCHECK:
8837 {
8838 /*
8839 This is only done as a part of ha_open, which is also used in
8840 ha_partition::open, so no need to do anything.
8841 */
8842 break;
8843 }
8844 case HA_EXTRA_CACHE:
8845 {
8846 prepare_extra_cache(0);
8847 break;
8848 }
8849 case HA_EXTRA_NO_CACHE:
8850 {
8851 int ret= 0;
8852 if (m_extra_cache_part_id != NO_CURRENT_PART_ID)
8853 ret= m_file[m_extra_cache_part_id]->extra(HA_EXTRA_NO_CACHE);
8854 m_extra_cache= FALSE;
8855 m_extra_cache_size= 0;
8856 m_extra_prepare_for_update= FALSE;
8857 m_extra_cache_part_id= NO_CURRENT_PART_ID;
8858 DBUG_RETURN(ret);
8859 }
8860 case HA_EXTRA_WRITE_CACHE:
8861 {
8862 m_extra_cache= FALSE;
8863 m_extra_cache_size= 0;
8864 m_extra_prepare_for_update= FALSE;
8865 m_extra_cache_part_id= NO_CURRENT_PART_ID;
8866 DBUG_RETURN(loop_extra(operation));
8867 }
8868 case HA_EXTRA_IGNORE_NO_KEY:
8869 case HA_EXTRA_NO_IGNORE_NO_KEY:
8870 {
8871 /*
8872 Ignore as these are specific to NDB for handling
8873 idempotency
8874 */
8875 break;
8876 }
8877 case HA_EXTRA_WRITE_CAN_REPLACE:
8878 case HA_EXTRA_WRITE_CANNOT_REPLACE:
8879 {
8880 /*
8881 Informs handler that write_row() can replace rows which conflict
8882 with row being inserted by PK/unique key without reporting error
8883 to the SQL-layer.
8884
8885 At this time, this is safe by limitation of ha_partition
8886 */
8887 DBUG_RETURN(loop_extra(operation));
8888 }
8889 /* Category 7), used by federated handlers */
8890 case HA_EXTRA_INSERT_WITH_UPDATE:
8891 DBUG_RETURN(loop_extra(operation));
8892 /* Category 8) Operations only used by NDB */
8893 case HA_EXTRA_DELETE_CANNOT_BATCH:
8894 case HA_EXTRA_UPDATE_CANNOT_BATCH:
8895 {
8896 /* Currently only NDB use the *_CANNOT_BATCH */
8897 break;
8898 }
8899 /* Category 9) Operations only used by MERGE */
8900 case HA_EXTRA_ADD_CHILDREN_LIST:
8901 DBUG_RETURN(loop_extra(operation));
8902 case HA_EXTRA_ATTACH_CHILDREN:
8903 {
8904 int result;
8905 uint num_locks;
8906 handler **file;
8907 if ((result= loop_extra(operation)))
8908 DBUG_RETURN(result);
8909
8910 /* Recalculate lock count as each child may have different set of locks */
8911 num_locks= 0;
8912 file= m_file;
8913 do
8914 {
8915 num_locks+= (*file)->lock_count();
8916 } while (*(++file));
8917
8918 m_num_locks= num_locks;
8919 break;
8920 }
8921 case HA_EXTRA_IS_ATTACHED_CHILDREN:
8922 DBUG_RETURN(loop_extra(operation));
8923 case HA_EXTRA_DETACH_CHILDREN:
8924 DBUG_RETURN(loop_extra(operation));
8925 case HA_EXTRA_MARK_AS_LOG_TABLE:
8926 /*
8927 http://dev.mysql.com/doc/refman/5.1/en/partitioning-limitations.html
8928 says we no longer support logging to partitioned tables, so we fail
8929 here.
8930 */
8931 DBUG_RETURN(ER_UNSUPORTED_LOG_ENGINE);
8932 case HA_EXTRA_STARTING_ORDERED_INDEX_SCAN:
8933 case HA_EXTRA_BEGIN_ALTER_COPY:
8934 case HA_EXTRA_END_ALTER_COPY:
8935 case HA_EXTRA_FAKE_START_STMT:
8936 DBUG_RETURN(loop_extra(operation));
8937 default:
8938 {
8939 /* Temporary crash to discover what is wrong */
8940 DBUG_ASSERT(0);
8941 break;
8942 }
8943 }
8944 DBUG_RETURN(0);
8945}
8946
8947
8948/**
8949 Special extra call to reset extra parameters
8950
8951 @return Operation status.
8952 @retval >0 Error code
8953 @retval 0 Success
8954
  @note Called at end of each statement to reset buffers.
  To avoid excessive calls, the m_partitions_to_reset bitmap keeps track
  of which partitions have been used in extra(), external_lock() or
  start_stmt() and therefore need the reset call.
8959*/
8960
8961int ha_partition::reset(void)
8962{
8963 int result= 0;
8964 int tmp;
8965 uint i;
8966 DBUG_ENTER("ha_partition::reset");
8967
8968 for (i= bitmap_get_first_set(&m_partitions_to_reset);
8969 i < m_tot_parts;
8970 i= bitmap_get_next_set(&m_partitions_to_reset, i))
8971 {
8972 if (bitmap_is_set(&m_opened_partitions, i) &&
8973 (tmp= m_file[i]->ha_reset()))
8974 result= tmp;
8975 }
8976 bitmap_clear_all(&m_partitions_to_reset);
8977 m_extra_prepare_for_update= FALSE;
8978 DBUG_RETURN(result);
8979}
8980
8981/*
8982 Special extra method for HA_EXTRA_CACHE with cachesize as extra parameter
8983
8984 SYNOPSIS
8985 extra_opt()
8986 operation Must be HA_EXTRA_CACHE
8987 cachesize Size of cache in full table scan
8988
8989 RETURN VALUE
8990 >0 Error code
8991 0 Success
8992*/
8993
int ha_partition::extra_opt(enum ha_extra_function operation, ulong cachesize)
{
  DBUG_ENTER("ha_partition::extra_opt()");

  /* Per the SYNOPSIS above, HA_EXTRA_CACHE is the only operation expected. */
  DBUG_ASSERT(HA_EXTRA_CACHE == operation);
  prepare_extra_cache(cachesize);
  DBUG_RETURN(0);
}
9002
9003
9004/*
9005 Call extra on handler with HA_EXTRA_CACHE and cachesize
9006
9007 SYNOPSIS
9008 prepare_extra_cache()
9009 cachesize Size of cache for full table scan
9010
9011 RETURN VALUE
9012 NONE
9013*/
9014
void ha_partition::prepare_extra_cache(uint cachesize)
{
  DBUG_ENTER("ha_partition::prepare_extra_cache()");
  DBUG_PRINT("enter", ("cachesize %u", cachesize));

  /* Record the request; it is replayed per partition by late_extra_cache(). */
  m_extra_cache= TRUE;
  m_extra_cache_size= cachesize;
  if (m_part_spec.start_part != NO_CURRENT_PART_ID)
  {
    /*
      NOTE(review): the assert states the bit is already set, which would
      make the bitmap_set_bit() below redundant -- confirm intent.
    */
    DBUG_ASSERT(bitmap_is_set(&m_partitions_to_reset,
                              m_part_spec.start_part));
    bitmap_set_bit(&m_partitions_to_reset, m_part_spec.start_part);
    late_extra_cache(m_part_spec.start_part);
  }
  DBUG_VOID_RETURN;
}
9031
9032
9033/**
9034 Prepares our new and reorged handlers for rename or delete.
9035
9036 @param operation Operation to forward
9037
9038 @return Operation status
9039 @retval 0 Success
9040 @retval !0 Error
9041*/
9042
9043int ha_partition::loop_extra_alter(enum ha_extra_function operation)
9044{
9045 int result= 0, tmp;
9046 handler **file;
9047 DBUG_ENTER("ha_partition::loop_extra_alter()");
9048 DBUG_ASSERT(operation == HA_EXTRA_PREPARE_FOR_RENAME ||
9049 operation == HA_EXTRA_FORCE_REOPEN);
9050
9051 if (m_new_file != NULL)
9052 {
9053 for (file= m_new_file; *file; file++)
9054 if ((tmp= (*file)->extra(operation)))
9055 result= tmp;
9056 }
9057 if (m_reorged_file != NULL)
9058 {
9059 for (file= m_reorged_file; *file; file++)
9060 if ((tmp= (*file)->extra(operation)))
9061 result= tmp;
9062 }
9063 if ((tmp= loop_extra(operation)))
9064 result= tmp;
9065 DBUG_RETURN(result);
9066}
9067
9068/*
9069 Call extra on all partitions
9070
9071 SYNOPSIS
9072 loop_extra()
9073 operation extra operation type
9074
9075 RETURN VALUE
9076 >0 Error code
9077 0 Success
9078*/
9079
9080int ha_partition::loop_extra(enum ha_extra_function operation)
9081{
9082 int result= 0, tmp;
9083 uint i;
9084 DBUG_ENTER("ha_partition::loop_extra()");
9085
9086 for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
9087 i < m_tot_parts;
9088 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
9089 {
9090 /*
9091 This can be called after an error in ha_open.
9092 In this case calling 'extra' can crash.
9093 */
9094 if (bitmap_is_set(&m_opened_partitions, i) &&
9095 (tmp= m_file[i]->extra(operation)))
9096 result= tmp;
9097 }
9098 /* Add all used partitions to be called in reset(). */
9099 bitmap_union(&m_partitions_to_reset, &m_part_info->lock_partitions);
9100 DBUG_RETURN(result);
9101}
9102
9103
9104/*
9105 Call extra(HA_EXTRA_CACHE) on next partition_id
9106
9107 SYNOPSIS
9108 late_extra_cache()
9109 partition_id Partition id to call extra on
9110
9111 RETURN VALUE
9112 NONE
9113*/
9114
9115void ha_partition::late_extra_cache(uint partition_id)
9116{
9117 handler *file;
9118 DBUG_ENTER("ha_partition::late_extra_cache");
9119 DBUG_PRINT("enter", ("extra_cache %u prepare %u partid %u size %u",
9120 m_extra_cache, m_extra_prepare_for_update,
9121 partition_id, m_extra_cache_size));
9122
9123 if (!m_extra_cache && !m_extra_prepare_for_update)
9124 DBUG_VOID_RETURN;
9125 file= m_file[partition_id];
9126 if (m_extra_cache)
9127 {
9128 if (m_extra_cache_size == 0)
9129 (void) file->extra(HA_EXTRA_CACHE);
9130 else
9131 (void) file->extra_opt(HA_EXTRA_CACHE, m_extra_cache_size);
9132 }
9133 if (m_extra_prepare_for_update)
9134 {
9135 DBUG_ASSERT(m_extra_cache);
9136 (void) file->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
9137 }
9138 m_extra_cache_part_id= partition_id;
9139 DBUG_VOID_RETURN;
9140}
9141
9142
9143/*
9144 Call extra(HA_EXTRA_NO_CACHE) on next partition_id
9145
9146 SYNOPSIS
9147 late_extra_no_cache()
9148 partition_id Partition id to call extra on
9149
9150 RETURN VALUE
9151 NONE
9152*/
9153
9154void ha_partition::late_extra_no_cache(uint partition_id)
9155{
9156 handler *file;
9157 DBUG_ENTER("ha_partition::late_extra_no_cache");
9158
9159 if (!m_extra_cache && !m_extra_prepare_for_update)
9160 DBUG_VOID_RETURN;
9161 file= m_file[partition_id];
9162 (void) file->extra(HA_EXTRA_NO_CACHE);
9163 DBUG_ASSERT(partition_id == m_extra_cache_part_id);
9164 m_extra_cache_part_id= NO_CURRENT_PART_ID;
9165 DBUG_VOID_RETURN;
9166}
9167
9168
9169/****************************************************************************
9170 MODULE optimiser support
9171****************************************************************************/
9172
9173/**
9174 Get keys to use for scanning.
9175
9176 @return key_map of keys usable for scanning
9177
9178 @note No need to use read_partitions here, since it does not depend on
9179 which partitions is used, only which storage engine used.
9180*/
9181
9182const key_map *ha_partition::keys_to_use_for_scanning()
9183{
9184 DBUG_ENTER("ha_partition::keys_to_use_for_scanning");
9185 DBUG_RETURN(get_open_file_sample()->keys_to_use_for_scanning());
9186}
9187
9188
9189/**
9190 Minimum number of rows to base optimizer estimate on.
9191*/
9192
9193ha_rows ha_partition::min_rows_for_estimate()
9194{
9195 uint i, max_used_partitions, tot_used_partitions;
9196 DBUG_ENTER("ha_partition::min_rows_for_estimate");
9197
9198 tot_used_partitions= bitmap_bits_set(&m_part_info->read_partitions);
9199
9200 /*
9201 All partitions might have been left as unused during partition pruning
9202 due to, for example, an impossible WHERE condition. Nonetheless, the
9203 optimizer might still attempt to perform (e.g. range) analysis where an
9204 estimate of the the number of rows is calculated using records_in_range.
9205 Hence, to handle this and other possible cases, use zero as the minimum
9206 number of rows to base the estimate on if no partition is being used.
9207 */
9208 if (!tot_used_partitions)
9209 DBUG_RETURN(0);
9210
9211 /*
9212 Allow O(log2(tot_partitions)) increase in number of used partitions.
9213 This gives O(tot_rows/log2(tot_partitions)) rows to base the estimate on.
9214 I.e when the total number of partitions doubles, allow one more
9215 partition to be checked.
9216 */
9217 i= 2;
9218 max_used_partitions= 1;
9219 while (i < m_tot_parts)
9220 {
9221 max_used_partitions++;
9222 i= i << 1;
9223 }
9224 if (max_used_partitions > tot_used_partitions)
9225 max_used_partitions= tot_used_partitions;
9226
9227 /* stats.records is already updated by the info(HA_STATUS_VARIABLE) call. */
9228 DBUG_PRINT("info", ("max_used_partitions: %u tot_rows: %lu",
9229 max_used_partitions,
9230 (ulong) stats.records));
9231 DBUG_PRINT("info", ("tot_used_partitions: %u min_rows_to_check: %lu",
9232 tot_used_partitions,
9233 (ulong) stats.records * max_used_partitions
9234 / tot_used_partitions));
9235 DBUG_RETURN(stats.records * max_used_partitions / tot_used_partitions);
9236}
9237
9238
9239/**
9240 Get the biggest used partition.
9241
9242 Starting at the N:th biggest partition and skips all non used
9243 partitions, returning the biggest used partition found
9244
9245 @param[in,out] part_index Skip the *part_index biggest partitions
9246
9247 @return The biggest used partition with index not lower than *part_index.
9248 @retval NO_CURRENT_PART_ID No more partition used.
9249 @retval != NO_CURRENT_PART_ID partition id of biggest used partition with
9250 index >= *part_index supplied. Note that
9251 *part_index will be updated to the next
9252 partition index to use.
9253*/
9254
9255uint ha_partition::get_biggest_used_partition(uint *part_index)
9256{
9257 uint part_id;
9258 while ((*part_index) < m_tot_parts)
9259 {
9260 part_id= m_part_ids_sorted_by_num_of_records[(*part_index)++];
9261 if (bitmap_is_set(&m_part_info->read_partitions, part_id))
9262 return part_id;
9263 }
9264 return NO_CURRENT_PART_ID;
9265}
9266
9267
9268/*
9269 Return time for a scan of the table
9270
9271 SYNOPSIS
9272 scan_time()
9273
9274 RETURN VALUE
9275 time for scan
9276*/
9277
9278double ha_partition::scan_time()
9279{
9280 double scan_time= 0;
9281 uint i;
9282 DBUG_ENTER("ha_partition::scan_time");
9283
9284 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9285 i < m_tot_parts;
9286 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9287 scan_time+= m_file[i]->scan_time();
9288 DBUG_RETURN(scan_time);
9289}
9290
9291
9292/**
9293 Find number of records in a range.
9294 @param inx Index number
9295 @param min_key Start of range
9296 @param max_key End of range
9297
9298 @return Number of rows in range.
9299
9300 Given a starting key, and an ending key estimate the number of rows that
9301 will exist between the two. max_key may be empty which in case determine
9302 if start_key matches any rows.
9303*/
9304
ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
                                       key_range *max_key)
{
  ha_rows min_rows_to_check, rows, estimated_rows=0, checked_rows= 0;
  uint partition_index= 0, part_id;
  DBUG_ENTER("ha_partition::records_in_range");

  /* Number of rows to sample before extrapolating (0 if nothing is used). */
  min_rows_to_check= min_rows_for_estimate();

  /* Visit used partitions in order of decreasing number of records. */
  while ((part_id= get_biggest_used_partition(&partition_index))
         != NO_CURRENT_PART_ID)
  {
    rows= m_file[part_id]->records_in_range(inx, min_key, max_key);

    DBUG_PRINT("info", ("part %u match %lu rows of %lu", part_id, (ulong) rows,
                        (ulong) m_file[part_id]->stats.records));

    if (rows == HA_POS_ERROR)
      DBUG_RETURN(HA_POS_ERROR);
    estimated_rows+= rows;
    checked_rows+= m_file[part_id]->stats.records;
    /*
      Returning 0 means no rows can be found, so we must continue
      this loop as long as we have estimated_rows == 0.
      Also many engines return 1 to indicate that there may exist
      a matching row, we do not normalize this by dividing by number of
      used partitions, but leave it to be returned as a sum, which will
      reflect that we will need to scan each partition's index.

      Note that this statistics may not always be correct, so we must
      continue even if the current partition has 0 rows, since we might have
      deleted rows from the current partition, or inserted to the next
      partition.
    */
    if (estimated_rows && checked_rows &&
        checked_rows >= min_rows_to_check)
    {
      /* Enough rows sampled: extrapolate from the partitions checked. */
      DBUG_PRINT("info",
                 ("records_in_range(inx %u): %lu (%lu * %lu / %lu)",
                  inx,
                  (ulong) (estimated_rows * stats.records / checked_rows),
                  (ulong) estimated_rows,
                  (ulong) stats.records,
                  (ulong) checked_rows));
      DBUG_RETURN(estimated_rows * stats.records / checked_rows);
    }
  }
  DBUG_PRINT("info", ("records_in_range(inx %u): %lu",
                      inx,
                      (ulong) estimated_rows));
  DBUG_RETURN(estimated_rows);
}
9357
9358
9359/**
9360 Estimate upper bound of number of rows.
9361
9362 @return Number of rows.
9363*/
9364
9365ha_rows ha_partition::estimate_rows_upper_bound()
9366{
9367 ha_rows rows, tot_rows= 0;
9368 handler **file= m_file;
9369 DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
9370
9371 do
9372 {
9373 if (bitmap_is_set(&(m_part_info->read_partitions), (uint)(file - m_file)))
9374 {
9375 rows= (*file)->estimate_rows_upper_bound();
9376 if (rows == HA_POS_ERROR)
9377 DBUG_RETURN(HA_POS_ERROR);
9378 tot_rows+= rows;
9379 }
9380 } while (*(++file));
9381 DBUG_RETURN(tot_rows);
9382}
9383
9384
9385/*
9386 Get time to read
9387
9388 SYNOPSIS
9389 read_time()
9390 index Index number used
9391 ranges Number of ranges
9392 rows Number of rows
9393
9394 RETURN VALUE
9395 time for read
9396
9397 DESCRIPTION
9398 This will be optimised later to include whether or not the index can
9399 be used with partitioning. To achieve we need to add another parameter
9400 that specifies how many of the index fields that are bound in the ranges.
9401 Possibly added as a new call to handlers.
9402*/
9403
9404double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
9405{
9406 DBUG_ENTER("ha_partition::read_time");
9407
9408 DBUG_RETURN(get_open_file_sample()->read_time(index, ranges, rows));
9409}
9410
9411
9412/**
9413 Number of rows in table. see handler.h
9414
9415 @return Number of records in the table (after pruning!)
9416*/
9417
9418ha_rows ha_partition::records()
9419{
9420 int error;
9421 ha_rows tot_rows= 0;
9422 uint i;
9423 DBUG_ENTER("ha_partition::records");
9424
9425 for (i= bitmap_get_first_set(&m_part_info->read_partitions);
9426 i < m_tot_parts;
9427 i= bitmap_get_next_set(&m_part_info->read_partitions, i))
9428 {
9429 ha_rows rows;
9430 if (unlikely((error= m_file[i]->pre_records()) ||
9431 (rows= m_file[i]->records()) == HA_POS_ERROR))
9432 DBUG_RETURN(HA_POS_ERROR);
9433 tot_rows+= rows;
9434 }
9435 DBUG_PRINT("exit", ("records: %lld", (longlong) tot_rows));
9436 DBUG_RETURN(tot_rows);
9437}
9438
9439
9440/*
9441 Is it ok to switch to a new engine for this table
9442
9443 SYNOPSIS
9444 can_switch_engine()
9445
9446 RETURN VALUE
9447 TRUE Ok
9448 FALSE Not ok
9449
9450 DESCRIPTION
9451 Used to ensure that tables with foreign key constraints are not moved
9452 to engines without foreign key support.
9453*/
9454
9455bool ha_partition::can_switch_engines()
9456{
9457 handler **file;
9458 DBUG_ENTER("ha_partition::can_switch_engines");
9459
9460 file= m_file;
9461 do
9462 {
9463 if (!(*file)->can_switch_engines())
9464 DBUG_RETURN(FALSE);
9465 } while (*(++file));
9466 DBUG_RETURN(TRUE);
9467}
9468
9469
9470/*
9471 Is table cache supported
9472
9473 SYNOPSIS
9474 table_cache_type()
9475
9476*/
9477
9478uint8 ha_partition::table_cache_type()
9479{
9480 DBUG_ENTER("ha_partition::table_cache_type");
9481
9482 DBUG_RETURN(m_file[0]->table_cache_type());
9483}
9484
9485
9486/**
9487 Calculate hash value for KEY partitioning using an array of fields.
9488
9489 @param field_array An array of the fields in KEY partitioning
9490
9491 @return hash_value calculated
9492
9493 @note Uses the hash function on the character set of the field.
9494 Integer and floating point fields use the binary character set by default.
9495*/
9496
9497uint32 ha_partition::calculate_key_hash_value(Field **field_array)
9498{
9499 ulong nr1= 1;
9500 ulong nr2= 4;
9501 bool use_51_hash;
9502 use_51_hash= MY_TEST((*field_array)->table->part_info->key_algorithm ==
9503 partition_info::KEY_ALGORITHM_51);
9504
9505 do
9506 {
9507 Field *field= *field_array;
9508 if (use_51_hash)
9509 {
9510 switch (field->real_type()) {
9511 case MYSQL_TYPE_TINY:
9512 case MYSQL_TYPE_SHORT:
9513 case MYSQL_TYPE_LONG:
9514 case MYSQL_TYPE_FLOAT:
9515 case MYSQL_TYPE_DOUBLE:
9516 case MYSQL_TYPE_NEWDECIMAL:
9517 case MYSQL_TYPE_TIMESTAMP:
9518 case MYSQL_TYPE_LONGLONG:
9519 case MYSQL_TYPE_INT24:
9520 case MYSQL_TYPE_TIME:
9521 case MYSQL_TYPE_DATETIME:
9522 case MYSQL_TYPE_YEAR:
9523 case MYSQL_TYPE_NEWDATE:
9524 {
9525 if (field->is_null())
9526 {
9527 nr1^= (nr1 << 1) | 1;
9528 continue;
9529 }
9530 /* Force this to my_hash_sort_bin, which was used in 5.1! */
9531 uint len= field->pack_length();
9532 my_charset_bin.coll->hash_sort(&my_charset_bin, field->ptr, len,
9533 &nr1, &nr2);
9534 /* Done with this field, continue with next one. */
9535 continue;
9536 }
9537 case MYSQL_TYPE_STRING:
9538 case MYSQL_TYPE_VARCHAR:
9539 case MYSQL_TYPE_BIT:
9540 /* Not affected, same in 5.1 and 5.5 */
9541 break;
9542 /*
9543 ENUM/SET uses my_hash_sort_simple in 5.1 (i.e. my_charset_latin1)
9544 and my_hash_sort_bin in 5.5!
9545 */
9546 case MYSQL_TYPE_ENUM:
9547 case MYSQL_TYPE_SET:
9548 {
9549 if (field->is_null())
9550 {
9551 nr1^= (nr1 << 1) | 1;
9552 continue;
9553 }
9554 /* Force this to my_hash_sort_bin, which was used in 5.1! */
9555 uint len= field->pack_length();
9556 my_charset_latin1.coll->hash_sort(&my_charset_latin1, field->ptr,
9557 len, &nr1, &nr2);
9558 continue;
9559 }
9560 /* New types in mysql-5.6. */
9561 case MYSQL_TYPE_DATETIME2:
9562 case MYSQL_TYPE_TIME2:
9563 case MYSQL_TYPE_TIMESTAMP2:
9564 /* Not affected, 5.6+ only! */
9565 break;
9566
9567 /* These types should not be allowed for partitioning! */
9568 case MYSQL_TYPE_NULL:
9569 case MYSQL_TYPE_DECIMAL:
9570 case MYSQL_TYPE_DATE:
9571 case MYSQL_TYPE_TINY_BLOB:
9572 case MYSQL_TYPE_MEDIUM_BLOB:
9573 case MYSQL_TYPE_LONG_BLOB:
9574 case MYSQL_TYPE_BLOB:
9575 case MYSQL_TYPE_VAR_STRING:
9576 case MYSQL_TYPE_GEOMETRY:
9577 /* fall through */
9578 default:
9579 DBUG_ASSERT(0); // New type?
9580 /* Fall through for default hashing (5.5). */
9581 }
9582 /* fall through, use collation based hashing. */
9583 }
9584 field->hash(&nr1, &nr2);
9585 } while (*(++field_array));
9586 return (uint32) nr1;
9587}
9588
9589
9590/****************************************************************************
9591 MODULE print messages
9592****************************************************************************/
9593
9594const char *ha_partition::index_type(uint inx)
9595{
9596 uint first_used_partition;
9597 DBUG_ENTER("ha_partition::index_type");
9598
9599 first_used_partition= bitmap_get_first_set(&(m_part_info->read_partitions));
9600
9601 if (first_used_partition == MY_BIT_NONE)
9602 {
9603 DBUG_ASSERT(0); // How can this happen?
9604 DBUG_RETURN(handler::index_type(inx));
9605 }
9606
9607 DBUG_RETURN(m_file[first_used_partition]->index_type(inx));
9608}
9609
9610
9611enum row_type ha_partition::get_row_type() const
9612{
9613 uint i;
9614 enum row_type type;
9615 DBUG_ENTER("ha_partition::get_row_type");
9616
9617 i= bitmap_get_first_set(&m_part_info->read_partitions);
9618 DBUG_ASSERT(i < m_tot_parts);
9619 if (i >= m_tot_parts)
9620 DBUG_RETURN(ROW_TYPE_NOT_USED);
9621
9622 type= m_file[i]->get_row_type();
9623 DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
9624
9625 for (i= bitmap_get_next_set(&m_part_info->lock_partitions, i);
9626 i < m_tot_parts;
9627 i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
9628 {
9629 enum row_type part_type= m_file[i]->get_row_type();
9630 DBUG_PRINT("info", ("partition %u, row_type: %d", i, type));
9631 if (part_type != type)
9632 DBUG_RETURN(ROW_TYPE_NOT_USED);
9633 }
9634
9635 DBUG_RETURN(type);
9636}
9637
9638
/**
  Append a textual representation of the current row to a String.

  Used when reporting a row found in the wrong partition. If m_err_rec is
  set (and differs from record[0]) it is the record to print, otherwise
  table->record[0] is used. When the table has a primary key only the PK
  fields are printed, otherwise all fields of the full partition field
  array are printed.

  @param[out] str  String that " <field>:<value>" pairs are appended to.
*/
void ha_partition::append_row_to_str(String &str)
{
  const uchar *rec;
  bool is_rec0= !m_err_rec || m_err_rec == table->record[0];
  if (is_rec0)
    rec= table->record[0];
  else
    rec= m_err_rec;
  // If PK, use full PK instead of full part field array!
  if (table->s->primary_key != MAX_KEY)
  {
    KEY *key= table->key_info + table->s->primary_key;
    KEY_PART_INFO *key_part= key->key_part;
    KEY_PART_INFO *key_part_end= key_part + key->user_defined_key_parts;
    /* Temporarily repoint the key fields from record[0] to rec. */
    if (!is_rec0)
      set_key_field_ptr(key, rec, table->record[0]);
    for (; key_part != key_part_end; key_part++)
    {
      Field *field= key_part->field;
      str.append(" ");
      str.append(&field->field_name);
      str.append(":");
      field_unpack(&str, field, rec, 0, false);
    }
    /* Restore the key field pointers to record[0]. */
    if (!is_rec0)
      set_key_field_ptr(key, table->record[0], rec);
  }
  else
  {
    Field **field_ptr;
    /* Temporarily repoint the partition fields from record[0] to rec. */
    if (!is_rec0)
      table->move_fields(m_part_info->full_part_field_array, rec,
                         table->record[0]);
    /* No primary key, use full partition field array. */
    for (field_ptr= m_part_info->full_part_field_array;
         *field_ptr;
         field_ptr++)
    {
      Field *field= *field_ptr;
      str.append(" ");
      str.append(&field->field_name);
      str.append(":");
      field_unpack(&str, field, rec, 0, false);
    }
    /* Restore the field pointers to record[0]. */
    if (!is_rec0)
      table->move_fields(m_part_info->full_part_field_array, table->record[0],
                         rec);
  }
}
9688
9689
/**
  Print an error message.

  Handles the partition-specific errors HA_ERR_NO_PARTITION_FOUND and
  HA_ERR_ROW_IN_WRONG_PARTITION itself; everything else is delegated to
  the handler of the partition where the error occurred (m_last_part).

  @param error    Error code
  @param errflag  Flags passed on to my_error()
*/
void ha_partition::print_error(int error, myf errflag)
{
  THD *thd= ha_thd();
  DBUG_ENTER("ha_partition::print_error");
  DBUG_PRINT("enter", ("error: %d", error));

  /* Should probably look for my own errors first */
  if ((error == HA_ERR_NO_PARTITION_FOUND) &&
      ! (thd->lex->alter_info.partition_flags & ALTER_PARTITION_TRUNCATE))
  {
    m_part_info->print_no_partition_found(table, errflag);
    DBUG_VOID_RETURN;
  }
  else if (error == HA_ERR_ROW_IN_WRONG_PARTITION)
  {
    /* Should only happen on DELETE or UPDATE! */
    DBUG_ASSERT(thd_sql_command(thd) == SQLCOM_DELETE ||
                thd_sql_command(thd) == SQLCOM_DELETE_MULTI ||
                thd_sql_command(thd) == SQLCOM_UPDATE ||
                thd_sql_command(thd) == SQLCOM_UPDATE_MULTI);
    DBUG_ASSERT(m_err_rec);
    if (m_err_rec)
    {
      uint max_length;
      char buf[MAX_KEY_LENGTH];
      String str(buf,sizeof(buf),system_charset_info);
      uint32 part_id;
      /* Build "(<partition found in> != <expected partition>) <row>". */
      str.length(0);
      str.append("(");
      str.append_ulonglong(m_last_part);
      str.append(" != ");
      if (get_part_for_buf(m_err_rec, m_rec0, m_part_info, &part_id))
        str.append("?");
      else
        str.append_ulonglong(part_id);
      str.append(")");
      append_row_to_str(str);

      /* Log this error, so the DBA can notice it and fix it! */
      sql_print_error("Table '%-192s' corrupted: row in wrong partition: %s\n"
                      "Please REPAIR the table!",
                      table->s->table_name.str,
                      str.c_ptr_safe());

      /* Truncate the row description so the full message fits the buffer. */
      max_length= (MYSQL_ERRMSG_SIZE -
                   (uint) strlen(ER_THD(thd, ER_ROW_IN_WRONG_PARTITION)));
      if (str.length() >= max_length)
      {
        str.length(max_length-4);
        str.append(STRING_WITH_LEN("..."));
      }
      my_error(ER_ROW_IN_WRONG_PARTITION, MYF(0), str.c_ptr_safe());
      m_err_rec= NULL;
      DBUG_VOID_RETURN;
    }
    /* fall through to generic error handling. */
  }

  /* In case m_file has not been initialized, like in bug#42438 */
  if (m_file)
  {
    if (m_last_part >= m_tot_parts)
    {
      /* Should never happen; fall back to the first partition. */
      DBUG_ASSERT(0);
      m_last_part= 0;
    }
    m_file[m_last_part]->print_error(error, errflag);
  }
  else
    handler::print_error(error, errflag);
  DBUG_VOID_RETURN;
}
9762
9763
9764bool ha_partition::get_error_message(int error, String *buf)
9765{
9766 DBUG_ENTER("ha_partition::get_error_message");
9767
9768 /* Should probably look for my own errors first */
9769
9770 /* In case m_file has not been initialized, like in bug#42438 */
9771 if (m_file)
9772 DBUG_RETURN(m_file[m_last_part]->get_error_message(error, buf));
9773 DBUG_RETURN(handler::get_error_message(error, buf));
9774
9775}
9776
9777
9778/****************************************************************************
9779 MODULE in-place ALTER
9780****************************************************************************/
9781/**
9782 Get table flags.
9783*/
9784
9785handler::Table_flags ha_partition::table_flags() const
9786{
9787 uint first_used_partition= 0;
9788 DBUG_ENTER("ha_partition::table_flags");
9789 if (m_handler_status < handler_initialized ||
9790 m_handler_status >= handler_closed)
9791 DBUG_RETURN(PARTITION_ENABLED_TABLE_FLAGS);
9792
9793 if (get_lock_type() != F_UNLCK)
9794 {
9795 /*
9796 The flags are cached after external_lock, and may depend on isolation
9797 level. So we should use a locked partition to get the correct flags.
9798 */
9799 first_used_partition= bitmap_get_first_set(&m_part_info->lock_partitions);
9800 if (first_used_partition == MY_BIT_NONE)
9801 first_used_partition= 0;
9802 }
9803 DBUG_RETURN((m_file[first_used_partition]->ha_table_flags() &
9804 ~(PARTITION_DISABLED_TABLE_FLAGS)) |
9805 (PARTITION_ENABLED_TABLE_FLAGS));
9806}
9807
9808
9809/**
9810 alter_table_flags must be on handler/table level, not on hton level
9811 due to the ha_partition hton does not know what the underlying hton is.
9812*/
9813
9814alter_table_operations ha_partition::alter_table_flags(alter_table_operations flags)
9815{
9816 alter_table_operations flags_to_return;
9817 DBUG_ENTER("ha_partition::alter_table_flags");
9818
9819 flags_to_return= ht->alter_table_flags(flags);
9820 flags_to_return|= m_file[0]->alter_table_flags(flags);
9821
9822 DBUG_RETURN(flags_to_return);
9823}
9824
9825
9826/**
9827 check if copy of data is needed in alter table.
9828*/
9829bool ha_partition::check_if_incompatible_data(HA_CREATE_INFO *create_info,
9830 uint table_changes)
9831{
9832 /*
9833 The check for any partitioning related changes have already been done
9834 in mysql_alter_table (by fix_partition_func), so it is only up to
9835 the underlying handlers.
9836 */
9837 List_iterator<partition_element> part_it(m_part_info->partitions);
9838 HA_CREATE_INFO dummy_info= *create_info;
9839 uint i=0;
9840 while (partition_element *part_elem= part_it++)
9841 {
9842 if (m_is_sub_partitioned)
9843 {
9844 List_iterator<partition_element> subpart_it(part_elem->subpartitions);
9845 while (partition_element *sub_elem= subpart_it++)
9846 {
9847 dummy_info.data_file_name= sub_elem->data_file_name;
9848 dummy_info.index_file_name= sub_elem->index_file_name;
9849 if (m_file[i++]->check_if_incompatible_data(&dummy_info, table_changes))
9850 return COMPATIBLE_DATA_NO;
9851 }
9852 }
9853 else
9854 {
9855 dummy_info.data_file_name= part_elem->data_file_name;
9856 dummy_info.index_file_name= part_elem->index_file_name;
9857 if (m_file[i++]->check_if_incompatible_data(&dummy_info, table_changes))
9858 return COMPATIBLE_DATA_NO;
9859 }
9860 }
9861 return COMPATIBLE_DATA_YES;
9862}
9863
9864
9865/**
9866 Support of in-place alter table.
9867*/
9868
9869/**
9870 Helper class for in-place alter, see handler.h
9871*/
9872
9873class ha_partition_inplace_ctx : public inplace_alter_handler_ctx
9874{
9875public:
9876 inplace_alter_handler_ctx **handler_ctx_array;
9877private:
9878 uint m_tot_parts;
9879
9880public:
9881 ha_partition_inplace_ctx(THD *thd, uint tot_parts)
9882 : inplace_alter_handler_ctx(),
9883 handler_ctx_array(NULL),
9884 m_tot_parts(tot_parts)
9885 {}
9886
9887 ~ha_partition_inplace_ctx()
9888 {
9889 if (handler_ctx_array)
9890 {
9891 for (uint index= 0; index < m_tot_parts; index++)
9892 delete handler_ctx_array[index];
9893 }
9894 }
9895};
9896
9897
/**
  Check whether the requested ALTER can be done in-place.

  Asks every partition handler; the overall result is the weakest
  (smallest) capability reported. Collects each partition's private
  inplace context into a NULL-terminated array that later inplace calls
  (prepare/alter/commit) iterate over.

  @param altered_table  Definition of the altered table
  @param ha_alter_info  In/out description of the alter operation

  @return The weakest supported inplace result, or HA_ALTER_ERROR.
*/
enum_alter_inplace_result
ha_partition::check_if_supported_inplace_alter(TABLE *altered_table,
                                               Alter_inplace_info *ha_alter_info)
{
  uint index= 0;
  enum_alter_inplace_result result= HA_ALTER_INPLACE_NO_LOCK;
  ha_partition_inplace_ctx *part_inplace_ctx;
  bool first_is_set= false;
  THD *thd= ha_thd();

  DBUG_ENTER("ha_partition::check_if_supported_inplace_alter");
  /*
    Support inplace change of KEY () -> KEY ALGORITHM = N ().
    Any other change would set partition_changed in
    prep_alter_part_table() in mysql_alter_table().
  */
  if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
  {
    DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
    DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
  }

  /* Context that will carry one per-partition context through the alter. */
  part_inplace_ctx=
    new (thd->mem_root) ha_partition_inplace_ctx(thd, m_tot_parts);
  if (!part_inplace_ctx)
    DBUG_RETURN(HA_ALTER_ERROR);

  part_inplace_ctx->handler_ctx_array= (inplace_alter_handler_ctx **)
    thd->alloc(sizeof(inplace_alter_handler_ctx *) * (m_tot_parts + 1));
  if (!part_inplace_ctx->handler_ctx_array)
    DBUG_RETURN(HA_ALTER_ERROR);

  /* Set all to NULL, including the terminating one. */
  for (index= 0; index <= m_tot_parts; index++)
    part_inplace_ctx->handler_ctx_array[index]= NULL;

  ha_alter_info->handler_flags |= ALTER_PARTITIONED;
  for (index= 0; index < m_tot_parts; index++)
  {
    enum_alter_inplace_result p_result=
      m_file[index]->check_if_supported_inplace_alter(altered_table,
                                                      ha_alter_info);
    /* Save the context this partition created (may be NULL). */
    part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;

    if (index == 0)
    {
      first_is_set= (ha_alter_info->handler_ctx != NULL);
    }
    else if (first_is_set != (ha_alter_info->handler_ctx != NULL))
    {
      /* Either none or all partitions must set handler_ctx! */
      DBUG_ASSERT(0);
      DBUG_RETURN(HA_ALTER_ERROR);
    }
    if (p_result < result)
      result= p_result;
    if (result == HA_ALTER_ERROR)
      break;
  }

  ha_alter_info->handler_ctx= part_inplace_ctx;
  /*
    To indicate for future inplace calls that there are several
    partitions/handlers that need to be committed together,
    we set group_commit_ctx to the NULL terminated array of
    the partitions handlers.
  */
  ha_alter_info->group_commit_ctx= part_inplace_ctx->handler_ctx_array;

  DBUG_RETURN(result);
}
9969
9970
9971bool ha_partition::prepare_inplace_alter_table(TABLE *altered_table,
9972 Alter_inplace_info *ha_alter_info)
9973{
9974 uint index= 0;
9975 bool error= false;
9976 ha_partition_inplace_ctx *part_inplace_ctx;
9977
9978 DBUG_ENTER("ha_partition::prepare_inplace_alter_table");
9979
9980 /*
9981 Changing to similar partitioning, only update metadata.
9982 Non allowed changes would be catched in prep_alter_part_table().
9983 */
9984 if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
9985 {
9986 DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
9987 DBUG_RETURN(false);
9988 }
9989
9990 part_inplace_ctx=
9991 static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
9992
9993 for (index= 0; index < m_tot_parts && !error; index++)
9994 {
9995 ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
9996 if (m_file[index]->ha_prepare_inplace_alter_table(altered_table,
9997 ha_alter_info))
9998 error= true;
9999 part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
10000 }
10001 ha_alter_info->handler_ctx= part_inplace_ctx;
10002
10003 DBUG_RETURN(error);
10004}
10005
10006
10007bool ha_partition::inplace_alter_table(TABLE *altered_table,
10008 Alter_inplace_info *ha_alter_info)
10009{
10010 uint index= 0;
10011 bool error= false;
10012 ha_partition_inplace_ctx *part_inplace_ctx;
10013
10014 DBUG_ENTER("ha_partition::inplace_alter_table");
10015
10016 /*
10017 Changing to similar partitioning, only update metadata.
10018 Non allowed changes would be catched in prep_alter_part_table().
10019 */
10020 if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
10021 {
10022 DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
10023 DBUG_RETURN(false);
10024 }
10025
10026 part_inplace_ctx=
10027 static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
10028
10029 for (index= 0; index < m_tot_parts && !error; index++)
10030 {
10031 ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[index];
10032 if (m_file[index]->ha_inplace_alter_table(altered_table,
10033 ha_alter_info))
10034 error= true;
10035 part_inplace_ctx->handler_ctx_array[index]= ha_alter_info->handler_ctx;
10036 }
10037 ha_alter_info->handler_ctx= part_inplace_ctx;
10038
10039 DBUG_RETURN(error);
10040}
10041
10042
10043/*
10044 Note that this function will try rollback failed ADD INDEX by
10045 executing DROP INDEX for the indexes that were committed (if any)
10046 before the error occurred. This means that the underlying storage
10047 engine must be able to drop index in-place with X-lock held.
10048 (As X-lock will be held here if new indexes are to be committed)
10049*/
10050bool ha_partition::commit_inplace_alter_table(TABLE *altered_table,
10051 Alter_inplace_info *ha_alter_info,
10052 bool commit)
10053{
10054 ha_partition_inplace_ctx *part_inplace_ctx;
10055 bool error= false;
10056
10057 DBUG_ENTER("ha_partition::commit_inplace_alter_table");
10058
10059 /*
10060 Changing to similar partitioning, only update metadata.
10061 Non allowed changes would be catched in prep_alter_part_table().
10062 */
10063 if (ha_alter_info->alter_info->partition_flags == ALTER_PARTITION_INFO)
10064 {
10065 DBUG_ASSERT(ha_alter_info->alter_info->flags == 0);
10066 DBUG_RETURN(false);
10067 }
10068
10069 part_inplace_ctx=
10070 static_cast<class ha_partition_inplace_ctx*>(ha_alter_info->handler_ctx);
10071
10072 if (commit)
10073 {
10074 DBUG_ASSERT(ha_alter_info->group_commit_ctx ==
10075 part_inplace_ctx->handler_ctx_array);
10076 ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[0];
10077 error= m_file[0]->ha_commit_inplace_alter_table(altered_table,
10078 ha_alter_info, commit);
10079 if (unlikely(error))
10080 goto end;
10081 if (ha_alter_info->group_commit_ctx)
10082 {
10083 /*
10084 If ha_alter_info->group_commit_ctx is not set to NULL,
10085 then the engine did only commit the first partition!
10086 The engine is probably new, since both innodb and the default
10087 implementation of handler::commit_inplace_alter_table sets it to NULL
10088 and simply return false, since it allows metadata changes only.
10089 Loop over all other partitions as to follow the protocol!
10090 */
10091 uint i;
10092 DBUG_ASSERT(0);
10093 for (i= 1; i < m_tot_parts; i++)
10094 {
10095 ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
10096 error|= m_file[i]->ha_commit_inplace_alter_table(altered_table,
10097 ha_alter_info,
10098 true);
10099 }
10100 }
10101 }
10102 else
10103 {
10104 uint i;
10105 for (i= 0; i < m_tot_parts; i++)
10106 {
10107 /* Rollback, commit == false, is done for each partition! */
10108 ha_alter_info->handler_ctx= part_inplace_ctx->handler_ctx_array[i];
10109 if (m_file[i]->ha_commit_inplace_alter_table(altered_table,
10110 ha_alter_info, false))
10111 error= true;
10112 }
10113 }
10114end:
10115 ha_alter_info->handler_ctx= part_inplace_ctx;
10116
10117 DBUG_RETURN(error);
10118}
10119
10120
10121void ha_partition::notify_table_changed()
10122{
10123 handler **file;
10124
10125 DBUG_ENTER("ha_partition::notify_table_changed");
10126
10127 for (file= m_file; *file; file++)
10128 (*file)->ha_notify_table_changed();
10129
10130 DBUG_VOID_RETURN;
10131}
10132
10133
10134uint ha_partition::min_of_the_max_uint(
10135 uint (handler::*operator_func)(void) const) const
10136{
10137 handler **file;
10138 uint min_of_the_max= ((*m_file)->*operator_func)();
10139
10140 for (file= m_file+1; *file; file++)
10141 {
10142 uint tmp= ((*file)->*operator_func)();
10143 set_if_smaller(min_of_the_max, tmp);
10144 }
10145 return min_of_the_max;
10146}
10147
10148
/* Smallest max_supported_key_parts() among all partition handlers. */
uint ha_partition::max_supported_key_parts() const
{
  return min_of_the_max_uint(&handler::max_supported_key_parts);
}
10153
10154
/* Smallest max_supported_key_length() among all partition handlers. */
uint ha_partition::max_supported_key_length() const
{
  return min_of_the_max_uint(&handler::max_supported_key_length);
}
10159
10160
/* Smallest max_supported_key_part_length() among all partition handlers. */
uint ha_partition::max_supported_key_part_length() const
{
  return min_of_the_max_uint(&handler::max_supported_key_part_length);
}
10165
10166
/* Smallest max_supported_record_length() among all partition handlers. */
uint ha_partition::max_supported_record_length() const
{
  return min_of_the_max_uint(&handler::max_supported_record_length);
}
10171
10172
/* Smallest max_supported_keys() among all partition handlers. */
uint ha_partition::max_supported_keys() const
{
  return min_of_the_max_uint(&handler::max_supported_keys);
}
10177
10178
10179uint ha_partition::min_record_length(uint options) const
10180{
10181 handler **file;
10182 uint max= (*m_file)->min_record_length(options);
10183
10184 for (file= m_file, file++; *file; file++)
10185 if (max < (*file)->min_record_length(options))
10186 max= (*file)->min_record_length(options);
10187 return max;
10188}
10189
10190/****************************************************************************
10191 MODULE compare records
10192****************************************************************************/
10193/*
10194 Compare two positions
10195
10196 SYNOPSIS
10197 cmp_ref()
10198 ref1 First position
10199 ref2 Second position
10200
10201 RETURN VALUE
10202 <0 ref1 < ref2
10203 0 Equal
10204 >0 ref1 > ref2
10205
10206 DESCRIPTION
10207 We get two references and need to check if those records are the same.
10208 If they belong to different partitions we decide that they are not
10209 the same record. Otherwise we use the particular handler to decide if
10210 they are the same. Sort in partition id order if not equal.
10211
10212 MariaDB note:
10213 Please don't merge the code from MySQL that does this:
10214
10215 We get two references and need to check if those records are the same.
10216 If they belong to different partitions we decide that they are not
10217 the same record. Otherwise we use the particular handler to decide if
10218 they are the same. Sort in partition id order if not equal.
10219
10220 It is incorrect, MariaDB has an alternative fix.
10221*/
10222
int ha_partition::cmp_ref(const uchar *ref1, const uchar *ref2)
{
  int cmp;
  uint32 diff1, diff2;
  DBUG_ENTER("ha_partition::cmp_ref");

  /* Compare the in-partition parts (the bytes after the partition id). */
  cmp= get_open_file_sample()->cmp_ref((ref1 + PARTITION_BYTES_IN_POS),
                                       (ref2 + PARTITION_BYTES_IN_POS));
  if (cmp)
    DBUG_RETURN(cmp);

  /* Equal in-partition refs: order by the partition id stored first. */
  diff2= uint2korr(ref2);
  diff1= uint2korr(ref1);

  if (diff1 == diff2)
  {
    /* This means that the references are same and are in same partition.*/
    DBUG_RETURN(0);
  }

  /*
    In Innodb we compare with either primary key value or global DB_ROW_ID so
    it is not possible that the two references are equal and are in different
    partitions, but in myisam it is possible since we are comparing offsets.
    Remove this assert if DB_ROW_ID is changed to be per partition.
  */
  DBUG_ASSERT(!m_innodb);
  DBUG_RETURN(diff2 > diff1 ? -1 : 1);
}
10252
10253
10254/****************************************************************************
10255 MODULE auto increment
10256****************************************************************************/
10257
10258
10259/**
10260 Retreive new values for part_share->next_auto_inc_val if needed
10261
10262 This is needed if the value has not been initialized or if one of
10263 the underlying partitions require that the value should be re-calculated
10264*/
10265
10266void ha_partition::update_next_auto_inc_val()
10267{
10268 if (!part_share->auto_inc_initialized ||
10269 need_info_for_auto_inc())
10270 info(HA_STATUS_AUTO);
10271}
10272
10273
10274/**
10275 Determine whether a partition needs auto-increment initialization.
10276
10277 @return
10278 TRUE A partition needs auto-increment initialization
10279 FALSE No partition needs auto-increment initialization
10280
10281 Resets part_share->auto_inc_initialized if next auto_increment needs to be
10282 recalculated.
10283*/
10284
10285bool ha_partition::need_info_for_auto_inc()
10286{
10287 handler **file= m_file;
10288 DBUG_ENTER("ha_partition::need_info_for_auto_inc");
10289
10290 do
10291 {
10292 if ((*file)->need_info_for_auto_inc())
10293 {
10294 /* We have to get new auto_increment values from handler */
10295 part_share->auto_inc_initialized= FALSE;
10296 DBUG_RETURN(TRUE);
10297 }
10298 } while (*(++file));
10299 DBUG_RETURN(FALSE);
10300}
10301
10302
10303/**
10304 Determine if all partitions can use the current auto-increment value for
10305 auto-increment initialization.
10306
10307 @return
10308 TRUE All partitions can use the current auto-increment
10309 value for auto-increment initialization
10310 FALSE All partitions cannot use the current
10311 auto-increment value for auto-increment
10312 initialization
10313
10314 Notes
10315 This function is only called for ::info(HA_STATUS_AUTO) and is
10316 mainly used by the Spider engine, which returns false
10317 except in the case of DROP TABLE or ALTER TABLE when it returns TRUE.
10318 Other engines always returns TRUE for this call.
10319*/
10320
10321bool ha_partition::can_use_for_auto_inc_init()
10322{
10323 handler **file= m_file;
10324 DBUG_ENTER("ha_partition::can_use_for_auto_inc_init");
10325
10326 do
10327 {
10328 if (!(*file)->can_use_for_auto_inc_init())
10329 DBUG_RETURN(FALSE);
10330 } while (*(++file));
10331 DBUG_RETURN(TRUE);
10332}
10333
10334
10335int ha_partition::reset_auto_increment(ulonglong value)
10336{
10337 handler **file= m_file;
10338 int res;
10339 DBUG_ENTER("ha_partition::reset_auto_increment");
10340 lock_auto_increment();
10341 part_share->auto_inc_initialized= false;
10342 part_share->next_auto_inc_val= 0;
10343 do
10344 {
10345 if ((res= (*file)->ha_reset_auto_increment(value)) != 0)
10346 break;
10347 } while (*(++file));
10348 unlock_auto_increment();
10349 DBUG_RETURN(res);
10350}
10351
10352
10353/**
10354 This method is called by update_auto_increment which in turn is called
10355 by the individual handlers as part of write_row. We use the
10356 part_share->next_auto_inc_val, or search all
10357 partitions for the highest auto_increment_value if not initialized or
10358 if auto_increment field is a secondary part of a key, we must search
10359 every partition when holding a mutex to be sure of correctness.
10360*/
10361
10362void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment,
10363 ulonglong nb_desired_values,
10364 ulonglong *first_value,
10365 ulonglong *nb_reserved_values)
10366{
10367 DBUG_ENTER("ha_partition::get_auto_increment");
10368 DBUG_PRINT("enter", ("offset: %lu inc: %lu desired_values: %lu "
10369 "first_value: %lu", (ulong) offset, (ulong) increment,
10370 (ulong) nb_desired_values, (ulong) *first_value));
10371 DBUG_ASSERT(increment && nb_desired_values);
10372 *first_value= 0;
10373 if (table->s->next_number_keypart)
10374 {
10375 /*
10376 next_number_keypart is != 0 if the auto_increment column is a secondary
10377 column in the index (it is allowed in MyISAM)
10378 */
10379 DBUG_PRINT("info", ("next_number_keypart != 0"));
10380 ulonglong nb_reserved_values_part;
10381 ulonglong first_value_part, max_first_value;
10382 handler **file= m_file;
10383 first_value_part= max_first_value= *first_value;
10384 /* Must find highest value among all partitions. */
10385 do
10386 {
10387 /* Only nb_desired_values = 1 makes sense */
10388 (*file)->get_auto_increment(offset, increment, 1,
10389 &first_value_part, &nb_reserved_values_part);
10390 if (unlikely(first_value_part == ULONGLONG_MAX)) // error in one partition
10391 {
10392 *first_value= first_value_part;
10393 /* log that the error was between table/partition handler */
10394 sql_print_error("Partition failed to reserve auto_increment value");
10395 DBUG_VOID_RETURN;
10396 }
10397 DBUG_PRINT("info", ("first_value_part: %lu", (ulong) first_value_part));
10398 set_if_bigger(max_first_value, first_value_part);
10399 } while (*(++file));
10400 *first_value= max_first_value;
10401 *nb_reserved_values= 1;
10402 }
10403 else
10404 {
10405 THD *thd= ha_thd();
10406 /*
10407 This is initialized in the beginning of the first write_row call.
10408 */
10409 DBUG_ASSERT(part_share->auto_inc_initialized);
10410 /*
10411 Get a lock for handling the auto_increment in part_share
10412 for avoiding two concurrent statements getting the same number.
10413 */
10414
10415 lock_auto_increment();
10416
10417 /*
10418 In a multi-row insert statement like INSERT SELECT and LOAD DATA
10419 where the number of candidate rows to insert is not known in advance
10420 we must hold a lock/mutex for the whole statement if we have statement
10421 based replication. Because the statement-based binary log contains
10422 only the first generated value used by the statement, and slaves assumes
10423 all other generated values used by this statement were consecutive to
10424 this first one, we must exclusively lock the generator until the
10425 statement is done.
10426 */
10427 if (!auto_increment_safe_stmt_log_lock &&
10428 thd->lex->sql_command != SQLCOM_INSERT &&
10429 mysql_bin_log.is_open() &&
10430 !thd->is_current_stmt_binlog_format_row() &&
10431 (thd->variables.option_bits & OPTION_BIN_LOG))
10432 {
10433 DBUG_PRINT("info", ("locking auto_increment_safe_stmt_log_lock"));
10434 auto_increment_safe_stmt_log_lock= TRUE;
10435 }
10436
10437 /* this gets corrected (for offset/increment) in update_auto_increment */
10438 *first_value= part_share->next_auto_inc_val;
10439 part_share->next_auto_inc_val+= nb_desired_values * increment;
10440
10441 unlock_auto_increment();
10442 DBUG_PRINT("info", ("*first_value: %lu", (ulong) *first_value));
10443 *nb_reserved_values= nb_desired_values;
10444 }
10445 DBUG_VOID_RETURN;
10446}
10447
/**
  Release reserved auto_increment values not used by the statement.
  Lowers the shared next_auto_inc_val when it is safe to do so, and
  releases the statement-wide lock taken in get_auto_increment().
*/
void ha_partition::release_auto_increment()
{
  DBUG_ENTER("ha_partition::release_auto_increment");

  if (table->s->next_number_keypart)
  {
    /*
      Auto_increment is a secondary key part: each partition handler
      reserved values on its own, so release per locked partition.
    */
    uint i;
    for (i= bitmap_get_first_set(&m_part_info->lock_partitions);
         i < m_tot_parts;
         i= bitmap_get_next_set(&m_part_info->lock_partitions, i))
    {
      m_file[i]->ha_release_auto_increment();
    }
  }
  else if (next_insert_id)
  {
    ulonglong next_auto_inc_val;
    lock_auto_increment();
    next_auto_inc_val= part_share->next_auto_inc_val;
    /*
      If the current auto_increment values is lower than the reserved
      value, and the reserved value was reserved by this thread,
      we can lower the reserved value.
    */
    if (next_insert_id < next_auto_inc_val &&
        auto_inc_interval_for_cur_row.maximum() >= next_auto_inc_val)
    {
      THD *thd= ha_thd();
      /*
        Check that we do not lower the value because of a failed insert
        with SET INSERT_ID, i.e. forced/non generated values.
      */
      if (thd->auto_inc_intervals_forced.maximum() < next_insert_id)
        part_share->next_auto_inc_val= next_insert_id;
    }
    DBUG_PRINT("info", ("part_share->next_auto_inc_val: %lu",
                        (ulong) part_share->next_auto_inc_val));

    /* Unlock the multi row statement lock taken in get_auto_increment */
    if (auto_increment_safe_stmt_log_lock)
    {
      auto_increment_safe_stmt_log_lock= FALSE;
      DBUG_PRINT("info", ("unlocking auto_increment_safe_stmt_log_lock"));
    }

    unlock_auto_increment();
  }
  DBUG_VOID_RETURN;
}
10497
10498/****************************************************************************
10499 MODULE initialize handler for HANDLER call
10500****************************************************************************/
10501
10502void ha_partition::init_table_handle_for_HANDLER()
10503{
10504 return;
10505}
10506
10507
10508/**
10509 Return the checksum of the table (all partitions)
10510*/
10511
10512uint ha_partition::checksum() const
10513{
10514 ha_checksum sum= 0;
10515
10516 DBUG_ENTER("ha_partition::checksum");
10517 if ((table_flags() & (HA_HAS_OLD_CHECKSUM | HA_HAS_NEW_CHECKSUM)))
10518 {
10519 handler **file= m_file;
10520 do
10521 {
10522 sum+= (*file)->checksum();
10523 } while (*(++file));
10524 }
10525 DBUG_RETURN(sum);
10526}
10527
10528
10529/****************************************************************************
10530 MODULE enable/disable indexes
10531****************************************************************************/
10532
10533/*
10534 Disable indexes for a while
10535 SYNOPSIS
10536 disable_indexes()
10537 mode Mode
10538 RETURN VALUES
10539 0 Success
10540 != 0 Error
10541*/
10542
10543int ha_partition::disable_indexes(uint mode)
10544{
10545 handler **file;
10546 int error= 0;
10547
10548 DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
10549 for (file= m_file; *file; file++)
10550 {
10551 if (unlikely((error= (*file)->ha_disable_indexes(mode))))
10552 break;
10553 }
10554 return error;
10555}
10556
10557
10558/*
10559 Enable indexes again
10560 SYNOPSIS
10561 enable_indexes()
10562 mode Mode
10563 RETURN VALUES
10564 0 Success
10565 != 0 Error
10566*/
10567
10568int ha_partition::enable_indexes(uint mode)
10569{
10570 handler **file;
10571 int error= 0;
10572
10573 DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
10574 for (file= m_file; *file; file++)
10575 {
10576 if (unlikely((error= (*file)->ha_enable_indexes(mode))))
10577 break;
10578 }
10579 return error;
10580}
10581
10582
10583/*
10584 Check if indexes are disabled
10585 SYNOPSIS
10586 indexes_are_disabled()
10587
10588 RETURN VALUES
10589 0 Indexes are enabled
10590 != 0 Indexes are disabled
10591*/
10592
10593int ha_partition::indexes_are_disabled(void)
10594{
10595 handler **file;
10596 int error= 0;
10597
10598 DBUG_ASSERT(bitmap_is_set_all(&(m_part_info->lock_partitions)));
10599 for (file= m_file; *file; file++)
10600 {
10601 if (unlikely((error= (*file)->indexes_are_disabled())))
10602 break;
10603 }
10604 return error;
10605}
10606
10607
10608/**
10609 Check/fix misplaced rows.
10610
10611 @param read_part_id Partition to check/fix.
  @param do_repair     If true, move misplaced rows to correct partition.
10613
10614 @return Operation status.
10615 @retval 0 Success
10616 @retval != 0 Error
10617*/
10618
int ha_partition::check_misplaced_rows(uint read_part_id, bool do_repair)
{
  int result= 0;
  uint32 correct_part_id;
  longlong func_value;
  longlong num_misplaced_rows= 0;

  DBUG_ENTER("ha_partition::check_misplaced_rows");

  DBUG_ASSERT(m_file);

  if (do_repair)
  {
    /* We must read the full row, if we need to move it! */
    bitmap_set_all(table->read_set);
    bitmap_set_all(table->write_set);
  }
  else
  {
    /* Only need to read the partitioning fields. */
    bitmap_union(table->read_set, &m_part_info->full_part_field_set);
    if (table->vcol_set)
      bitmap_union(table->vcol_set, &m_part_info->full_part_field_set);
  }

  /* Full table scan over the single partition being checked. */
  if ((result= m_file[read_part_id]->ha_rnd_init(1)))
    DBUG_RETURN(result);

  while (true)
  {
    if ((result= m_file[read_part_id]->ha_rnd_next(m_rec0)))
    {
      if (result != HA_ERR_END_OF_FILE)
        break;

      if (num_misplaced_rows > 0)
      {
        /* Inform the client how many rows REPAIR moved. */
        print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, "warning",
                        table_share->db.str, table->alias,
                        opt_op_name[REPAIR_PARTS],
                        "Moved %lld misplaced rows",
                        num_misplaced_rows);
      }
      /* End-of-file reached, all rows are now OK, reset result and break. */
      result= 0;
      break;
    }

    /*
      Compute which partition the current row belongs to according to the
      partitioning function, to compare with where it was actually found.
    */
    result= m_part_info->get_partition_id(m_part_info, &correct_part_id,
                                          &func_value);
    if (result)
      break;

    if (correct_part_id != read_part_id)
    {
      num_misplaced_rows++;
      if (!do_repair)
      {
        /* Check. */
        print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, "error",
                        table_share->db.str, table->alias,
                        opt_op_name[CHECK_PARTS],
                        "Found a misplaced row");
        /* Break on first misplaced row! */
        result= HA_ADMIN_NEEDS_UPGRADE;
        break;
      }
      else
      {
        DBUG_PRINT("info", ("Moving row from partition %u to %u",
                            (uint) read_part_id, (uint) correct_part_id));

        /*
          Insert row into correct partition. Notice that there are no commit
          for every N row, so the repair will be one large transaction!
        */
        if ((result= m_file[correct_part_id]->ha_write_row(m_rec0)))
        {
          /*
            We have failed to insert a row, it might have been a duplicate!
          */
          char buf[MAX_KEY_LENGTH];
          String str(buf,sizeof(buf),system_charset_info);
          str.length(0);
          if (result == HA_ERR_FOUND_DUPP_KEY)
          {
            str.append("Duplicate key found, "
                       "please update or delete the record:\n");
            result= HA_ADMIN_CORRUPT;
          }
          m_err_rec= NULL;
          append_row_to_str(str);

          /*
            If the engine supports transactions, the failure will be
            rollbacked.
          */
          if (!m_file[correct_part_id]->has_transactions())
          {
            /* Log this error, so the DBA can notice it and fix it! */
            sql_print_error("Table '%-192s' failed to move/insert a row"
                            " from part %u into part %u:\n%s",
                            table->s->table_name.str,
                            (uint) read_part_id,
                            (uint) correct_part_id,
                            str.c_ptr_safe());
          }
          print_admin_msg(ha_thd(), MYSQL_ERRMSG_SIZE, "error",
                          table_share->db.str, table->alias,
                          opt_op_name[REPAIR_PARTS],
                          "Failed to move/insert a row"
                          " from part %u into part %u:\n%s",
                          (uint) read_part_id,
                          (uint) correct_part_id,
                          str.c_ptr_safe());
          break;
        }

        /* Delete row from wrong partition. */
        if ((result= m_file[read_part_id]->ha_delete_row(m_rec0)))
        {
          /* Transactional engines roll the insert back; nothing to log. */
          if (m_file[correct_part_id]->has_transactions())
            break;
          /*
            We have introduced a duplicate, since we failed to remove it
            from the wrong partition.
          */
          char buf[MAX_KEY_LENGTH];
          String str(buf,sizeof(buf),system_charset_info);
          str.length(0);
          m_err_rec= NULL;
          append_row_to_str(str);

          /* Log this error, so the DBA can notice it and fix it! */
          sql_print_error("Table '%-192s': Delete from part %u failed with"
                          " error %d. But it was already inserted into"
                          " part %u, when moving the misplaced row!"
                          "\nPlease manually fix the duplicate row:\n%s",
                          table->s->table_name.str,
                          (uint) read_part_id,
                          result,
                          (uint) correct_part_id,
                          str.c_ptr_safe());
          break;
        }
      }
    }
  }

  /* Always end the scan; keep the scan's error over rnd_end's, if any. */
  int tmp_result= m_file[read_part_id]->ha_rnd_end();
  DBUG_RETURN(result ? result : tmp_result);
}
10771
10772
#define KEY_PARTITIONING_CHANGED_STR \
  "KEY () partitioning changed, please run:\n" \
  "ALTER TABLE %s.%s ALGORITHM = INPLACE %s"

/**
  Check whether the table must be upgraded (CHECK TABLE ... FOR UPGRADE).

  Detects 5.1-era tables (.frm version < 5.5.3) using KEY (sub)partitioning
  on field types whose hash calculation changed, see bug#14521864.

  @param check_opt  Check options; TT_FOR_UPGRADE set for FOR UPGRADE.

  @return HA_ADMIN_NEEDS_CHECK, HA_ADMIN_FAILED, or another admin status.
*/
int ha_partition::check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  int error= HA_ADMIN_NEEDS_CHECK;
  DBUG_ENTER("ha_partition::check_for_upgrade");

  /*
    This is called even without FOR UPGRADE,
    if the .frm version is lower than the current version.
    In that case return that it needs checking!
  */
  if (!(check_opt->sql_flags & TT_FOR_UPGRADE))
    DBUG_RETURN(error);

  /*
    Partitions will be checked for during their ha_check!

    Check if KEY (sub)partitioning was used and any field's hash calculation
    differs from 5.1, see bug#14521864.
  */
  if (table->s->mysql_version < 50503 &&              // 5.1 table (<5.5.3)
      ((m_part_info->part_type == HASH_PARTITION &&   // KEY partitioned
        m_part_info->list_of_part_fields) ||
       (m_is_sub_partitioned &&                       // KEY subpartitioned
        m_part_info->list_of_subpart_fields)))
  {
    Field **field;
    if (m_is_sub_partitioned)
    {
      field= m_part_info->subpart_field_array;
    }
    else
    {
      field= m_part_info->part_field_array;
    }
    for (; *field; field++)
    {
      /* These types had their hash calculation changed after 5.1. */
      switch ((*field)->real_type()) {
      case MYSQL_TYPE_TINY:
      case MYSQL_TYPE_SHORT:
      case MYSQL_TYPE_LONG:
      case MYSQL_TYPE_FLOAT:
      case MYSQL_TYPE_DOUBLE:
      case MYSQL_TYPE_NEWDECIMAL:
      case MYSQL_TYPE_TIMESTAMP:
      case MYSQL_TYPE_LONGLONG:
      case MYSQL_TYPE_INT24:
      case MYSQL_TYPE_TIME:
      case MYSQL_TYPE_DATETIME:
      case MYSQL_TYPE_YEAR:
      case MYSQL_TYPE_NEWDATE:
      case MYSQL_TYPE_ENUM:
      case MYSQL_TYPE_SET:
        {
          THD *thd= ha_thd();
          char *part_buf;
          String db_name, table_name;
          uint part_buf_len;
          bool skip_generation= false;
          partition_info::enum_key_algorithm old_algorithm;
          old_algorithm= m_part_info->key_algorithm;
          error= HA_ADMIN_FAILED;
          append_identifier(ha_thd(), &db_name, &table_share->db);
          append_identifier(ha_thd(), &table_name, &table_share->table_name);
          if (m_part_info->key_algorithm != partition_info::KEY_ALGORITHM_NONE)
          {
            /*
              Only possible when someone tampered with .frm files,
              like during tests :)
            */
            skip_generation= true;
          }
          /* Temporarily pretend 5.1 algorithm to print the old syntax. */
          m_part_info->key_algorithm= partition_info::KEY_ALGORITHM_51;
          if (skip_generation ||
              !(part_buf= generate_partition_syntax(thd, m_part_info,
                                                    &part_buf_len,
                                                    true,
                                                    NULL,
                                                    NULL)) ||
              print_admin_msg(thd, SQL_ADMIN_MSG_TEXT_SIZE + 1, "error",
                              table_share->db.str,
                              table->alias,
                              opt_op_name[CHECK_PARTS],
                              KEY_PARTITIONING_CHANGED_STR,
                              db_name.c_ptr_safe(),
                              table_name.c_ptr_safe(),
                              part_buf))
          {
            /* Error creating admin message (too long string?). */
            print_admin_msg(thd, MYSQL_ERRMSG_SIZE, "error",
                            table_share->db.str, table->alias,
                            opt_op_name[CHECK_PARTS],
                            KEY_PARTITIONING_CHANGED_STR,
                            db_name.c_ptr_safe(), table_name.c_ptr_safe(),
                            "<old partition clause>, but add ALGORITHM = 1"
                            " between 'KEY' and '(' to change the metadata"
                            " without the need of a full table rebuild.");
          }
          m_part_info->key_algorithm= old_algorithm;
          DBUG_RETURN(error);
        }
      default:
        /* Not affected! */
        ;
      }
    }
  }

  DBUG_RETURN(error);
}
10886
10887
10888TABLE_LIST *ha_partition::get_next_global_for_child()
10889{
10890 handler **file;
10891 DBUG_ENTER("ha_partition::get_next_global_for_child");
10892 for (file= m_file; *file; file++)
10893 {
10894 TABLE_LIST *table_list;
10895 if ((table_list= (*file)->get_next_global_for_child()))
10896 DBUG_RETURN(table_list);
10897 }
10898 DBUG_RETURN(0);
10899}
10900
10901
10902const COND *ha_partition::cond_push(const COND *cond)
10903{
10904 handler **file= m_file;
10905 COND *res_cond= NULL;
10906 DBUG_ENTER("ha_partition::cond_push");
10907
10908 if (set_top_table_fields)
10909 {
10910 /*
10911 We want to do this in a separate loop to not come into a situation
10912 where we have only done cond_push() to some of the tables
10913 */
10914 do
10915 {
10916 if (((*file)->set_top_table_and_fields(top_table,
10917 top_table_field,
10918 top_table_fields)))
10919 DBUG_RETURN(cond); // Abort cond push, no error
10920 } while (*(++file));
10921 file= m_file;
10922 }
10923
10924 do
10925 {
10926 if ((*file)->pushed_cond != cond)
10927 {
10928 if ((*file)->cond_push(cond))
10929 res_cond= (COND *) cond;
10930 else
10931 (*file)->pushed_cond= cond;
10932 }
10933 } while (*(++file));
10934 DBUG_RETURN(res_cond);
10935}
10936
10937
10938void ha_partition::cond_pop()
10939{
10940 handler **file= m_file;
10941 DBUG_ENTER("ha_partition::cond_push");
10942
10943 do
10944 {
10945 (*file)->cond_pop();
10946 } while (*(++file));
10947 DBUG_VOID_RETURN;
10948}
10949
10950
10951/**
10952 Perform bulk update preparation on each partition.
10953
10954 SYNOPSIS
10955 start_bulk_update()
10956
10957 RETURN VALUE
10958 TRUE Error
10959 FALSE Success
10960*/
10961
10962bool ha_partition::start_bulk_update()
10963{
10964 handler **file= m_file;
10965 DBUG_ENTER("ha_partition::start_bulk_update");
10966
10967 if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
10968 table->write_set))
10969 DBUG_RETURN(TRUE);
10970
10971 do
10972 {
10973 if ((*file)->start_bulk_update())
10974 DBUG_RETURN(TRUE);
10975 } while (*(++file));
10976 DBUG_RETURN(FALSE);
10977}
10978
10979
10980/**
10981 Perform bulk update execution on each partition. A bulk update allows
10982 a handler to batch the updated rows instead of performing the updates
10983 one row at a time.
10984
10985 SYNOPSIS
10986 exec_bulk_update()
10987
10988 RETURN VALUE
10989 TRUE Error
10990 FALSE Success
10991*/
10992
10993int ha_partition::exec_bulk_update(ha_rows *dup_key_found)
10994{
10995 int error;
10996 handler **file= m_file;
10997 DBUG_ENTER("ha_partition::exec_bulk_update");
10998
10999 do
11000 {
11001 if (unlikely((error= (*file)->exec_bulk_update(dup_key_found))))
11002 DBUG_RETURN(error);
11003 } while (*(++file));
11004 DBUG_RETURN(0);
11005}
11006
11007
11008/**
11009 Perform bulk update cleanup on each partition.
11010
11011 SYNOPSIS
11012 end_bulk_update()
11013
11014 RETURN VALUE
11015 NONE
11016*/
11017
11018int ha_partition::end_bulk_update()
11019{
11020 int error= 0;
11021 handler **file= m_file;
11022 DBUG_ENTER("ha_partition::end_bulk_update");
11023
11024 do
11025 {
11026 int tmp;
11027 if ((tmp= (*file)->end_bulk_update()))
11028 error= tmp;
11029 } while (*(++file));
11030 DBUG_RETURN(error);
11031}
11032
11033
11034/**
11035 Add the row to the bulk update on the partition on which the row is stored.
11036 A bulk update allows a handler to batch the updated rows instead of
11037 performing the updates one row at a time.
11038
11039 SYNOPSIS
11040 bulk_update_row()
11041 old_data Old record
11042 new_data New record
11043 dup_key_found Number of duplicate keys found
11044
11045 RETURN VALUE
11046 >1 Error
11047 1 Bulk update not used, normal operation used
11048 0 Bulk update used by handler
11049*/
11050
11051int ha_partition::bulk_update_row(const uchar *old_data, const uchar *new_data,
11052 ha_rows *dup_key_found)
11053{
11054 int error= 0;
11055 uint32 part_id;
11056 longlong func_value;
11057 my_bitmap_map *old_map;
11058 DBUG_ENTER("ha_partition::bulk_update_row");
11059
11060 old_map= dbug_tmp_use_all_columns(table, table->read_set);
11061 error= m_part_info->get_partition_id(m_part_info, &part_id,
11062 &func_value);
11063 dbug_tmp_restore_column_map(table->read_set, old_map);
11064 if (unlikely(error))
11065 {
11066 m_part_info->err_value= func_value;
11067 goto end;
11068 }
11069
11070 error= m_file[part_id]->ha_bulk_update_row(old_data, new_data,
11071 dup_key_found);
11072
11073end:
11074 DBUG_RETURN(error);
11075}
11076
11077
11078/**
11079 Perform bulk delete preparation on each partition.
11080
11081 SYNOPSIS
11082 start_bulk_delete()
11083
11084 RETURN VALUE
11085 TRUE Error
11086 FALSE Success
11087*/
11088
11089bool ha_partition::start_bulk_delete()
11090{
11091 handler **file= m_file;
11092 DBUG_ENTER("ha_partition::start_bulk_delete");
11093
11094 do
11095 {
11096 if ((*file)->start_bulk_delete())
11097 DBUG_RETURN(TRUE);
11098 } while (*(++file));
11099 DBUG_RETURN(FALSE);
11100}
11101
11102
11103/**
11104 Perform bulk delete cleanup on each partition.
11105
11106 SYNOPSIS
11107 end_bulk_delete()
11108
11109 RETURN VALUE
11110 >0 Error
11111 0 Success
11112*/
11113
11114int ha_partition::end_bulk_delete()
11115{
11116 int error= 0;
11117 handler **file= m_file;
11118 DBUG_ENTER("ha_partition::end_bulk_delete");
11119
11120 do
11121 {
11122 int tmp;
11123 if ((tmp= (*file)->end_bulk_delete()))
11124 error= tmp;
11125 } while (*(++file));
11126 DBUG_RETURN(error);
11127}
11128
11129
11130/**
11131 Perform initialization for a direct update request.
11132
11133 SYNOPSIS
11134 direct_update_rows_init()
11135
11136 RETURN VALUE
11137 >0 Error
11138 0 Success
11139*/
11140
int ha_partition::direct_update_rows_init()
{
  int error;
  uint i, found;
  handler *file;
  DBUG_ENTER("ha_partition::direct_update_rows_init");

  /*
    A pushed-down (direct) update cannot be used if the update changes any
    column of the partitioning function: the row might need to move to
    another partition.
  */
  if (bitmap_is_overlapping(&m_part_info->full_part_field_set,
                            table->write_set))
  {
    DBUG_PRINT("info", ("partition FALSE by updating part_key"));
    DBUG_RETURN(HA_ERR_WRONG_COMMAND);
  }

  m_part_spec.start_part= 0;
  m_part_spec.end_part= m_tot_parts - 1;
  m_direct_update_part_spec= m_part_spec;

  /* Every used (read + locked) partition must support direct update. */
  found= 0;
  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
        bitmap_is_set(&(m_part_info->lock_partitions), i))
    {
      file= m_file[i];
      if (unlikely((error= (m_pre_calling ?
                            file->pre_direct_update_rows_init() :
                            file->direct_update_rows_init()))))
      {
        DBUG_PRINT("info", ("partition FALSE by storage engine"));
        DBUG_RETURN(error);
      }
      found++;
    }
  }

  /*
    Refuse the pushdown when more than one partition is involved and the
    statement has an explicit LIMIT — presumably because each partition
    would apply the limit independently (TODO confirm against callers).
  */
  TABLE_LIST *table_list= table->pos_in_table_list;
  if (found != 1 && table_list)
  {
    while (table_list->parent_l)
      table_list= table_list->parent_l;
    st_select_lex *select_lex= table_list->select_lex;
    DBUG_PRINT("info", ("partition select_lex: %p", select_lex));
    if (select_lex && select_lex->explicit_limit)
    {
      DBUG_PRINT("info", ("partition explicit_limit=TRUE"));
      DBUG_PRINT("info", ("partition offset_limit: %p",
                          select_lex->offset_limit));
      DBUG_PRINT("info", ("partition select_limit: %p",
                          select_lex->select_limit));
      DBUG_PRINT("info", ("partition FALSE by select_lex"));
      DBUG_RETURN(HA_ERR_WRONG_COMMAND);
    }
  }
  DBUG_PRINT("info", ("partition OK"));
  DBUG_RETURN(0);
}
11198
11199
11200/**
11201 Do initialization for performing parallel direct update
11202 for a handlersocket update request.
11203
11204 SYNOPSIS
11205 pre_direct_update_rows_init()
11206
11207 RETURN VALUE
11208 >0 Error
11209 0 Success
11210*/
11211
11212int ha_partition::pre_direct_update_rows_init()
11213{
11214 bool save_m_pre_calling;
11215 int error;
11216 DBUG_ENTER("ha_partition::pre_direct_update_rows_init");
11217 save_m_pre_calling= m_pre_calling;
11218 m_pre_calling= TRUE;
11219 error= direct_update_rows_init();
11220 m_pre_calling= save_m_pre_calling;
11221 DBUG_RETURN(error);
11222}
11223
11224
11225/**
11226 Execute a direct update request. A direct update request updates all
11227 qualified rows in a single operation, rather than one row at a time.
11228 The direct update operation is pushed down to each individual
11229 partition.
11230
11231 SYNOPSIS
11232 direct_update_rows()
11233 update_rows Number of updated rows
11234
11235 RETURN VALUE
11236 >0 Error
11237 0 Success
11238*/
11239
int ha_partition::direct_update_rows(ha_rows *update_rows_result)
{
  int error;
  bool rnd_seq= FALSE;
  ha_rows update_rows= 0;
  uint32 i;
  DBUG_ENTER("ha_partition::direct_update_rows");

  /* If first call to direct_update_rows with RND scan */
  if ((m_pre_calling ? pre_inited : inited) == RND && m_scan_value == 1)
  {
    /* We own the per-partition scans; m_scan_value=2 marks them started. */
    rnd_seq= TRUE;
    m_scan_value= 2;
  }

  *update_rows_result= 0;
  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    handler *file= m_file[i];
    if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
        bitmap_is_set(&(m_part_info->lock_partitions), i))
    {
      /* Start a scan on this partition if we own the scan sequence. */
      if (rnd_seq && (m_pre_calling ? file->pre_inited : file->inited) == NONE)
      {
        if (unlikely((error= (m_pre_calling ?
                              file->ha_pre_rnd_init(TRUE) :
                              file->ha_rnd_init(TRUE)))))
          DBUG_RETURN(error);
      }
      if (unlikely((error= (m_pre_calling ?
                            (file)->pre_direct_update_rows() :
                            (file)->ha_direct_update_rows(&update_rows)))))
      {
        /* Only end a scan we started ourselves in this function. */
        if (rnd_seq)
        {
          if (m_pre_calling)
            file->ha_pre_rnd_end();
          else
            file->ha_rnd_end();
        }
        DBUG_RETURN(error);
      }
      /* Accumulate the rows updated by this partition. */
      *update_rows_result+= update_rows;
    }
    if (rnd_seq)
    {
      if (unlikely((error= (m_pre_calling ?
                            file->ha_pre_index_or_rnd_end() :
                            file->ha_index_or_rnd_end()))))
        DBUG_RETURN(error);
    }
  }
  DBUG_RETURN(0);
}
11294
11295
11296/**
11297 Start parallel execution of a direct update for a handlersocket update
11298 request. A direct update request updates all qualified rows in a single
11299 operation, rather than one row at a time. The direct update operation
11300 is pushed down to each individual partition.
11301
11302 SYNOPSIS
11303 pre_direct_update_rows()
11304
11305 RETURN VALUE
11306 >0 Error
11307 0 Success
11308*/
11309
11310int ha_partition::pre_direct_update_rows()
11311{
11312 bool save_m_pre_calling;
11313 int error;
11314 ha_rows not_used= 0;
11315 DBUG_ENTER("ha_partition::pre_direct_update_rows");
11316 save_m_pre_calling= m_pre_calling;
11317 m_pre_calling= TRUE;
11318 error= direct_update_rows(&not_used);
11319 m_pre_calling= save_m_pre_calling;
11320 DBUG_RETURN(error);
11321}
11322
11323
11324/**
11325 Perform initialization for a direct delete request.
11326
11327 SYNOPSIS
11328 direct_delete_rows_init()
11329
11330 RETURN VALUE
11331 >0 Error
11332 0 Success
11333*/
11334
int ha_partition::direct_delete_rows_init()
{
  int error;
  uint i, found;
  DBUG_ENTER("ha_partition::direct_delete_rows_init");

  m_part_spec.start_part= 0;
  m_part_spec.end_part= m_tot_parts - 1;
  m_direct_update_part_spec= m_part_spec;

  /* Every used (read + locked) partition must support direct delete. */
  found= 0;
  for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
  {
    if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
        bitmap_is_set(&(m_part_info->lock_partitions), i))
    {
      handler *file= m_file[i];
      if (unlikely((error= (m_pre_calling ?
                            file->pre_direct_delete_rows_init() :
                            file->direct_delete_rows_init()))))
      {
        DBUG_PRINT("exit", ("error in direct_delete_rows_init"));
        DBUG_RETURN(error);
      }
      found++;
    }
  }

  /*
    Refuse the pushdown when more than one partition is involved and the
    statement has an explicit LIMIT — presumably because each partition
    would apply the limit independently (TODO confirm against callers).
  */
  TABLE_LIST *table_list= table->pos_in_table_list;
  if (found != 1 && table_list)
  {
    while (table_list->parent_l)
      table_list= table_list->parent_l;
    st_select_lex *select_lex= table_list->select_lex;
    DBUG_PRINT("info", ("partition select_lex: %p", select_lex));
    if (select_lex && select_lex->explicit_limit)
    {
      DBUG_PRINT("info", ("partition explicit_limit: TRUE"));
      DBUG_PRINT("info", ("partition offset_limit: %p",
                          select_lex->offset_limit));
      DBUG_PRINT("info", ("partition select_limit: %p",
                          select_lex->select_limit));
      DBUG_PRINT("info", ("partition FALSE by select_lex"));
      DBUG_RETURN(HA_ERR_WRONG_COMMAND);
    }
  }
  DBUG_PRINT("exit", ("OK"));
  DBUG_RETURN(0);
}
11384
11385
11386/**
11387 Do initialization for performing parallel direct delete
11388 for a handlersocket delete request.
11389
11390 SYNOPSIS
11391 pre_direct_delete_rows_init()
11392
11393 RETURN VALUE
11394 >0 Error
11395 0 Success
11396*/
11397
11398int ha_partition::pre_direct_delete_rows_init()
11399{
11400 bool save_m_pre_calling;
11401 int error;
11402 DBUG_ENTER("ha_partition::pre_direct_delete_rows_init");
11403 save_m_pre_calling= m_pre_calling;
11404 m_pre_calling= TRUE;
11405 error= direct_delete_rows_init();
11406 m_pre_calling= save_m_pre_calling;
11407 DBUG_RETURN(error);
11408}
11409
11410
11411/**
11412 Execute a direct delete request. A direct delete request deletes all
11413 qualified rows in a single operation, rather than one row at a time.
11414 The direct delete operation is pushed down to each individual
11415 partition.
11416
11417 SYNOPSIS
11418 direct_delete_rows()
11419 delete_rows Number of deleted rows
11420
11421 RETURN VALUE
11422 >0 Error
11423 0 Success
11424*/
11425
11426int ha_partition::direct_delete_rows(ha_rows *delete_rows_result)
11427{
11428 int error;
11429 bool rnd_seq= FALSE;
11430 ha_rows delete_rows= 0;
11431 uint32 i;
11432 handler *file;
11433 DBUG_ENTER("ha_partition::direct_delete_rows");
11434
11435 if ((m_pre_calling ? pre_inited : inited) == RND && m_scan_value == 1)
11436 {
11437 rnd_seq= TRUE;
11438 m_scan_value= 2;
11439 }
11440
11441 *delete_rows_result= 0;
11442 m_part_spec= m_direct_update_part_spec;
11443 for (i= m_part_spec.start_part; i <= m_part_spec.end_part; i++)
11444 {
11445 file= m_file[i];
11446 if (bitmap_is_set(&(m_part_info->read_partitions), i) &&
11447 bitmap_is_set(&(m_part_info->lock_partitions), i))
11448 {
11449 if (rnd_seq && (m_pre_calling ? file->pre_inited : file->inited) == NONE)
11450 {
11451 if (unlikely((error= (m_pre_calling ?
11452 file->ha_pre_rnd_init(TRUE) :
11453 file->ha_rnd_init(TRUE)))))
11454 DBUG_RETURN(error);
11455 }
11456 if ((error= (m_pre_calling ?
11457 file->pre_direct_delete_rows() :
11458 file->ha_direct_delete_rows(&delete_rows))))
11459 {
11460 if (m_pre_calling)
11461 file->ha_pre_rnd_end();
11462 else
11463 file->ha_rnd_end();
11464 DBUG_RETURN(error);
11465 }
11466 delete_rows_result+= delete_rows;
11467 }
11468 if (rnd_seq)
11469 {
11470 if (unlikely((error= (m_pre_calling ?
11471 file->ha_pre_index_or_rnd_end() :
11472 file->ha_index_or_rnd_end()))))
11473 DBUG_RETURN(error);
11474 }
11475 }
11476 DBUG_RETURN(0);
11477}
11478
11479
11480/**
11481 Start parallel execution of a direct delete for a handlersocket delete
11482 request. A direct delete request deletes all qualified rows in a single
11483 operation, rather than one row at a time. The direct delete operation
11484 is pushed down to each individual partition.
11485
11486 SYNOPSIS
11487 pre_direct_delete_rows()
11488
11489 RETURN VALUE
11490 >0 Error
11491 0 Success
11492*/
11493
11494int ha_partition::pre_direct_delete_rows()
11495{
11496 bool save_m_pre_calling;
11497 int error;
11498 ha_rows not_used;
11499 DBUG_ENTER("ha_partition::pre_direct_delete_rows");
11500 save_m_pre_calling= m_pre_calling;
11501 m_pre_calling= TRUE;
11502 error= direct_delete_rows(&not_used);
11503 m_pre_calling= save_m_pre_calling;
11504 DBUG_RETURN(error);
11505}
11506
11507/**
11508 Push metadata for the current operation down to each partition.
11509
11510 SYNOPSIS
11511 info_push()
11512
11513 RETURN VALUE
11514 >0 Error
11515 0 Success
11516*/
11517
11518int ha_partition::info_push(uint info_type, void *info)
11519{
11520 int error= 0;
11521 handler **file= m_file;
11522 DBUG_ENTER("ha_partition::info_push");
11523
11524 do
11525 {
11526 int tmp;
11527 if ((tmp= (*file)->info_push(info_type, info)))
11528 error= tmp;
11529 } while (*(++file));
11530 DBUG_RETURN(error);
11531}
11532
11533
11534void ha_partition::clear_top_table_fields()
11535{
11536 handler **file;
11537 DBUG_ENTER("ha_partition::clear_top_table_fields");
11538
11539 if (set_top_table_fields)
11540 {
11541 set_top_table_fields= FALSE;
11542 top_table= NULL;
11543 top_table_field= NULL;
11544 top_table_fields= 0;
11545 for (file= m_file; *file; file++)
11546 (*file)->clear_top_table_fields();
11547 }
11548 DBUG_VOID_RETURN;
11549}
11550
11551
/* Handlerton interface descriptor for the partition engine plugin. */
struct st_mysql_storage_engine partition_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };

/* Plugin registration for the partition storage engine helper. */
maria_declare_plugin(partition)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &partition_storage_engine,
  "partition",
  "Mikael Ronstrom, MySQL AB",
  "Partition Storage Engine Helper",
  PLUGIN_LICENSE_GPL,
  partition_initialize, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100, /* 1.0 */
  NULL, /* status variables */
  NULL, /* system variables */
  "1.0", /* string version */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
}
maria_declare_plugin_end;
11572
11573#endif
11574