| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ | 
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: | 
| 3 | #ident "$Id$" | 
| 4 | /*====== | 
| 5 | This file is part of TokuDB | 
| 6 |  | 
| 7 |  | 
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. | 
| 9 |  | 
    TokuDB is free software: you can redistribute it and/or modify
| 11 |     it under the terms of the GNU General Public License, version 2, | 
| 12 |     as published by the Free Software Foundation. | 
| 13 |  | 
| 14 |     TokuDB is distributed in the hope that it will be useful, | 
| 15 |     but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 16 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 17 |     GNU General Public License for more details. | 
| 18 |  | 
| 19 |     You should have received a copy of the GNU General Public License | 
| 20 |     along with TokuDB.  If not, see <http://www.gnu.org/licenses/>. | 
| 21 |  | 
| 22 | ======= */ | 
| 23 |  | 
| 24 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." | 
| 25 |  | 
| 26 | #include "hatoku_hton.h" | 
| 27 | #include "hatoku_cmp.h" | 
| 28 | #include "tokudb_buffer.h" | 
| 29 | #include "tokudb_status.h" | 
| 30 | #include "tokudb_card.h" | 
| 31 | #include "ha_tokudb.h" | 
| 32 | #include "sql_db.h" | 
| 33 |  | 
| 34 | pfs_key_t ha_tokudb_mutex_key; | 
| 35 | pfs_key_t num_DBs_lock_key; | 
| 36 |  | 
| 37 | #if TOKU_INCLUDE_EXTENDED_KEYS | 
| 38 | static inline uint get_ext_key_parts(const KEY *key) { | 
| 39 | #if (50609 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \ | 
| 40 |     (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) | 
| 41 |     return key->actual_key_parts; | 
| 42 | #elif defined(MARIADB_BASE_VERSION) | 
| 43 |     return key->ext_key_parts; | 
| 44 | #else | 
#error "get_ext_key_parts() is not implemented for this server version"
| 46 | #endif | 
| 47 | } | 
| 48 | #endif | 
| 49 |  | 
| 50 | HASH TOKUDB_SHARE::_open_tables; | 
| 51 | tokudb::thread::mutex_t TOKUDB_SHARE::_open_tables_mutex; | 
| 52 |  | 
| 53 | static const char* ha_tokudb_exts[] = { | 
| 54 |     ha_tokudb_ext, | 
| 55 |     NullS | 
| 56 | }; | 
| 57 |  | 
| 58 | // | 
| 59 | // This offset is calculated starting from AFTER the NULL bytes | 
| 60 | // | 
| 61 | static inline uint32_t get_fixed_field_size( | 
| 62 |     KEY_AND_COL_INFO* kc_info, | 
| 63 |     TABLE_SHARE* table_share, | 
| 64 |     uint keynr) { | 
| 65 |  | 
| 66 |     uint offset = 0; | 
| 67 |     for (uint i = 0; i < table_share->fields; i++) { | 
| 68 |         if (is_fixed_field(kc_info, i) && | 
| 69 |             !bitmap_is_set(&kc_info->key_filters[keynr], i)) { | 
| 70 |             offset += kc_info->field_lengths[i]; | 
| 71 |         } | 
| 72 |     } | 
| 73 |     return offset; | 
| 74 | } | 
| 75 |  | 
| 76 |  | 
| 77 | static inline uint32_t get_len_of_offsets( | 
| 78 |     KEY_AND_COL_INFO* kc_info, | 
| 79 |     TABLE_SHARE* table_share, | 
| 80 |     uint keynr) { | 
| 81 |  | 
| 82 |     uint len = 0; | 
| 83 |     for (uint i = 0; i < table_share->fields; i++) { | 
| 84 |         if (is_variable_field(kc_info, i) && | 
| 85 |             !bitmap_is_set(&kc_info->key_filters[keynr], i)) { | 
| 86 |             len += kc_info->num_offset_bytes; | 
| 87 |         } | 
| 88 |     } | 
| 89 |     return len; | 
| 90 | } | 
| 91 |  | 
| 92 |  | 
| 93 | static int allocate_key_and_col_info( | 
| 94 |     TABLE_SHARE* table_share, | 
| 95 |     KEY_AND_COL_INFO* kc_info) { | 
| 96 |  | 
| 97 |     int error; | 
| 98 |     // | 
| 99 |     // initialize all of the bitmaps | 
| 100 |     // | 
| 101 |     for (uint i = 0; i < MAX_KEY + 1; i++) { | 
| 102 |         error = | 
| 103 |             bitmap_init( | 
| 104 |                 &kc_info->key_filters[i], | 
| 105 |                 NULL, | 
| 106 |                 table_share->fields, | 
| 107 |                 false); | 
| 108 |         if (error) { | 
| 109 |             goto exit; | 
| 110 |         } | 
| 111 |     } | 
| 112 |  | 
| 113 |     // | 
| 114 |     // create the field lengths | 
| 115 |     // | 
| 116 |     kc_info->multi_ptr = tokudb::memory::multi_malloc( | 
| 117 |         MYF(MY_WME+MY_ZEROFILL), | 
| 118 |         &kc_info->field_types, (uint)(table_share->fields * sizeof (uint8_t)), | 
| 119 |         &kc_info->field_lengths, (uint)(table_share->fields * sizeof (uint16_t)), | 
| 120 |         &kc_info->length_bytes, (uint)(table_share->fields * sizeof (uint8_t)), | 
| 121 |         &kc_info->blob_fields, (uint)(table_share->fields * sizeof (uint32_t)), | 
| 122 |         NullS); | 
| 123 |     if (kc_info->multi_ptr == NULL) { | 
| 124 |         error = ENOMEM; | 
| 125 |         goto exit; | 
| 126 |     } | 
| 127 | exit: | 
| 128 |     if (error) { | 
        for (uint i = 0; i < MAX_KEY + 1; i++) {
| 130 |             bitmap_free(&kc_info->key_filters[i]); | 
| 131 |         } | 
| 132 |         tokudb::memory::free(kc_info->multi_ptr); | 
| 133 |     } | 
| 134 |     return error; | 
| 135 | } | 
| 136 |  | 
| 137 | static void free_key_and_col_info (KEY_AND_COL_INFO* kc_info) { | 
| 138 |     for (uint i = 0; i < MAX_KEY+1; i++) { | 
| 139 |         bitmap_free(&kc_info->key_filters[i]); | 
| 140 |     } | 
| 141 |  | 
| 142 |     for (uint i = 0; i < MAX_KEY+1; i++) { | 
| 143 |         tokudb::memory::free(kc_info->cp_info[i]); | 
| 144 |         kc_info->cp_info[i] = NULL; // 3144 | 
| 145 |     } | 
| 146 |  | 
| 147 |     tokudb::memory::free(kc_info->multi_ptr); | 
| 148 |     kc_info->field_types = NULL; | 
| 149 |     kc_info->field_lengths = NULL; | 
| 150 |     kc_info->length_bytes = NULL; | 
| 151 |     kc_info->blob_fields = NULL; | 
| 152 | } | 
| 153 |  | 
| 154 |  | 
| 155 | uchar* TOKUDB_SHARE::hash_get_key( | 
| 156 |     TOKUDB_SHARE* share, | 
| 157 |     size_t* length, | 
| 158 |     TOKUDB_UNUSED(my_bool not_used)) { | 
| 159 |  | 
| 160 |     *length = share->_full_table_name.length(); | 
| 161 |     return (uchar *) share->_full_table_name.c_ptr(); | 
| 162 | } | 
| 163 | void TOKUDB_SHARE::hash_free_element(TOKUDB_SHARE* share) { | 
| 164 |     share->destroy(); | 
| 165 |     delete share; | 
| 166 | } | 
| 167 | void TOKUDB_SHARE::static_init() { | 
| 168 |     my_hash_init( | 
| 169 |         &_open_tables, | 
| 170 |         table_alias_charset, | 
| 171 |         32, | 
| 172 |         0, | 
| 173 |         0, | 
| 174 |         (my_hash_get_key)hash_get_key, | 
| 175 |         (my_hash_free_key)hash_free_element, 0); | 
| 176 | } | 
| 177 | void TOKUDB_SHARE::static_destroy() { | 
| 178 |     my_hash_free(&_open_tables); | 
| 179 | } | 
| 180 | const char* TOKUDB_SHARE::get_state_string(share_state_t state) { | 
| 181 |     static const char* state_string[] = { | 
| 182 |         "CLOSED" , | 
| 183 |         "OPENED" , | 
| 184 |         "ERROR"  | 
| 185 |     }; | 
| 186 |     assert_always(state == CLOSED || state == OPENED || state == ERROR); | 
| 187 |     return state_string[state]; | 
| 188 | } | 
| 189 | void* TOKUDB_SHARE::operator new(size_t sz) { | 
| 190 |     return tokudb::memory::malloc(sz, MYF(MY_WME|MY_ZEROFILL|MY_FAE)); | 
| 191 | } | 
| 192 | void TOKUDB_SHARE::operator delete(void* p) { tokudb::memory::free(p); } | 
| 193 | TOKUDB_SHARE::TOKUDB_SHARE() | 
| 194 |     : _num_DBs_lock(num_DBs_lock_key), _mutex(ha_tokudb_mutex_key) {} | 
| 195 | void TOKUDB_SHARE::init(const char* table_name) { | 
| 196 |     _use_count = 0; | 
| 197 |     thr_lock_init(&_thr_lock); | 
| 198 |     _state = CLOSED; | 
| 199 |     _row_delta_activity = 0; | 
| 200 |     _allow_auto_analysis = true; | 
| 201 |  | 
| 202 |     _full_table_name.append(table_name); | 
| 203 |  | 
| 204 |     String tmp_dictionary_name; | 
| 205 |     tokudb_split_dname( | 
| 206 |         table_name, | 
| 207 |         _database_name, | 
| 208 |         _table_name, | 
| 209 |         tmp_dictionary_name); | 
| 210 |  | 
| 211 |     TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]" , | 
| 212 |         _full_table_name.ptr(), | 
| 213 |         get_state_string(_state), | 
| 214 |         _use_count); | 
| 215 |     TOKUDB_SHARE_DBUG_VOID_RETURN(); | 
| 216 | } | 
| 217 | void TOKUDB_SHARE::destroy() { | 
| 218 |     TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]" , | 
| 219 |         _full_table_name.ptr(), | 
| 220 |         get_state_string(_state), | 
| 221 |         _use_count); | 
| 222 |  | 
| 223 |     assert_always(_use_count == 0); | 
| 224 |     assert_always( | 
| 225 |         _state == TOKUDB_SHARE::CLOSED || _state == TOKUDB_SHARE::ERROR); | 
| 226 |     thr_lock_delete(&_thr_lock); | 
| 227 |     TOKUDB_SHARE_DBUG_VOID_RETURN(); | 
| 228 | } | 
| 229 | TOKUDB_SHARE* TOKUDB_SHARE::get_share(const char* table_name, | 
| 230 |                                       TABLE_SHARE* table_share, | 
| 231 |                                       THR_LOCK_DATA* data, | 
| 232 |                                       bool create_new) { | 
| 233 |     mutex_t_lock(_open_tables_mutex); | 
| 234 |     int error = 0; | 
| 235 |     uint length = (uint)strlen(table_name); | 
| 236 |     TOKUDB_SHARE* share = (TOKUDB_SHARE*)my_hash_search( | 
| 237 |         &_open_tables, (uchar*)table_name, length); | 
| 238 |  | 
| 239 |     TOKUDB_TRACE_FOR_FLAGS( | 
| 240 |         TOKUDB_DEBUG_SHARE, | 
| 241 |         "existing share[%s] %s:share[%p]" , | 
| 242 |         table_name, | 
| 243 |         share == NULL ? "not found"  : "found" , | 
| 244 |         share); | 
| 245 |  | 
| 246 |     if (!share) { | 
| 247 |         if (create_new == false) | 
| 248 |             goto exit; | 
| 249 |         // create share and fill it with all zeroes | 
| 250 |         // hence, all pointers are initialized to NULL | 
| 251 |         share = new TOKUDB_SHARE; | 
| 252 |         assert_always(share); | 
| 253 |  | 
| 254 |         share->init(table_name); | 
| 255 |  | 
| 256 |         error = my_hash_insert(&_open_tables, (uchar*)share); | 
| 257 |         if (error) { | 
| 258 |             free_key_and_col_info(&share->kc_info); | 
| 259 |             share->destroy(); | 
| 260 |             tokudb::memory::free((uchar*)share); | 
| 261 |             share = NULL; | 
| 262 |             goto exit; | 
| 263 |         } | 
| 264 |     } | 
| 265 |  | 
| 266 |     share->addref(); | 
| 267 |  | 
| 268 |     if (data) | 
| 269 |         thr_lock_data_init(&(share->_thr_lock), data, NULL); | 
| 270 |  | 
| 271 | exit: | 
| 272 |     mutex_t_unlock(_open_tables_mutex); | 
| 273 |     return share; | 
| 274 | } | 
| 275 | void TOKUDB_SHARE::drop_share(TOKUDB_SHARE* share) { | 
| 276 |     TOKUDB_TRACE_FOR_FLAGS(TOKUDB_DEBUG_SHARE, | 
| 277 |                            "share[%p]:file[%s]:state[%s]:use_count[%d]" , | 
| 278 |                            share, | 
| 279 |                            share->_full_table_name.ptr(), | 
| 280 |                            get_state_string(share->_state), | 
| 281 |                            share->_use_count); | 
| 282 |  | 
| 283 |     mutex_t_lock(_open_tables_mutex); | 
| 284 |     my_hash_delete(&_open_tables, (uchar*)share); | 
| 285 |     mutex_t_unlock(_open_tables_mutex); | 
| 286 | } | 
| 287 | TOKUDB_SHARE::share_state_t TOKUDB_SHARE::addref() { | 
| 288 |     TOKUDB_SHARE_TRACE_FOR_FLAGS((TOKUDB_DEBUG_ENTER & TOKUDB_DEBUG_SHARE), | 
| 289 |                                  "file[%s]:state[%s]:use_count[%d]" , | 
| 290 |                                  _full_table_name.ptr(), | 
| 291 |                                  get_state_string(_state), | 
| 292 |                                  _use_count); | 
| 293 |  | 
| 294 |     lock(); | 
| 295 |     _use_count++; | 
| 296 |  | 
| 297 |     return _state; | 
| 298 | } | 
| 299 | int TOKUDB_SHARE::release() { | 
| 300 |     TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]" , | 
| 301 |         _full_table_name.ptr(), | 
| 302 |         get_state_string(_state), | 
| 303 |         _use_count); | 
| 304 |  | 
| 305 |     int error, result = 0; | 
| 306 |  | 
| 307 |     mutex_t_lock(_mutex); | 
| 308 |     assert_always(_use_count != 0); | 
| 309 |     _use_count--; | 
| 310 |     if (_use_count == 0 && _state == TOKUDB_SHARE::OPENED) { | 
| 311 |         // number of open DB's may not be equal to number of keys we have | 
| 312 |         // because add_index may have added some. So, we loop through entire | 
| 313 |         // array and close any non-NULL value.  It is imperative that we reset | 
| 314 |         // a DB to NULL once we are done with it. | 
| 315 |         for (uint i = 0; i < sizeof(key_file)/sizeof(key_file[0]); i++) { | 
| 316 |             if (key_file[i]) { | 
| 317 |                 TOKUDB_TRACE_FOR_FLAGS( | 
| 318 |                     TOKUDB_DEBUG_OPEN, | 
| 319 |                     "dbclose:%p" , | 
| 320 |                     key_file[i]); | 
| 321 |                 error = key_file[i]->close(key_file[i], 0); | 
| 322 |                 assert_always(error == 0); | 
| 323 |                 if (error) { | 
| 324 |                     result = error; | 
| 325 |                 } | 
| 326 |                 if (key_file[i] == file) | 
| 327 |                     file = NULL; | 
| 328 |                 key_file[i] = NULL; | 
| 329 |             } | 
| 330 |         } | 
| 331 |  | 
| 332 |         error = tokudb::metadata::close(&status_block); | 
| 333 |         assert_always(error == 0); | 
| 334 |  | 
| 335 |         free_key_and_col_info(&kc_info); | 
| 336 |  | 
| 337 |         if (_rec_per_key) { | 
| 338 |             tokudb::memory::free(_rec_per_key); | 
| 339 |             _rec_per_key = NULL; | 
| 340 |             _rec_per_keys = 0; | 
| 341 |         } | 
| 342 |  | 
| 343 |         for (uint i = 0; i < _keys; i++) { | 
| 344 |            tokudb::memory::free(_key_descriptors[i]._name); | 
| 345 |         } | 
| 346 |         tokudb::memory::free(_key_descriptors); | 
| 347 |         _keys = _max_key_parts = 0; _key_descriptors = NULL; | 
| 348 |  | 
| 349 |         _state = TOKUDB_SHARE::CLOSED; | 
| 350 |     } | 
| 351 |     mutex_t_unlock(_mutex); | 
| 352 |  | 
| 353 |     TOKUDB_SHARE_DBUG_RETURN(result); | 
| 354 | } | 
| 355 | void TOKUDB_SHARE::update_row_count( | 
| 356 |     THD* thd, | 
| 357 |     uint64_t added, | 
| 358 |     uint64_t deleted, | 
| 359 |     uint64_t updated) { | 
| 360 |  | 
| 361 |     uint64_t delta = added + deleted + updated; | 
| 362 |     lock(); | 
| 363 |     if (deleted > added && _rows < (deleted - added)) { | 
| 364 |         _rows = 0; | 
| 365 |     } else { | 
| 366 |         _rows += added - deleted; | 
| 367 |     } | 
| 368 |     _row_delta_activity += delta; | 
| 369 |     if (_row_delta_activity == (uint64_t)~0) | 
| 370 |         _row_delta_activity = 1; | 
| 371 |  | 
| 372 |     ulonglong auto_threshold = tokudb::sysvars::auto_analyze(thd); | 
| 373 |     if (delta && auto_threshold > 0 && _allow_auto_analysis) { | 
| 374 |         ulonglong pct_of_rows_changed_to_trigger; | 
| 375 |         pct_of_rows_changed_to_trigger = ((_rows * auto_threshold) / 100); | 
| 376 |         if (_row_delta_activity >= pct_of_rows_changed_to_trigger) { | 
| 377 |             char msg[200]; | 
| 378 |             snprintf(msg, | 
| 379 |                      sizeof(msg), | 
| 380 |                      "TokuDB: Auto %s analysis for %s, delta_activity %llu is "  | 
| 381 |                      "greater than %llu percent of %llu rows." , | 
| 382 |                      tokudb::sysvars::analyze_in_background(thd) > 0 | 
| 383 |                          ? "scheduling background"  | 
| 384 |                          : "running foreground" , | 
| 385 |                      full_table_name(), | 
| 386 |                      _row_delta_activity, | 
| 387 |                      auto_threshold, | 
| 388 |                      (ulonglong)(_rows)); | 
| 389 |  | 
| 390 |             // analyze_standard will unlock _mutex regardless of success/failure | 
| 391 |             int ret = analyze_standard(thd, NULL); | 
| 392 |             if (ret == 0) { | 
| 393 |                 sql_print_information("%s - succeeded." , msg); | 
| 394 |             } else { | 
| 395 |                 sql_print_information( | 
| 396 |                     "%s - failed, likely a job already running." , | 
| 397 |                     msg); | 
| 398 |             } | 
| 399 |         } | 
| 400 |     } | 
| 401 |     unlock(); | 
| 402 | } | 
| 403 | void TOKUDB_SHARE::set_cardinality_counts_in_table(TABLE* table) { | 
| 404 |     lock(); | 
| 405 |     uint32_t next_key_part = 0; | 
| 406 |     for (uint32_t i = 0; i < table->s->keys; i++) { | 
| 407 |         KEY* key = &table->key_info[i]; | 
| 408 |         bool is_unique_key = | 
| 409 |             (i == table->s->primary_key) || (key->flags & HA_NOSAME); | 
| 410 |  | 
| 411 |         for (uint32_t j = 0; j < get_ext_key_parts(key); j++) { | 
| 412 |             if (j >= key->user_defined_key_parts) { | 
| 413 |                 // MySQL 'hidden' keys, really needs deeper investigation | 
| 414 |                 // into MySQL hidden keys vs TokuDB hidden keys | 
| 415 |                 key->rec_per_key[j] = 1; | 
| 416 |                 continue; | 
| 417 |             } | 
| 418 |  | 
| 419 |             assert_always(next_key_part < _rec_per_keys); | 
| 420 |             ulong val = _rec_per_key[next_key_part++]; | 
| 421 |             val = (val * tokudb::sysvars::cardinality_scale_percent) / 100; | 
| 422 |             if (val == 0 || _rows == 0 || | 
| 423 |                 (is_unique_key && j == get_ext_key_parts(key) - 1)) { | 
| 424 |                 val = 1; | 
| 425 |             } | 
| 426 |             key->rec_per_key[j] = val; | 
| 427 |         } | 
| 428 |     } | 
| 429 |     unlock(); | 
| 430 | } | 
| 431 |  | 
| 432 | #define HANDLE_INVALID_CURSOR() \ | 
| 433 |     if (cursor == NULL) { \ | 
| 434 |         error = last_cursor_error; \ | 
| 435 |         goto cleanup; \ | 
| 436 |     } | 
| 437 |  | 
| 438 | const char *ha_tokudb::table_type() const { | 
| 439 |     return tokudb_hton_name; | 
| 440 | }  | 
| 441 |  | 
| 442 | const char *ha_tokudb::index_type(uint inx) { | 
| 443 |     return "BTREE" ; | 
| 444 | } | 
| 445 |  | 
| 446 | /* | 
| 447 |  *  returns NULL terminated file extension string | 
| 448 |  */ | 
| 449 | const char **ha_tokudb::bas_ext() const { | 
| 450 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 451 |     DBUG_RETURN(ha_tokudb_exts); | 
| 452 | } | 
| 453 |  | 
| 454 | static inline bool is_insert_ignore (THD* thd) { | 
| 455 |     // | 
| 456 |     // from http://lists.mysql.com/internals/37735 | 
| 457 |     // | 
| 458 |     return thd->lex->ignore && thd->lex->duplicates == DUP_ERROR; | 
| 459 | } | 
| 460 |  | 
| 461 | static inline bool is_replace_into(THD* thd) { | 
| 462 |     return thd->lex->duplicates == DUP_REPLACE; | 
| 463 | } | 
| 464 |  | 
| 465 | static inline bool do_ignore_flag_optimization( | 
| 466 |     THD* thd, | 
| 467 |     TABLE* table, | 
| 468 |     bool opt_eligible) { | 
| 469 |  | 
| 470 |     bool do_opt = false; | 
| 471 |     if (opt_eligible && | 
| 472 |         (is_replace_into(thd) || is_insert_ignore(thd)) && | 
| 473 |         tokudb::sysvars::pk_insert_mode(thd) == 1 && | 
| 474 |         !table->triggers && | 
| 475 |         !(mysql_bin_log.is_open() && | 
| 476 |          thd->variables.binlog_format != BINLOG_FORMAT_STMT)) { | 
| 477 |         do_opt = true; | 
| 478 |     } | 
| 479 |     return do_opt; | 
| 480 | } | 
| 481 |  | 
| 482 | ulonglong ha_tokudb::table_flags() const { | 
| 483 |     return int_table_flags | HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE; | 
| 484 | } | 
| 485 |  | 
| 486 | // | 
| 487 | // Returns a bit mask of capabilities of the key or its part specified by  | 
| 488 | // the arguments. The capabilities are defined in sql/handler.h. | 
| 489 | // | 
| 490 | ulong ha_tokudb::index_flags(uint idx, uint part, bool all_parts) const { | 
| 491 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 492 |     assert_always(table_share); | 
| 493 |     ulong flags = (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | | 
| 494 |         HA_KEYREAD_ONLY | HA_READ_RANGE | HA_DO_INDEX_COND_PUSHDOWN); | 
| 495 |     if (key_is_clustering(&table_share->key_info[idx])) { | 
| 496 |         flags |= HA_CLUSTERED_INDEX; | 
| 497 |     } | 
| 498 |     DBUG_RETURN(flags); | 
| 499 | } | 
| 500 |  | 
| 501 |  | 
| 502 | // | 
| 503 | // struct that will be used as a context for smart DBT callbacks | 
| 504 | // contains parameters needed to complete the smart DBT cursor call | 
| 505 | // | 
| 506 | typedef struct smart_dbt_info { | 
| 507 |     ha_tokudb* ha; //instance to ha_tokudb needed for reading the row | 
| 508 |     uchar* buf; // output buffer where row will be written | 
| 509 |     uint keynr; // index into share->key_file that represents DB we are currently operating on | 
| 510 | } *SMART_DBT_INFO; | 
| 511 |  | 
| 512 | typedef struct smart_dbt_bf_info { | 
| 513 |     ha_tokudb* ha; | 
| 514 |     bool need_val; | 
| 515 |     int direction; | 
| 516 |     THD* thd; | 
| 517 |     uchar* buf; | 
| 518 |     DBT* key_to_compare; | 
| 519 | } *SMART_DBT_BF_INFO; | 
| 520 |  | 
| 521 | typedef struct index_read_info { | 
| 522 |     struct smart_dbt_info smart_dbt_info; | 
| 523 |     int cmp; | 
| 524 |     DBT* orig_key; | 
| 525 | } *INDEX_READ_INFO; | 
| 526 |  | 
| 527 | // | 
| 528 | // smart DBT callback function for optimize | 
| 529 | // in optimize, we want to flatten DB by doing | 
| 530 | // a full table scan. Therefore, we don't | 
| 531 | // want to actually do anything with the data, hence | 
| 532 | // callback does nothing | 
| 533 | // | 
| 534 | static int smart_dbt_do_nothing (DBT const *key, DBT  const *row, void *context) { | 
| 535 |   return 0; | 
| 536 | } | 
| 537 |  | 
| 538 | static int | 
| 539 | smart_dbt_callback_rowread_ptquery (DBT const *key, DBT  const *row, void *context) { | 
| 540 |     SMART_DBT_INFO info = (SMART_DBT_INFO)context; | 
| 541 |     info->ha->extract_hidden_primary_key(info->keynr, key); | 
| 542 |     return info->ha->read_row_callback(info->buf,info->keynr,row,key); | 
| 543 | } | 
| 544 |  | 
| 545 | // | 
| 546 | // Smart DBT callback function in case where we have a covering index | 
| 547 | // | 
| 548 | static int | 
| 549 | smart_dbt_callback_keyread(DBT const *key, DBT  const *row, void *context) { | 
| 550 |     SMART_DBT_INFO info = (SMART_DBT_INFO)context; | 
| 551 |     info->ha->extract_hidden_primary_key(info->keynr, key); | 
| 552 |     info->ha->read_key_only(info->buf,info->keynr,key); | 
| 553 |     return 0; | 
| 554 | } | 
| 555 |  | 
| 556 | // | 
| 557 | // Smart DBT callback function in case where we do NOT have a covering index | 
| 558 | // | 
| 559 | static int | 
| 560 | smart_dbt_callback_rowread(DBT const *key, DBT  const *row, void *context) { | 
| 561 |     int error = 0; | 
| 562 |     SMART_DBT_INFO info = (SMART_DBT_INFO)context; | 
| 563 |     info->ha->extract_hidden_primary_key(info->keynr, key); | 
| 564 |     error = info->ha->read_primary_key(info->buf,info->keynr,row,key); | 
| 565 |     return error; | 
| 566 | } | 
| 567 |  | 
| 568 | // | 
| 569 | // Smart DBT callback function in case where we have a covering index | 
| 570 | // | 
| 571 | static int | 
| 572 | smart_dbt_callback_ir_keyread(DBT const *key, DBT  const *row, void *context) { | 
| 573 |     INDEX_READ_INFO ir_info = (INDEX_READ_INFO)context; | 
| 574 |     ir_info->cmp = ir_info->smart_dbt_info.ha->prefix_cmp_dbts(ir_info->smart_dbt_info.keynr, ir_info->orig_key, key); | 
| 575 |     if (ir_info->cmp) { | 
| 576 |         return 0; | 
| 577 |     } | 
| 578 |     return smart_dbt_callback_keyread(key, row, &ir_info->smart_dbt_info); | 
| 579 | } | 
| 580 |  | 
| 581 | static int | 
| 582 | smart_dbt_callback_lookup(DBT const *key, DBT  const *row, void *context) { | 
| 583 |     INDEX_READ_INFO ir_info = (INDEX_READ_INFO)context; | 
| 584 |     ir_info->cmp = ir_info->smart_dbt_info.ha->prefix_cmp_dbts(ir_info->smart_dbt_info.keynr, ir_info->orig_key, key); | 
| 585 |     return 0; | 
| 586 | } | 
| 587 |  | 
| 588 |  | 
| 589 | // | 
| 590 | // Smart DBT callback function in case where we do NOT have a covering index | 
| 591 | // | 
| 592 | static int | 
| 593 | smart_dbt_callback_ir_rowread(DBT const *key, DBT  const *row, void *context) { | 
| 594 |     INDEX_READ_INFO ir_info = (INDEX_READ_INFO)context; | 
| 595 |     ir_info->cmp = ir_info->smart_dbt_info.ha->prefix_cmp_dbts(ir_info->smart_dbt_info.keynr, ir_info->orig_key, key); | 
| 596 |     if (ir_info->cmp) { | 
| 597 |         return 0; | 
| 598 |     } | 
| 599 |     return smart_dbt_callback_rowread(key, row, &ir_info->smart_dbt_info); | 
| 600 | } | 
| 601 |  | 
| 602 | // | 
| 603 | // macro for Smart DBT callback function,  | 
| 604 | // so we do not need to put this long line of code in multiple places | 
| 605 | // | 
| 606 | #define SMART_DBT_CALLBACK(do_key_read) ((do_key_read) ? smart_dbt_callback_keyread : smart_dbt_callback_rowread )  | 
| 607 | #define SMART_DBT_IR_CALLBACK(do_key_read) ((do_key_read) ? smart_dbt_callback_ir_keyread : smart_dbt_callback_ir_rowread )  | 
| 608 |  | 
| 609 | // | 
| 610 | // macro that modifies read flag for cursor operations depending on whether | 
| 611 | // we have preacquired lock or not | 
| 612 | // | 
| 613 | #define SET_PRELOCK_FLAG(flg) ((flg) | (range_lock_grabbed ? (use_write_locks ? DB_PRELOCKED_WRITE : DB_PRELOCKED) : 0)) | 
| 614 |  | 
| 615 | // | 
| 616 | // This method retrieves the value of the auto increment column of a record in MySQL format | 
| 617 | // This was basically taken from MyISAM | 
| 618 | // Parameters: | 
| 619 | //              type - the type of the auto increment column (e.g. int, float, double...) | 
| 620 | //              offset - offset into the record where the auto increment column is stored | 
| 621 | //      [in]    record - MySQL row whose auto increment value we want to extract | 
| 622 | // Returns: | 
| 623 | //      The value of the auto increment column in record | 
| 624 | // | 
| 625 | static ulonglong retrieve_auto_increment(uint16 type, uint32 offset,const uchar *record) | 
| 626 | { | 
| 627 |     const uchar *key;     /* Key */ | 
| 628 |     ulonglong   unsigned_autoinc = 0;  /* Unsigned auto-increment */ | 
| 629 |     longlong      signed_autoinc = 0;  /* Signed auto-increment */ | 
| 630 |     enum { unsigned_type, signed_type } autoinc_type; | 
| 631 |     float float_tmp;   /* Temporary variable */ | 
| 632 |     double double_tmp; /* Temporary variable */ | 
| 633 |  | 
| 634 |     key = ((uchar *) record) + offset; | 
| 635 |  | 
| 636 |     /* Set default autoincrement type */ | 
| 637 |     autoinc_type = unsigned_type; | 
| 638 |  | 
| 639 |     switch (type) { | 
| 640 |     case HA_KEYTYPE_INT8: | 
| 641 |         signed_autoinc   = (longlong) *(char*)key; | 
| 642 |         autoinc_type     = signed_type; | 
| 643 |         break; | 
| 644 |  | 
| 645 |     case HA_KEYTYPE_BINARY: | 
| 646 |         unsigned_autoinc = (ulonglong) *(uchar*) key; | 
| 647 |         break; | 
| 648 |  | 
| 649 |     case HA_KEYTYPE_SHORT_INT: | 
| 650 |         signed_autoinc   = (longlong) sint2korr(key); | 
| 651 |         autoinc_type     = signed_type; | 
| 652 |         break; | 
| 653 |  | 
| 654 |     case HA_KEYTYPE_USHORT_INT: | 
| 655 |         unsigned_autoinc = (ulonglong) uint2korr(key); | 
| 656 |         break; | 
| 657 |  | 
| 658 |     case HA_KEYTYPE_LONG_INT: | 
| 659 |         signed_autoinc   = (longlong) sint4korr(key); | 
| 660 |         autoinc_type     = signed_type; | 
| 661 |         break; | 
| 662 |  | 
| 663 |     case HA_KEYTYPE_ULONG_INT: | 
| 664 |         unsigned_autoinc = (ulonglong) uint4korr(key); | 
| 665 |         break; | 
| 666 |  | 
| 667 |     case HA_KEYTYPE_INT24: | 
| 668 |         signed_autoinc   = (longlong) sint3korr(key); | 
| 669 |         autoinc_type     = signed_type; | 
| 670 |         break; | 
| 671 |  | 
| 672 |     case HA_KEYTYPE_UINT24: | 
| 673 |         unsigned_autoinc = (ulonglong) tokudb_uint3korr(key); | 
| 674 |         break; | 
| 675 |  | 
| 676 |     case HA_KEYTYPE_LONGLONG: | 
| 677 |         signed_autoinc   = sint8korr(key); | 
| 678 |         autoinc_type     = signed_type; | 
| 679 |         break; | 
| 680 |  | 
| 681 |     case HA_KEYTYPE_ULONGLONG: | 
| 682 |         unsigned_autoinc = uint8korr(key); | 
| 683 |         break; | 
| 684 |  | 
| 685 |     /* The remaining two cases should not be used but are included for  | 
| 686 |        compatibility */ | 
| 687 |     case HA_KEYTYPE_FLOAT:                       | 
| 688 |         float4get(float_tmp, key);  /* Note: float4get is a macro */ | 
| 689 |         signed_autoinc   = (longlong) float_tmp; | 
| 690 |         autoinc_type     = signed_type; | 
| 691 |         break; | 
| 692 |  | 
| 693 |     case HA_KEYTYPE_DOUBLE: | 
| 694 |         float8get(double_tmp, key); /* Note: float8get is a macro */ | 
| 695 |         signed_autoinc   = (longlong) double_tmp; | 
| 696 |         autoinc_type     = signed_type; | 
| 697 |         break; | 
| 698 |  | 
| 699 |     default: | 
| 700 |         assert_unreachable(); | 
| 701 |     } | 
| 702 |  | 
| 703 |     if (signed_autoinc < 0) { | 
| 704 |         signed_autoinc = 0; | 
| 705 |     } | 
| 706 |  | 
| 707 |     return autoinc_type == unsigned_type ?   | 
| 708 |            unsigned_autoinc : (ulonglong) signed_autoinc; | 
| 709 | } | 
| 710 |  | 
| 711 | static inline ulong field_offset(Field* field, TABLE* table) { | 
| 712 |     return((ulong) (field->ptr - table->record[0])); | 
| 713 | } | 
| 714 |  | 
| 715 | static inline HA_TOKU_ISO_LEVEL tx_to_toku_iso(ulong tx_isolation) { | 
| 716 |     if (tx_isolation == ISO_READ_UNCOMMITTED) { | 
| 717 |         return hatoku_iso_read_uncommitted; | 
| 718 |     } | 
| 719 |     else if (tx_isolation == ISO_READ_COMMITTED) { | 
| 720 |         return hatoku_iso_read_committed; | 
| 721 |     } | 
| 722 |     else if (tx_isolation == ISO_REPEATABLE_READ) { | 
| 723 |         return hatoku_iso_repeatable_read; | 
| 724 |     } | 
| 725 |     else { | 
| 726 |         return hatoku_iso_serializable; | 
| 727 |     } | 
| 728 | } | 
| 729 |  | 
| 730 | static inline uint32_t toku_iso_to_txn_flag (HA_TOKU_ISO_LEVEL lvl) { | 
| 731 |     if (lvl == hatoku_iso_read_uncommitted) { | 
| 732 |         return DB_READ_UNCOMMITTED; | 
| 733 |     } | 
| 734 |     else if (lvl == hatoku_iso_read_committed) { | 
| 735 |         return DB_READ_COMMITTED; | 
| 736 |     } | 
| 737 |     else if (lvl == hatoku_iso_repeatable_read) { | 
| 738 |         return DB_TXN_SNAPSHOT; | 
| 739 |     } | 
| 740 |     else { | 
| 741 |         return 0; | 
| 742 |     } | 
| 743 | } | 
| 744 |  | 
| 745 | static int filter_key_part_compare (const void* left, const void* right) { | 
| 746 |     FILTER_KEY_PART_INFO* left_part= (FILTER_KEY_PART_INFO *)left; | 
| 747 |     FILTER_KEY_PART_INFO* right_part = (FILTER_KEY_PART_INFO *)right; | 
| 748 |     return left_part->offset - right_part->offset; | 
| 749 | } | 
| 750 |  | 
| 751 | // | 
| 752 | // Be very careful with parameters passed to this function. Who knows | 
| 753 | // if key, table have proper info set. I had to verify by checking | 
| 754 | // in the debugger. | 
| 755 | // | 
| 756 | void set_key_filter( | 
| 757 |     MY_BITMAP* key_filter, | 
| 758 |     KEY* key, | 
| 759 |     TABLE* table, | 
| 760 |     bool get_offset_from_keypart) { | 
| 761 |  | 
| 762 |     FILTER_KEY_PART_INFO parts[MAX_REF_PARTS]; | 
| 763 |     uint curr_skip_index = 0; | 
| 764 |  | 
| 765 |     for (uint i = 0; i < key->user_defined_key_parts; i++) { | 
| 766 |         // | 
| 767 |         // horrendous hack due to bugs in mysql, basically | 
| 768 |         // we cannot always reliably get the offset from the same source | 
| 769 |         // | 
| 770 |         parts[i].offset = | 
| 771 |             get_offset_from_keypart ? | 
| 772 |                 key->key_part[i].offset : | 
| 773 |                 field_offset(key->key_part[i].field, table); | 
| 774 |         parts[i].part_index = i; | 
| 775 |     } | 
| 776 |     qsort( | 
| 777 |         parts, // start of array | 
| 778 |         key->user_defined_key_parts, //num elements | 
| 779 |         sizeof(*parts), //size of each element | 
| 780 |         filter_key_part_compare); | 
| 781 |  | 
| 782 |     for (uint i = 0; i < table->s->fields; i++) { | 
| 783 |         Field* field = table->field[i]; | 
| 784 |         uint curr_field_offset = field_offset(field, table); | 
| 785 |         if (curr_skip_index < key->user_defined_key_parts) { | 
| 786 |             uint curr_skip_offset = 0; | 
| 787 |             curr_skip_offset = parts[curr_skip_index].offset; | 
| 788 |             if (curr_skip_offset == curr_field_offset) { | 
| 789 |                 // | 
| 790 |                 // we have hit a field that is a portion of the primary key | 
| 791 |                 // | 
| 792 |                 uint curr_key_index = parts[curr_skip_index].part_index; | 
| 793 |                 curr_skip_index++; | 
| 794 |                 // | 
| 795 |                 // only choose to continue over the key if the key's length matches the field's length | 
| 796 |                 // otherwise, we may have a situation where the column is a varchar(10), the | 
| 797 |                 // key is only the first 3 characters, and we end up losing the last 7 bytes of the | 
| 798 |                 // column | 
| 799 |                 // | 
| 800 |                 TOKU_TYPE toku_type = mysql_to_toku_type(field); | 
| 801 |                 switch (toku_type) { | 
| 802 |                 case toku_type_blob: | 
| 803 |                     break; | 
| 804 |                 case toku_type_varbinary: | 
| 805 |                 case toku_type_varstring: | 
| 806 |                 case toku_type_fixbinary: | 
| 807 |                 case toku_type_fixstring: | 
| 808 |                     if (key->key_part[curr_key_index].length == field->field_length) { | 
| 809 |                         bitmap_set_bit(key_filter,i); | 
| 810 |                     } | 
| 811 |                     break; | 
| 812 |                 default: | 
| 813 |                     bitmap_set_bit(key_filter,i); | 
| 814 |                     break; | 
| 815 |                 } | 
| 816 |             } | 
| 817 |         } | 
| 818 |     } | 
| 819 | } | 
| 820 |  | 
| 821 | static inline uchar* pack_fixed_field( | 
| 822 |     uchar* to_tokudb, | 
| 823 |     const uchar* from_mysql, | 
| 824 |     uint32_t num_bytes | 
| 825 |     ) | 
| 826 | { | 
| 827 |     switch (num_bytes) { | 
| 828 |     case (1): | 
| 829 |         memcpy(to_tokudb, from_mysql, 1); | 
| 830 |         break; | 
| 831 |     case (2): | 
| 832 |         memcpy(to_tokudb, from_mysql, 2); | 
| 833 |         break; | 
| 834 |     case (3): | 
| 835 |         memcpy(to_tokudb, from_mysql, 3); | 
| 836 |         break; | 
| 837 |     case (4): | 
| 838 |         memcpy(to_tokudb, from_mysql, 4); | 
| 839 |         break; | 
| 840 |     case (8): | 
| 841 |         memcpy(to_tokudb, from_mysql, 8); | 
| 842 |         break; | 
| 843 |     default: | 
| 844 |         memcpy(to_tokudb, from_mysql, num_bytes); | 
| 845 |         break; | 
| 846 |     } | 
| 847 |     return to_tokudb+num_bytes; | 
| 848 | } | 
| 849 |  | 
| 850 | static inline const uchar* unpack_fixed_field( | 
| 851 |     uchar* to_mysql, | 
| 852 |     const uchar* from_tokudb, | 
| 853 |     uint32_t num_bytes | 
| 854 |     ) | 
| 855 | { | 
| 856 |     switch (num_bytes) { | 
| 857 |     case (1): | 
| 858 |         memcpy(to_mysql, from_tokudb, 1); | 
| 859 |         break; | 
| 860 |     case (2): | 
| 861 |         memcpy(to_mysql, from_tokudb, 2); | 
| 862 |         break; | 
| 863 |     case (3): | 
| 864 |         memcpy(to_mysql, from_tokudb, 3); | 
| 865 |         break; | 
| 866 |     case (4): | 
| 867 |         memcpy(to_mysql, from_tokudb, 4); | 
| 868 |         break; | 
| 869 |     case (8): | 
| 870 |         memcpy(to_mysql, from_tokudb, 8); | 
| 871 |         break; | 
| 872 |     default: | 
| 873 |         memcpy(to_mysql, from_tokudb, num_bytes); | 
| 874 |         break; | 
| 875 |     } | 
| 876 |     return from_tokudb+num_bytes; | 
| 877 | } | 
| 878 |  | 
| 879 | static inline uchar* write_var_field( | 
| 880 |     uchar* to_tokudb_offset_ptr, //location where offset data is going to be written | 
| 881 |     uchar* to_tokudb_data, // location where data is going to be written | 
    uchar* to_tokudb_offset_start, //base address that packed offsets are measured from
| 883 |     const uchar * data, // the data to write | 
| 884 |     uint32_t data_length, // length of data to write | 
| 885 |     uint32_t offset_bytes // number of offset bytes | 
| 886 |     ) | 
| 887 | { | 
| 888 |     memcpy(to_tokudb_data, data, data_length); | 
| 889 |     // | 
| 890 |     // for offset, we pack the offset where the data ENDS! | 
| 891 |     // | 
| 892 |     uint32_t offset = to_tokudb_data + data_length - to_tokudb_offset_start; | 
| 893 |     switch(offset_bytes) { | 
| 894 |     case (1): | 
| 895 |         to_tokudb_offset_ptr[0] = (uchar)offset; | 
| 896 |         break; | 
| 897 |     case (2): | 
| 898 |         int2store(to_tokudb_offset_ptr,offset); | 
| 899 |         break; | 
| 900 |     default: | 
| 901 |         assert_unreachable(); | 
| 902 |         break; | 
| 903 |     } | 
| 904 |     return to_tokudb_data + data_length; | 
| 905 | } | 
| 906 |  | 
| 907 | static inline uint32_t get_var_data_length( | 
| 908 |     const uchar * from_mysql,  | 
| 909 |     uint32_t mysql_length_bytes  | 
| 910 |     )  | 
| 911 | { | 
| 912 |     uint32_t data_length; | 
| 913 |     switch(mysql_length_bytes) { | 
| 914 |     case(1): | 
| 915 |         data_length = from_mysql[0]; | 
| 916 |         break; | 
| 917 |     case(2): | 
| 918 |         data_length = uint2korr(from_mysql); | 
| 919 |         break; | 
| 920 |     default: | 
| 921 |         assert_unreachable(); | 
| 922 |     } | 
| 923 |     return data_length; | 
| 924 | } | 
| 925 |  | 
| 926 | static inline uchar* pack_var_field( | 
| 927 |     uchar* to_tokudb_offset_ptr, //location where offset data is going to be written | 
| 928 |     uchar* to_tokudb_data, // pointer to where tokudb data should be written | 
    uchar* to_tokudb_offset_start, //base address that packed offsets are measured from
| 930 |     const uchar * from_mysql, // mysql data | 
| 931 |     uint32_t mysql_length_bytes, //number of bytes used to store length in from_mysql | 
| 932 |     uint32_t offset_bytes //number of offset_bytes used in tokudb row | 
| 933 |     ) | 
| 934 | { | 
| 935 |     uint data_length = get_var_data_length(from_mysql, mysql_length_bytes);     | 
| 936 |     return write_var_field( | 
| 937 |         to_tokudb_offset_ptr, | 
| 938 |         to_tokudb_data, | 
| 939 |         to_tokudb_offset_start, | 
| 940 |         from_mysql + mysql_length_bytes, | 
| 941 |         data_length, | 
| 942 |         offset_bytes | 
| 943 |         ); | 
| 944 | } | 
| 945 |  | 
| 946 | static inline void unpack_var_field( | 
| 947 |     uchar* to_mysql, | 
| 948 |     const uchar* from_tokudb_data, | 
| 949 |     uint32_t from_tokudb_data_len, | 
| 950 |     uint32_t mysql_length_bytes | 
| 951 |     ) | 
| 952 | { | 
| 953 |     // | 
| 954 |     // store the length | 
| 955 |     // | 
| 956 |     switch (mysql_length_bytes) { | 
| 957 |     case(1): | 
| 958 |         to_mysql[0] = (uchar)from_tokudb_data_len; | 
| 959 |         break; | 
| 960 |     case(2): | 
| 961 |         int2store(to_mysql, from_tokudb_data_len); | 
| 962 |         break; | 
| 963 |     default: | 
| 964 |         assert_unreachable(); | 
| 965 |     } | 
| 966 |     // | 
| 967 |     // store the data | 
| 968 |     // | 
| 969 |     memcpy(to_mysql+mysql_length_bytes, from_tokudb_data, from_tokudb_data_len); | 
| 970 | } | 
| 971 |  | 
| 972 | static uchar* pack_toku_field_blob( | 
| 973 |     uchar* to_tokudb, | 
| 974 |     const uchar* from_mysql, | 
| 975 |     Field* field | 
| 976 |     ) | 
| 977 | { | 
| 978 |     uint32_t len_bytes = field->row_pack_length(); | 
| 979 |     uint32_t length = 0; | 
| 980 |     uchar* data_ptr = NULL; | 
| 981 |     memcpy(to_tokudb, from_mysql, len_bytes); | 
| 982 |  | 
| 983 |     switch (len_bytes) { | 
| 984 |     case (1): | 
| 985 |         length = (uint32_t)(*from_mysql); | 
| 986 |         break; | 
| 987 |     case (2): | 
| 988 |         length = uint2korr(from_mysql); | 
| 989 |         break; | 
| 990 |     case (3): | 
| 991 |         length = tokudb_uint3korr(from_mysql); | 
| 992 |         break; | 
| 993 |     case (4): | 
| 994 |         length = uint4korr(from_mysql); | 
| 995 |         break; | 
| 996 |     default: | 
| 997 |         assert_unreachable(); | 
| 998 |     } | 
| 999 |  | 
| 1000 |     if (length > 0) { | 
| 1001 |         memcpy((uchar *)(&data_ptr), from_mysql + len_bytes, sizeof(uchar*)); | 
| 1002 |         memcpy(to_tokudb + len_bytes, data_ptr, length); | 
| 1003 |     } | 
| 1004 |     return (to_tokudb + len_bytes + length); | 
| 1005 | } | 
| 1006 |  | 
| 1007 | static int create_tokudb_trx_data_instance(tokudb_trx_data** out_trx) { | 
| 1008 |     int error; | 
| 1009 |     tokudb_trx_data* trx = (tokudb_trx_data *) tokudb::memory::malloc( | 
| 1010 |         sizeof(*trx), | 
| 1011 |         MYF(MY_ZEROFILL)); | 
| 1012 |     if (!trx) { | 
| 1013 |         error = ENOMEM; | 
| 1014 |         goto cleanup; | 
| 1015 |     } | 
| 1016 |  | 
| 1017 |     *out_trx = trx; | 
| 1018 |     error = 0; | 
| 1019 | cleanup: | 
| 1020 |     return error; | 
| 1021 | } | 
| 1022 |  | 
| 1023 |  | 
| 1024 | static inline int tokudb_generate_row( | 
| 1025 |     DB *dest_db,  | 
| 1026 |     DB *src_db, | 
| 1027 |     DBT *dest_key,  | 
| 1028 |     DBT *dest_val, | 
| 1029 |     const DBT *src_key,  | 
| 1030 |     const DBT *src_val | 
| 1031 |     )  | 
| 1032 | { | 
| 1033 |     int error; | 
| 1034 |  | 
| 1035 |     DB* curr_db = dest_db; | 
| 1036 |     uchar* row_desc = NULL; | 
| 1037 |     uint32_t desc_size; | 
| 1038 |     uchar* buff = NULL; | 
| 1039 |     uint32_t max_key_len = 0; | 
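    // Descriptor layout, as implied by the pointer arithmetic below (a
    // hedged reading of this code, not a format spec): the descriptor
    // begins with a uint32 byte-offset to the key description; each
    // description then starts with a uint32 holding its total size
    // (including the 4 size bytes themselves), followed by the desc_size
    // bytes that describe the row format.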
| 1040 |      | 
| 1041 |     row_desc = (uchar *)curr_db->descriptor->dbt.data; | 
| 1042 |     row_desc += (*(uint32_t *)row_desc); | 
| 1043 |     desc_size = (*(uint32_t *)row_desc) - 4; | 
| 1044 |     row_desc += 4; | 
| 1045 |      | 
| 1046 |     if (is_key_pk(row_desc, desc_size)) { | 
| 1047 |         if (dest_key->flags == DB_DBT_REALLOC && dest_key->data != NULL) { | 
| 1048 |             free(dest_key->data); | 
| 1049 |         } | 
| 1050 |         if (dest_val != NULL) { | 
| 1051 |             if (dest_val->flags == DB_DBT_REALLOC && dest_val->data != NULL) { | 
| 1052 |                 free(dest_val->data); | 
| 1053 |             } | 
| 1054 |         } | 
| 1055 |         dest_key->data = src_key->data; | 
| 1056 |         dest_key->size = src_key->size; | 
| 1057 |         dest_key->flags = 0; | 
| 1058 |         if (dest_val != NULL) { | 
| 1059 |             dest_val->data = src_val->data; | 
| 1060 |             dest_val->size = src_val->size; | 
| 1061 |             dest_val->flags = 0; | 
| 1062 |         } | 
| 1063 |         error = 0; | 
| 1064 |         goto cleanup; | 
| 1065 |     } | 
| 1066 |     // at this point, we need to create the key/val and set it | 
| 1067 |     // in the DBTs | 
| 1068 |     if (dest_key->flags == 0) { | 
| 1069 |         dest_key->ulen = 0; | 
| 1070 |         dest_key->size = 0; | 
| 1071 |         dest_key->data = NULL; | 
| 1072 |         dest_key->flags = DB_DBT_REALLOC; | 
| 1073 |     } | 
| 1074 |     if (dest_key->flags == DB_DBT_REALLOC) { | 
| 1075 |         max_key_len = max_key_size_from_desc(row_desc, desc_size); | 
| 1076 |         max_key_len += src_key->size; | 
| 1077 |          | 
| 1078 |         if (max_key_len > dest_key->ulen) { | 
| 1079 |             void* old_ptr = dest_key->data; | 
| 1080 |             void* new_ptr = NULL; | 
| 1081 |             new_ptr = realloc(old_ptr, max_key_len); | 
| 1082 |             assert_always(new_ptr); | 
| 1083 |             dest_key->data = new_ptr; | 
| 1084 |             dest_key->ulen = max_key_len; | 
| 1085 |         } | 
| 1086 |  | 
| 1087 |         buff = (uchar *)dest_key->data; | 
| 1088 |         assert_always(buff != NULL && max_key_len > 0); | 
| 1089 |     } else { | 
| 1090 |         assert_unreachable(); | 
| 1091 |     } | 
| 1092 |  | 
| 1093 |     dest_key->size = pack_key_from_desc(buff, row_desc, desc_size, src_key, | 
| 1094 |                                         src_val); | 
| 1095 |     assert_always(dest_key->ulen >= dest_key->size); | 
| 1096 |     if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_CHECK_KEY)) && | 
| 1097 |         !max_key_len) { | 
| 1098 |         max_key_len = max_key_size_from_desc(row_desc, desc_size); | 
| 1099 |         max_key_len += src_key->size; | 
| 1100 |     } | 
| 1101 |     if (max_key_len) { | 
| 1102 |         assert_always(max_key_len >= dest_key->size); | 
| 1103 |     } | 
| 1104 |  | 
| 1105 |     row_desc += desc_size; | 
| 1106 |     desc_size = (*(uint32_t *)row_desc) - 4; | 
| 1107 |     row_desc += 4; | 
| 1108 |     if (dest_val != NULL) { | 
| 1109 |         if (!is_key_clustering(row_desc, desc_size) || src_val->size == 0) { | 
| 1110 |             dest_val->size = 0; | 
| 1111 |         } else { | 
| 1112 |             uchar* buff = NULL; | 
| 1113 |             if (dest_val->flags == 0) { | 
| 1114 |                 dest_val->ulen = 0; | 
| 1115 |                 dest_val->size = 0; | 
| 1116 |                 dest_val->data = NULL; | 
| 1117 |                 dest_val->flags = DB_DBT_REALLOC; | 
| 1118 |             } | 
| 1119 |             if (dest_val->flags == DB_DBT_REALLOC){ | 
| 1120 |                 if (dest_val->ulen < src_val->size) { | 
| 1121 |                     void* old_ptr = dest_val->data; | 
| 1122 |                     void* new_ptr = NULL; | 
| 1123 |                     new_ptr = realloc(old_ptr, src_val->size); | 
| 1124 |                     assert_always(new_ptr); | 
| 1125 |                     dest_val->data = new_ptr; | 
| 1126 |                     dest_val->ulen = src_val->size; | 
| 1127 |                 } | 
| 1128 |                 buff = (uchar *)dest_val->data; | 
| 1129 |                 assert_always(buff != NULL); | 
| 1130 |             } else { | 
| 1131 |                 assert_unreachable(); | 
| 1132 |             } | 
| 1133 |             dest_val->size = pack_clustering_val_from_desc( | 
| 1134 |                 buff, | 
| 1135 |                 row_desc, | 
| 1136 |                 desc_size, | 
| 1137 |                 src_val); | 
| 1138 |             assert_always(dest_val->ulen >= dest_val->size); | 
| 1139 |         } | 
| 1140 |     } | 
| 1141 |     error = 0; | 
| 1142 | cleanup: | 
| 1143 |     return error; | 
| 1144 | } | 
| 1145 |  | 
| 1146 | static int generate_row_for_del( | 
| 1147 |     DB *dest_db,  | 
| 1148 |     DB *src_db, | 
| 1149 |     DBT_ARRAY *dest_key_arrays, | 
| 1150 |     const DBT *src_key,  | 
| 1151 |     const DBT *src_val | 
| 1152 |     ) | 
| 1153 | { | 
| 1154 |     DBT* dest_key = &dest_key_arrays->dbts[0]; | 
| 1155 |     return tokudb_generate_row( | 
| 1156 |         dest_db, | 
| 1157 |         src_db, | 
| 1158 |         dest_key, | 
| 1159 |         NULL, | 
| 1160 |         src_key, | 
| 1161 |         src_val | 
| 1162 |         ); | 
| 1163 | } | 
| 1164 |  | 
| 1165 |  | 
| 1166 | static int generate_row_for_put( | 
| 1167 |     DB *dest_db,  | 
| 1168 |     DB *src_db, | 
| 1169 |     DBT_ARRAY *dest_key_arrays, | 
| 1170 |     DBT_ARRAY *dest_val_arrays, | 
| 1171 |     const DBT *src_key,  | 
| 1172 |     const DBT *src_val | 
| 1173 |     )  | 
| 1174 | { | 
| 1175 |     DBT* dest_key = &dest_key_arrays->dbts[0]; | 
| 1176 |     DBT *dest_val = (dest_val_arrays == NULL) ? NULL : &dest_val_arrays->dbts[0]; | 
| 1177 |     return tokudb_generate_row( | 
| 1178 |         dest_db, | 
| 1179 |         src_db, | 
| 1180 |         dest_key, | 
| 1181 |         dest_val, | 
| 1182 |         src_key, | 
| 1183 |         src_val | 
| 1184 |         ); | 
| 1185 | } | 
| 1186 |  | 
| 1187 | ha_tokudb::ha_tokudb(handlerton * hton, TABLE_SHARE * table_arg):handler(hton, table_arg) { | 
| 1188 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 1189 |     share = NULL; | 
| 1190 |     int_table_flags = HA_REC_NOT_IN_SEQ  | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS | 
| 1191 |         | HA_PRIMARY_KEY_IN_READ_INDEX | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | 
| 1192 |         | HA_FILE_BASED | HA_AUTO_PART_KEY | HA_TABLE_SCAN_ON_INDEX | 
| 1193 |         | HA_CAN_WRITE_DURING_OPTIMIZE | HA_ONLINE_ANALYZE; | 
| 1194 |     alloc_ptr = NULL; | 
| 1195 |     rec_buff = NULL; | 
| 1196 |     rec_update_buff = NULL; | 
| 1197 |     transaction = NULL; | 
| 1198 |     cursor = NULL; | 
| 1199 |     fixed_cols_for_query = NULL; | 
| 1200 |     var_cols_for_query = NULL; | 
| 1201 |     num_fixed_cols_for_query = 0; | 
| 1202 |     num_var_cols_for_query = 0; | 
| 1203 |     unpack_entire_row = true; | 
| 1204 |     read_blobs = false; | 
| 1205 |     read_key = false; | 
| 1206 |     added_rows = 0; | 
| 1207 |     deleted_rows = 0; | 
| 1208 |     updated_rows = 0; | 
| 1209 |     last_dup_key = UINT_MAX; | 
| 1210 |     using_ignore = false; | 
| 1211 |     using_ignore_no_key = false; | 
| 1212 |     last_cursor_error = 0; | 
| 1213 |     range_lock_grabbed = false; | 
| 1214 |     blob_buff = NULL; | 
| 1215 |     num_blob_bytes = 0; | 
| 1216 |     delay_updating_ai_metadata = false; | 
| 1217 |     ai_metadata_update_required = false; | 
| 1218 |     memset(mult_key_dbt_array, 0, sizeof(mult_key_dbt_array)); | 
| 1219 |     memset(mult_rec_dbt_array, 0, sizeof(mult_rec_dbt_array)); | 
| 1220 |     for (uint32_t i = 0; i < sizeof(mult_key_dbt_array)/sizeof(mult_key_dbt_array[0]); i++) { | 
| 1221 |         toku_dbt_array_init(&mult_key_dbt_array[i], 1); | 
| 1222 |     } | 
| 1223 |     for (uint32_t i = 0; i < sizeof(mult_rec_dbt_array)/sizeof(mult_rec_dbt_array[0]); i++) { | 
| 1224 |         toku_dbt_array_init(&mult_rec_dbt_array[i], 1); | 
| 1225 |     } | 
| 1226 |     loader = NULL; | 
| 1227 |     abort_loader = false; | 
| 1228 |     memset(&lc, 0, sizeof(lc)); | 
| 1229 |     lock.type = TL_IGNORE; | 
| 1230 |     for (uint32_t i = 0; i < MAX_KEY+1; i++) { | 
| 1231 |         mult_put_flags[i] = 0; | 
| 1232 |         mult_del_flags[i] = DB_DELETE_ANY; | 
| 1233 |         mult_dbt_flags[i] = DB_DBT_REALLOC; | 
| 1234 |     } | 
| 1235 |     num_DBs_locked_in_bulk = false; | 
| 1236 |     lock_count = 0; | 
| 1237 |     use_write_locks = false; | 
| 1238 |     range_query_buff = NULL; | 
| 1239 |     size_range_query_buff = 0; | 
| 1240 |     bytes_used_in_range_query_buff = 0; | 
| 1241 |     curr_range_query_buff_offset = 0; | 
| 1242 |     doing_bulk_fetch = false; | 
| 1243 |     prelocked_left_range_size = 0; | 
| 1244 |     prelocked_right_range_size = 0; | 
| 1245 |     tokudb_active_index = MAX_KEY; | 
| 1246 |     invalidate_icp(); | 
| 1247 |     trx_handler_list.data = this; | 
| 1248 |     in_rpl_write_rows = in_rpl_delete_rows = in_rpl_update_rows = false; | 
| 1249 |     TOKUDB_HANDLER_DBUG_VOID_RETURN; | 
| 1250 | } | 
| 1251 |  | 
| 1252 | ha_tokudb::~ha_tokudb() { | 
| 1253 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 1254 |     for (uint32_t i = 0; i < sizeof(mult_key_dbt_array)/sizeof(mult_key_dbt_array[0]); i++) { | 
| 1255 |         toku_dbt_array_destroy(&mult_key_dbt_array[i]); | 
| 1256 |     } | 
| 1257 |     for (uint32_t i = 0; i < sizeof(mult_rec_dbt_array)/sizeof(mult_rec_dbt_array[0]); i++) { | 
| 1258 |         toku_dbt_array_destroy(&mult_rec_dbt_array[i]); | 
| 1259 |     } | 
| 1260 |     TOKUDB_HANDLER_DBUG_VOID_RETURN; | 
| 1261 | } | 
| 1262 |  | 
| 1263 | // | 
// Reports whether the table has an auto-increment column.
// Parameters:
//      [out]   index - if an auto-increment column exists, set to its field
//              position in the table; otherwise left unchanged
// Returns:
//      true if an auto-increment column exists, false otherwise
| 1269 | // | 
| 1270 | bool ha_tokudb::has_auto_increment_flag(uint* index) { | 
| 1271 |     // | 
| 1272 |     // check to see if we have auto increment field | 
| 1273 |     // | 
| 1274 |     bool ai_found = false; | 
| 1275 |     uint ai_index = 0; | 
| 1276 |     for (uint i = 0; i < table_share->fields; i++, ai_index++) { | 
| 1277 |         Field* field = table->field[i]; | 
| 1278 |         if (field->flags & AUTO_INCREMENT_FLAG) { | 
| 1279 |             ai_found = true; | 
| 1280 |             *index = ai_index; | 
| 1281 |             break; | 
| 1282 |         } | 
| 1283 |     } | 
| 1284 |     return ai_found; | 
| 1285 | } | 
| 1286 |  | 
| 1287 | static int open_status_dictionary(DB** ptr, const char* name, DB_TXN* txn) { | 
| 1288 |     int error; | 
| 1289 |     char* newname = NULL; | 
| 1290 |     size_t newname_len = get_max_dict_name_path_length(name); | 
| 1291 |     newname = (char*)tokudb::memory::malloc(newname_len, MYF(MY_WME)); | 
| 1292 |     if (newname == NULL) { | 
| 1293 |         error = ENOMEM; | 
| 1294 |         goto cleanup; | 
| 1295 |     } | 
| 1296 |     make_name(newname, newname_len, name, "status" ); | 
| 1297 |     TOKUDB_TRACE_FOR_FLAGS(TOKUDB_DEBUG_OPEN, "open:%s" , newname); | 
| 1298 |  | 
| 1299 |     error = tokudb::metadata::open(db_env, ptr, newname, txn); | 
| 1300 | cleanup: | 
| 1301 |     tokudb::memory::free(newname); | 
| 1302 |     return error; | 
| 1303 | } | 
| 1304 |  | 
| 1305 | int ha_tokudb::open_main_dictionary( | 
| 1306 |     const char* name, | 
| 1307 |     bool is_read_only, | 
| 1308 |     DB_TXN* txn) { | 
| 1309 |  | 
| 1310 |     int error;     | 
| 1311 |     char* newname = NULL; | 
| 1312 |     size_t newname_len = 0; | 
| 1313 |     uint open_flags = (is_read_only ? DB_RDONLY : 0) | DB_THREAD; | 
| 1314 |  | 
| 1315 |     assert_always(share->file == NULL); | 
| 1316 |     assert_always(share->key_file[primary_key] == NULL); | 
| 1317 |     newname_len = get_max_dict_name_path_length(name); | 
| 1318 |     newname = (char*)tokudb::memory::malloc( | 
| 1319 |         newname_len, | 
| 1320 |         MYF(MY_WME|MY_ZEROFILL)); | 
| 1321 |     if (newname == NULL) {  | 
| 1322 |         error = ENOMEM; | 
| 1323 |         goto exit; | 
| 1324 |     } | 
| 1325 |     make_name(newname, newname_len, name, "main" ); | 
| 1326 |  | 
| 1327 |     error = db_create(&share->file, db_env, 0); | 
| 1328 |     if (error) { | 
| 1329 |         goto exit; | 
| 1330 |     } | 
| 1331 |     share->key_file[primary_key] = share->file; | 
| 1332 |  | 
| 1333 |     error = | 
| 1334 |         share->file->open( | 
| 1335 |             share->file, | 
| 1336 |             txn, | 
| 1337 |             newname, | 
| 1338 |             NULL, | 
| 1339 |             DB_BTREE, | 
| 1340 |             open_flags, | 
| 1341 |             0); | 
| 1342 |     if (error) { | 
| 1343 |         goto exit; | 
| 1344 |     } | 
| 1345 |  | 
| 1346 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 1347 |         TOKUDB_DEBUG_OPEN, | 
| 1348 |         "open:%s:file=%p" , | 
| 1349 |         newname, | 
| 1350 |         share->file); | 
| 1351 |  | 
| 1352 |     error = 0; | 
| 1353 | exit: | 
| 1354 |     if (error) { | 
| 1355 |         if (share->file) { | 
| 1356 |             int r = share->file->close( | 
| 1357 |                 share->file, | 
| 1358 |                 0 | 
| 1359 |                 ); | 
| 1360 |             assert_always(r==0); | 
| 1361 |             share->file = NULL; | 
| 1362 |             share->key_file[primary_key] = NULL; | 
| 1363 |         } | 
| 1364 |     } | 
| 1365 |     tokudb::memory::free(newname); | 
| 1366 |     return error; | 
| 1367 | } | 
| 1368 |  | 
| 1369 | // | 
// Open a secondary dictionary: its key is the secondary index key and its
// value is the corresponding primary key
| 1372 | // | 
| 1373 | int ha_tokudb::open_secondary_dictionary( | 
| 1374 |     DB** ptr, | 
| 1375 |     KEY* key_info, | 
| 1376 |     const char* name, | 
| 1377 |     bool is_read_only, | 
| 1378 |     DB_TXN* txn) { | 
| 1379 |  | 
| 1380 |     int error = ENOSYS; | 
| 1381 |     char dict_name[MAX_DICT_NAME_LEN]; | 
| 1382 |     uint open_flags = (is_read_only ? DB_RDONLY : 0) | DB_THREAD; | 
| 1383 |     char* newname = NULL; | 
| 1384 |     size_t newname_len = 0; | 
| 1385 |  | 
    sprintf(dict_name, "key-%s", key_info->name.str);
| 1387 |  | 
| 1388 |     newname_len = get_max_dict_name_path_length(name); | 
| 1389 |     newname = | 
| 1390 |         (char*)tokudb::memory::malloc(newname_len, MYF(MY_WME|MY_ZEROFILL)); | 
| 1391 |     if (newname == NULL) { | 
| 1392 |         error = ENOMEM; | 
| 1393 |         goto cleanup; | 
| 1394 |     } | 
| 1395 |     make_name(newname, newname_len, name, dict_name); | 
| 1396 |  | 
| 1397 |  | 
| 1398 |     if ((error = db_create(ptr, db_env, 0))) { | 
| 1399 |         my_errno = error; | 
| 1400 |         goto cleanup; | 
| 1401 |     } | 
| 1402 |  | 
| 1403 |  | 
| 1404 |     error = (*ptr)->open(*ptr, txn, newname, NULL, DB_BTREE, open_flags, 0); | 
| 1405 |     if (error) { | 
| 1406 |         my_errno = error; | 
| 1407 |         goto cleanup; | 
| 1408 |     } | 
| 1409 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 1410 |         TOKUDB_DEBUG_OPEN, | 
| 1411 |         "open:%s:file=%p" , | 
| 1412 |         newname, | 
| 1413 |         *ptr); | 
| 1414 | cleanup: | 
| 1415 |     if (error) { | 
| 1416 |         if (*ptr) { | 
| 1417 |             int r = (*ptr)->close(*ptr, 0); | 
| 1418 |             assert_always(r==0); | 
| 1419 |             *ptr = NULL; | 
| 1420 |         } | 
| 1421 |     } | 
| 1422 |     tokudb::memory::free(newname); | 
| 1423 |     return error; | 
| 1424 | } | 
| 1425 |  | 
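//
// Fills in the column pack info (cp_info) for the dictionary at keynr:
// for each field not covered by the key filter, records either its fixed
// field offset or its variable field index. Also computes the mcp_info
// totals (fixed_field_size and len_of_offsets) for the dictionary
//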
static int initialize_col_pack_info(
    KEY_AND_COL_INFO* kc_info,
    TABLE_SHARE* table_share,
    uint keynr) {
| 1427 |     int error = ENOSYS; | 
| 1428 |     // | 
| 1429 |     // set up the cp_info | 
| 1430 |     // | 
| 1431 |     assert_always(kc_info->cp_info[keynr] == NULL); | 
| 1432 |     kc_info->cp_info[keynr] = (COL_PACK_INFO*)tokudb::memory::malloc( | 
| 1433 |         table_share->fields * sizeof(COL_PACK_INFO), | 
| 1434 |         MYF(MY_WME | MY_ZEROFILL)); | 
| 1435 |     if (kc_info->cp_info[keynr] == NULL) { | 
| 1436 |         error = ENOMEM; | 
| 1437 |         goto exit; | 
| 1438 |     } | 
| 1439 |     { | 
| 1440 |     uint32_t curr_fixed_offset = 0; | 
| 1441 |     uint32_t curr_var_index = 0; | 
| 1442 |     for (uint j = 0; j < table_share->fields; j++) { | 
| 1443 |         COL_PACK_INFO* curr = &kc_info->cp_info[keynr][j]; | 
| 1444 |         // | 
| 1445 |         // need to set the offsets / indexes | 
| 1446 |         // offsets are calculated AFTER the NULL bytes | 
| 1447 |         // | 
| 1448 |         if (!bitmap_is_set(&kc_info->key_filters[keynr],j)) { | 
| 1449 |             if (is_fixed_field(kc_info, j)) { | 
| 1450 |                 curr->col_pack_val = curr_fixed_offset; | 
| 1451 |                 curr_fixed_offset += kc_info->field_lengths[j]; | 
| 1452 |             } | 
| 1453 |             else if (is_variable_field(kc_info, j)) { | 
| 1454 |                 curr->col_pack_val = curr_var_index; | 
| 1455 |                 curr_var_index++; | 
| 1456 |             } | 
| 1457 |         } | 
| 1458 |     } | 
| 1459 |      | 
| 1460 |     // | 
| 1461 |     // set up the mcp_info | 
| 1462 |     // | 
| 1463 |     kc_info->mcp_info[keynr].fixed_field_size = get_fixed_field_size( | 
| 1464 |         kc_info, | 
| 1465 |         table_share, | 
| 1466 |         keynr | 
| 1467 |         ); | 
| 1468 |     kc_info->mcp_info[keynr].len_of_offsets = get_len_of_offsets( | 
| 1469 |         kc_info, | 
| 1470 |         table_share, | 
| 1471 |         keynr | 
| 1472 |         ); | 
| 1473 |  | 
| 1474 |     error = 0; | 
| 1475 |     } | 
| 1476 | exit: | 
| 1477 |     return error; | 
| 1478 | } | 
| 1479 |  | 
| 1480 | // reset the kc_info state at keynr | 
| 1481 | static void reset_key_and_col_info(KEY_AND_COL_INFO *kc_info, uint keynr) { | 
| 1482 |     bitmap_clear_all(&kc_info->key_filters[keynr]); | 
| 1483 |     tokudb::memory::free(kc_info->cp_info[keynr]); | 
| 1484 |     kc_info->cp_info[keynr] = NULL; | 
| 1485 |     kc_info->mcp_info[keynr] = (MULTI_COL_PACK_INFO) { 0, 0 }; | 
| 1486 | } | 
| 1487 |  | 
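//
// Classifies every field in the table as fixed, variable, or blob and
// records its pack length; decides how many bytes are needed for the
// variable field offsets; sets up the key filters for each dictionary;
// and initializes the column pack info for the primary key and any
// clustering keys
// Returns:
//      0 on success, error otherwise
//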
| 1488 | static int initialize_key_and_col_info( | 
| 1489 |     TABLE_SHARE* table_share, | 
| 1490 |     TABLE* table, | 
| 1491 |     KEY_AND_COL_INFO* kc_info, | 
| 1492 |     uint hidden_primary_key, | 
| 1493 |     uint primary_key) { | 
| 1494 |  | 
| 1495 |     int error = 0; | 
| 1496 |     uint32_t curr_blob_field_index = 0; | 
| 1497 |     uint32_t max_var_bytes = 0; | 
| 1498 |     // | 
| 1499 |     // fill in the field lengths. 0 means it is a variable sized field length | 
| 1500 |     // fill in length_bytes, 0 means it is fixed or blob | 
| 1501 |     // | 
| 1502 |     for (uint i = 0; i < table_share->fields; i++) { | 
| 1503 |         Field* field = table_share->field[i]; | 
| 1504 |         TOKU_TYPE toku_type = mysql_to_toku_type(field); | 
| 1505 |         uint32 pack_length = 0; | 
| 1506 |         switch (toku_type) { | 
| 1507 |         case toku_type_int: | 
| 1508 |         case toku_type_double: | 
| 1509 |         case toku_type_float: | 
| 1510 |         case toku_type_fixbinary: | 
| 1511 |         case toku_type_fixstring: | 
| 1512 |             pack_length = field->pack_length(); | 
| 1513 |             assert_always(pack_length < 1<<16); | 
| 1514 |             kc_info->field_types[i] = KEY_AND_COL_INFO::TOKUDB_FIXED_FIELD; | 
| 1515 |             kc_info->field_lengths[i] = (uint16_t)pack_length; | 
| 1516 |             kc_info->length_bytes[i] = 0; | 
| 1517 |             break; | 
| 1518 |         case toku_type_blob: | 
| 1519 |             kc_info->field_types[i] = KEY_AND_COL_INFO::TOKUDB_BLOB_FIELD; | 
| 1520 |             kc_info->field_lengths[i] = 0; | 
| 1521 |             kc_info->length_bytes[i] = 0; | 
| 1522 |             kc_info->blob_fields[curr_blob_field_index] = i; | 
| 1523 |             curr_blob_field_index++; | 
| 1524 |             break; | 
| 1525 |         case toku_type_varstring: | 
| 1526 |         case toku_type_varbinary: | 
| 1527 |             kc_info->field_types[i] = KEY_AND_COL_INFO::TOKUDB_VARIABLE_FIELD; | 
| 1528 |             kc_info->field_lengths[i] = 0; | 
| 1529 |             kc_info->length_bytes[i] = | 
| 1530 |                 (uchar)((Field_varstring*)field)->length_bytes; | 
| 1531 |             max_var_bytes += field->field_length; | 
| 1532 |             break; | 
| 1533 |         default: | 
| 1534 |             assert_unreachable(); | 
| 1535 |         } | 
| 1536 |     } | 
| 1537 |     kc_info->num_blobs = curr_blob_field_index; | 
| 1538 |  | 
| 1539 |     // | 
| 1540 |     // initialize share->num_offset_bytes | 
| 1541 |     // because MAX_REF_LENGTH is 65536, we | 
| 1542 |     // can safely set num_offset_bytes to 1 or 2 | 
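    // (e.g., variable columns totaling at most 200 bytes need only a
    // single offset byte)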
| 1543 |     // | 
| 1544 |     if (max_var_bytes < 256) { | 
| 1545 |         kc_info->num_offset_bytes = 1; | 
| 1546 |     } else { | 
| 1547 |         kc_info->num_offset_bytes = 2; | 
| 1548 |     } | 
| 1549 |  | 
| 1550 |     for (uint i = 0; | 
| 1551 |          i < table_share->keys + tokudb_test(hidden_primary_key); | 
| 1552 |          i++) { | 
| 1553 |         // | 
| 1554 |         // do the cluster/primary key filtering calculations | 
| 1555 |         // | 
| 1556 |         if (!(i==primary_key && hidden_primary_key)) { | 
| 1557 |             if (i == primary_key) { | 
| 1558 |                 set_key_filter( | 
| 1559 |                     &kc_info->key_filters[primary_key], | 
| 1560 |                     &table_share->key_info[primary_key], | 
| 1561 |                     table, | 
| 1562 |                     true); | 
| 1563 |             } else { | 
| 1564 |                 set_key_filter( | 
| 1565 |                     &kc_info->key_filters[i], | 
| 1566 |                     &table_share->key_info[i], | 
| 1567 |                     table, | 
| 1568 |                     true); | 
| 1569 |                 if (!hidden_primary_key) { | 
| 1570 |                     set_key_filter( | 
| 1571 |                         &kc_info->key_filters[i], | 
| 1572 |                         &table_share->key_info[primary_key], | 
| 1573 |                         table, | 
| 1574 |                         true); | 
| 1575 |                 } | 
| 1576 |             } | 
| 1577 |         } | 
| 1578 |         if (i == primary_key || key_is_clustering(&table_share->key_info[i])) { | 
| 1579 |             error = initialize_col_pack_info(kc_info, table_share, i); | 
| 1580 |             if (error) { | 
| 1581 |                 goto exit; | 
| 1582 |             } | 
| 1583 |         } | 
| 1584 |     } | 
| 1585 | exit: | 
| 1586 |     return error; | 
| 1587 | } | 
| 1588 |  | 
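//
// Determines whether REPLACE INTO can be executed as a simple insert,
// without reading the old row first. This is the case when the table has
// only the primary dictionary, or when every column of every secondary
// key is covered by both that key's filter and the primary key's filter,
// so the secondary rows can be regenerated from the new row alone
// Returns:
//      true if the fast path is safe, false otherwise
//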
| 1589 | bool ha_tokudb::can_replace_into_be_fast( | 
| 1590 |     TABLE_SHARE* table_share, | 
| 1591 |     KEY_AND_COL_INFO* kc_info, | 
| 1592 |     uint pk) { | 
| 1593 |  | 
| 1594 |     uint curr_num_DBs = table_share->keys + tokudb_test(hidden_primary_key); | 
| 1595 |     bool ret_val; | 
| 1596 |     if (curr_num_DBs == 1) { | 
| 1597 |         ret_val = true; | 
| 1598 |         goto exit; | 
| 1599 |     } | 
| 1600 |     ret_val = true; | 
| 1601 |     for (uint curr_index = 0; curr_index < table_share->keys; curr_index++) { | 
| 1602 |         if (curr_index == pk) continue; | 
| 1603 |         KEY* curr_key_info = &table_share->key_info[curr_index]; | 
| 1604 |         for (uint i = 0; i < curr_key_info->user_defined_key_parts; i++) { | 
| 1605 |             uint16 curr_field_index = curr_key_info->key_part[i].field->field_index; | 
| 1606 |             if (!bitmap_is_set(&kc_info->key_filters[curr_index],curr_field_index)) { | 
| 1607 |                 ret_val = false; | 
| 1608 |                 goto exit; | 
| 1609 |             } | 
| 1610 |             if (bitmap_is_set(&kc_info->key_filters[curr_index], curr_field_index) && | 
| 1611 |                 !bitmap_is_set(&kc_info->key_filters[pk], curr_field_index)) { | 
| 1612 |                 ret_val = false; | 
| 1613 |                 goto exit; | 
| 1614 |             } | 
| 1615 |              | 
| 1616 |         } | 
| 1617 |     } | 
| 1618 | exit: | 
| 1619 |     return ret_val; | 
| 1620 | } | 
| 1621 |  | 
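//
// Initializes the TOKUDB_SHARE for this table: reads the status
// dictionary, verifies the stored frm data, sets up the key and column
// info, opens the main and secondary dictionaries, and loads the row
// count, auto increment, and cardinality information
// Returns:
//      0 on success, error otherwise
//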
| 1622 | int ha_tokudb::initialize_share(const char* name, int mode) { | 
| 1623 |     int error = 0; | 
| 1624 |     uint64_t num_rows = 0; | 
| 1625 |     DB_TXN* txn = NULL; | 
| 1626 |     bool do_commit = false; | 
| 1627 |     THD* thd = ha_thd(); | 
    tokudb_trx_data* trx =
        (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
    if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE &&
        trx &&
        trx->sub_sp_level) {
        txn = trx->sub_sp_level;
    }
| 1632 |     else { | 
| 1633 |         do_commit = true; | 
| 1634 |         error = txn_begin(db_env, 0, &txn, 0, thd); | 
| 1635 |         if (error) { goto exit; } | 
| 1636 |     } | 
| 1637 |  | 
| 1638 |  | 
| 1639 |     error = get_status(txn); | 
| 1640 |     if (error) { | 
| 1641 |         goto exit; | 
| 1642 |     } | 
| 1643 |     if (share->version != HA_TOKU_VERSION) { | 
| 1644 |         error = ENOSYS; | 
| 1645 |         goto exit; | 
| 1646 |     } | 
| 1647 |  | 
| 1648 | #if WITH_PARTITION_STORAGE_ENGINE | 
| 1649 |     // verify frm data for non-partitioned tables | 
| 1650 |     if (TOKU_PARTITION_WRITE_FRM_DATA || table->part_info == NULL) { | 
| 1651 |         error = verify_frm_data(table->s->path.str, txn); | 
| 1652 |         if (error) | 
| 1653 |             goto exit; | 
| 1654 |     } else { | 
| 1655 |         // remove the frm data for partitions since we are not maintaining it | 
| 1656 |         error = remove_frm_data(share->status_block, txn); | 
| 1657 |         if (error) | 
| 1658 |             goto exit; | 
| 1659 |     } | 
| 1660 | #else | 
| 1661 |     error = verify_frm_data(table->s->path.str, txn); | 
| 1662 |     if (error) | 
| 1663 |         goto exit; | 
| 1664 | #endif | 
| 1665 |  | 
| 1666 |     error = | 
| 1667 |         initialize_key_and_col_info( | 
| 1668 |             table_share, | 
| 1669 |             table, | 
| 1670 |             &share->kc_info, | 
| 1671 |             hidden_primary_key, | 
| 1672 |             primary_key); | 
| 1673 |     if (error) { goto exit; } | 
| 1674 |  | 
| 1675 |     error = open_main_dictionary(name, mode == O_RDONLY, txn); | 
| 1676 |     if (error) { | 
| 1677 |         goto exit; | 
| 1678 |     } | 
| 1679 |  | 
| 1680 |     share->has_unique_keys = false; | 
| 1681 |     share->_keys = table_share->keys; | 
| 1682 |     share->_max_key_parts = table_share->key_parts; | 
| 1683 |     share->_key_descriptors = | 
| 1684 |         (TOKUDB_SHARE::key_descriptor_t*)tokudb::memory::malloc( | 
| 1685 |             sizeof(TOKUDB_SHARE::key_descriptor_t) * share->_keys, | 
| 1686 |             MYF(MY_ZEROFILL)); | 
| 1687 |  | 
| 1688 |     /* Open other keys;  These are part of the share structure */ | 
| 1689 |     for (uint i = 0; i < table_share->keys; i++) { | 
| 1690 |         share->_key_descriptors[i]._parts = | 
| 1691 |             table_share->key_info[i].user_defined_key_parts; | 
| 1692 |         if (i == primary_key) { | 
| 1693 |             share->_key_descriptors[i]._is_unique = true; | 
            share->_key_descriptors[i]._name =
                tokudb::memory::strdup("primary", 0);
| 1695 |         } else { | 
| 1696 |             share->_key_descriptors[i]._is_unique = false; | 
| 1697 |             share->_key_descriptors[i]._name = | 
| 1698 |                 tokudb::memory::strdup(table_share->key_info[i].name.str, 0); | 
| 1699 |         } | 
| 1700 |  | 
| 1701 |         if (table_share->key_info[i].flags & HA_NOSAME) { | 
| 1702 |             share->_key_descriptors[i]._is_unique = true; | 
| 1703 |             share->has_unique_keys = true; | 
| 1704 |         } | 
| 1705 |         if (i != primary_key) { | 
| 1706 |             error = | 
| 1707 |                 open_secondary_dictionary( | 
| 1708 |                     &share->key_file[i], | 
| 1709 |                     &table_share->key_info[i], | 
| 1710 |                     name, | 
| 1711 |                     mode == O_RDONLY, | 
| 1712 |                     txn); | 
| 1713 |             if (error) { | 
| 1714 |                 goto exit; | 
| 1715 |             } | 
| 1716 |         } | 
| 1717 |     } | 
| 1718 |     share->replace_into_fast = | 
| 1719 |         can_replace_into_be_fast( | 
| 1720 |             table_share, | 
| 1721 |             &share->kc_info, | 
| 1722 |             primary_key); | 
| 1723 |  | 
| 1724 |     share->pk_has_string = false; | 
| 1725 |     if (!hidden_primary_key) { | 
| 1726 |         // | 
| 1727 |         // We need to set the ref_length to start at 5, to account for | 
| 1728 |         // the "infinity byte" in keys, and for placing the DBT size in the first four bytes | 
| 1729 |         // | 
| 1730 |         ref_length = sizeof(uint32_t) + sizeof(uchar); | 
| 1731 |         KEY_PART_INFO* key_part = table->key_info[primary_key].key_part; | 
| 1732 |         KEY_PART_INFO* end = | 
| 1733 |             key_part + table->key_info[primary_key].user_defined_key_parts; | 
| 1734 |         for (; key_part != end; key_part++) { | 
| 1735 |             ref_length += key_part->field->max_packed_col_length(key_part->length); | 
| 1736 |             TOKU_TYPE toku_type = mysql_to_toku_type(key_part->field); | 
| 1737 |             if (toku_type == toku_type_fixstring || | 
| 1738 |                 toku_type == toku_type_varstring || | 
| 1739 |                 toku_type == toku_type_blob | 
| 1740 |                 ) | 
| 1741 |             { | 
| 1742 |                 share->pk_has_string = true; | 
| 1743 |             } | 
| 1744 |         } | 
| 1745 |         share->status |= STATUS_PRIMARY_KEY_INIT; | 
| 1746 |     } | 
| 1747 |     share->ref_length = ref_length; | 
| 1748 |  | 
| 1749 |     error = estimate_num_rows(share->file, &num_rows, txn); | 
| 1750 |     // | 
| 1751 |     // estimate_num_rows should not fail under normal conditions | 
| 1752 |     // | 
| 1753 |     if (error == 0) { | 
| 1754 |         share->set_row_count(num_rows, true); | 
| 1755 |     } else { | 
| 1756 |         goto exit; | 
| 1757 |     } | 
| 1758 |     // | 
| 1759 |     // initialize auto increment data | 
| 1760 |     // | 
| 1761 |     share->has_auto_inc = has_auto_increment_flag(&share->ai_field_index); | 
| 1762 |     if (share->has_auto_inc) { | 
| 1763 |         init_auto_increment(); | 
| 1764 |     } | 
| 1765 |  | 
| 1766 |     if (may_table_be_empty(txn)) { | 
| 1767 |         share->try_table_lock = true; | 
| 1768 |     } else { | 
| 1769 |         share->try_table_lock = false; | 
| 1770 |     } | 
| 1771 |  | 
| 1772 |     share->num_DBs = table_share->keys + tokudb_test(hidden_primary_key); | 
| 1773 |  | 
| 1774 |     init_hidden_prim_key_info(txn); | 
| 1775 |  | 
| 1776 |     // initialize cardinality info from the status dictionary | 
| 1777 |     { | 
| 1778 |         uint32_t rec_per_keys = tokudb::compute_total_key_parts(table_share); | 
| 1779 |         uint64_t* rec_per_key = | 
| 1780 |             (uint64_t*)tokudb::memory::malloc( | 
| 1781 |                 rec_per_keys * sizeof(uint64_t), | 
| 1782 |                 MYF(MY_FAE)); | 
| 1783 |         error = | 
| 1784 |             tokudb::get_card_from_status( | 
| 1785 |                 share->status_block, | 
| 1786 |                 txn, | 
| 1787 |                 rec_per_keys, | 
| 1788 |                 rec_per_key); | 
| 1789 |         if (error) { | 
            memset(rec_per_key, 0, sizeof(uint64_t) * rec_per_keys);
| 1791 |         } | 
| 1792 |         share->init_cardinality_counts(rec_per_keys, rec_per_key); | 
| 1793 |     } | 
| 1794 |  | 
| 1795 |     error = 0; | 
| 1796 | exit: | 
| 1797 |     if (do_commit && txn) { | 
| 1798 |         commit_txn(txn,0); | 
| 1799 |     } | 
| 1800 |     return error; | 
| 1801 | } | 
| 1802 |  | 
| 1803 | // | 
| 1804 | // Creates and opens a handle to a table which already exists in a tokudb | 
| 1805 | // database. | 
| 1806 | // Parameters: | 
| 1807 | //      [in]   name - table name | 
//             mode - open mode; O_RDONLY opens the dictionaries read only
| 1809 | //             test_if_locked - unused | 
| 1810 | // Returns: | 
| 1811 | //      0 on success | 
| 1812 | //      1 on error | 
| 1813 | // | 
| 1814 | int ha_tokudb::open(const char *name, int mode, uint test_if_locked) { | 
    TOKUDB_HANDLER_DBUG_ENTER("%s %o %u", name, mode, test_if_locked);
| 1816 |     THD* thd = ha_thd(); | 
| 1817 |  | 
| 1818 |     int error = 0; | 
| 1819 |     int ret_val = 0; | 
| 1820 |  | 
| 1821 |     transaction = NULL; | 
| 1822 |     cursor = NULL; | 
| 1823 |  | 
| 1824 |  | 
| 1825 |     /* Open primary key */ | 
| 1826 |     hidden_primary_key = 0; | 
| 1827 |     if ((primary_key = table_share->primary_key) >= MAX_KEY) { | 
| 1828 |         // No primary key | 
| 1829 |         primary_key = table_share->keys; | 
| 1830 |         key_used_on_scan = MAX_KEY; | 
| 1831 |         hidden_primary_key = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH; | 
| 1832 |         ref_length = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH + sizeof(uint32_t); | 
| 1833 |     }  | 
| 1834 |     else { | 
| 1835 |         key_used_on_scan = primary_key; | 
| 1836 |     } | 
| 1837 |  | 
| 1838 |     /* Need some extra memory in case of packed keys */ | 
| 1839 |     // the "+ 1" is for the first byte that states +/- infinity | 
| 1840 |     // multiply everything by 2 to account for clustered keys having a key and primary key together | 
    max_key_length =
        2 * (table_share->max_key_length + MAX_REF_PARTS * 3 + sizeof(uchar));
| 1842 |     alloc_ptr = tokudb::memory::multi_malloc( | 
| 1843 |         MYF(MY_WME), | 
| 1844 |         &key_buff, max_key_length,  | 
| 1845 |         &key_buff2, max_key_length,  | 
| 1846 |         &key_buff3, max_key_length, | 
| 1847 |         &key_buff4, max_key_length,                                | 
| 1848 |         &prelocked_left_range, max_key_length,  | 
| 1849 |         &prelocked_right_range, max_key_length,  | 
| 1850 |         &primary_key_buff, (hidden_primary_key ? 0 : max_key_length), | 
| 1851 |         &fixed_cols_for_query, table_share->fields*sizeof(uint32_t), | 
| 1852 |         &var_cols_for_query, table_share->fields*sizeof(uint32_t), | 
| 1853 |         NullS); | 
| 1854 |     if (alloc_ptr == NULL) { | 
| 1855 |         ret_val = 1; | 
| 1856 |         goto exit; | 
| 1857 |     } | 
| 1858 |  | 
| 1859 |     size_range_query_buff = tokudb::sysvars::read_buf_size(thd); | 
| 1860 |     range_query_buff = | 
| 1861 |         (uchar*)tokudb::memory::malloc(size_range_query_buff, MYF(MY_WME)); | 
| 1862 |     if (range_query_buff == NULL) { | 
| 1863 |         ret_val = 1; | 
| 1864 |         goto exit; | 
| 1865 |     } | 
| 1866 |  | 
| 1867 |     alloced_rec_buff_length = table_share->rec_buff_length + | 
| 1868 |         table_share->fields; | 
| 1869 |     rec_buff = (uchar *) tokudb::memory::malloc( | 
| 1870 |         alloced_rec_buff_length, | 
| 1871 |         MYF(MY_WME)); | 
| 1872 |     if (rec_buff == NULL) { | 
| 1873 |         ret_val = 1; | 
| 1874 |         goto exit; | 
| 1875 |     } | 
| 1876 |  | 
| 1877 |     alloced_update_rec_buff_length = alloced_rec_buff_length; | 
| 1878 |     rec_update_buff = (uchar*)tokudb::memory::malloc( | 
| 1879 |         alloced_update_rec_buff_length, | 
| 1880 |         MYF(MY_WME)); | 
| 1881 |     if (rec_update_buff == NULL) { | 
| 1882 |         ret_val = 1; | 
| 1883 |         goto exit; | 
| 1884 |     } | 
| 1885 |  | 
| 1886 |     // lookup or create share | 
| 1887 |     share = TOKUDB_SHARE::get_share(name, table_share, &lock, true); | 
| 1888 |     assert_always(share); | 
| 1889 |  | 
| 1890 |     if (share->state() != TOKUDB_SHARE::OPENED) { | 
| 1891 |         // means we're responsible for the transition to OPENED, ERROR or CLOSED | 
| 1892 |  | 
| 1893 |         ret_val = allocate_key_and_col_info(table_share, &share->kc_info); | 
| 1894 |         if (ret_val == 0) { | 
| 1895 |             ret_val = initialize_share(name, mode); | 
| 1896 |         } | 
| 1897 |  | 
| 1898 |         if (ret_val == 0) { | 
| 1899 |             share->set_state(TOKUDB_SHARE::OPENED); | 
| 1900 |         } else { | 
| 1901 |             free_key_and_col_info(&share->kc_info); | 
| 1902 |             share->set_state(TOKUDB_SHARE::ERROR); | 
| 1903 |         } | 
| 1904 |         share->unlock(); | 
| 1905 |     } else { | 
| 1906 |         // got an already OPENED instance | 
| 1907 |         share->unlock(); | 
| 1908 |     } | 
| 1909 |  | 
| 1910 |     if (share->state() == TOKUDB_SHARE::ERROR) { | 
| 1911 |         share->release(); | 
| 1912 |         goto exit; | 
| 1913 |     } | 
| 1914 |  | 
| 1915 |     assert_always(share->state() == TOKUDB_SHARE::OPENED); | 
| 1916 |  | 
| 1917 |     ref_length = share->ref_length;     // If second open | 
| 1918 |  | 
| 1919 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 1920 |         TOKUDB_DEBUG_OPEN, | 
| 1921 |         "tokudbopen:%p:share=%p:file=%p:table=%p:table->s=%p:%d" , | 
| 1922 |         this, | 
| 1923 |         share, | 
| 1924 |         share->file, | 
| 1925 |         table, | 
| 1926 |         table->s, | 
| 1927 |         share->use_count()); | 
| 1928 |  | 
| 1929 |     key_read = false; | 
| 1930 |     stats.block_size = 1<<20;    // QQQ Tokudb DB block size | 
| 1931 |  | 
| 1932 |     info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); | 
| 1933 |  | 
| 1934 | exit: | 
| 1935 |     if (ret_val) { | 
| 1936 |         tokudb::memory::free(range_query_buff); | 
| 1937 |         range_query_buff = NULL; | 
| 1938 |         tokudb::memory::free(alloc_ptr); | 
| 1939 |         alloc_ptr = NULL; | 
| 1940 |         tokudb::memory::free(rec_buff); | 
| 1941 |         rec_buff = NULL; | 
| 1942 |         tokudb::memory::free(rec_update_buff); | 
| 1943 |         rec_update_buff = NULL; | 
| 1944 |          | 
| 1945 |         if (error) { | 
| 1946 |             my_errno = error; | 
| 1947 |         } | 
| 1948 |     } | 
| 1949 |     TOKUDB_HANDLER_DBUG_RETURN(ret_val); | 
| 1950 | } | 
| 1951 |  | 
| 1952 | // | 
| 1953 | // estimate the number of rows in a DB | 
| 1954 | // Parameters: | 
| 1955 | //      [in]    db - DB whose number of rows will be estimated | 
| 1956 | //      [out]   num_rows - number of estimated rows in db | 
| 1957 | // Returns: | 
| 1958 | //      0 on success | 
| 1959 | //      error otherwise | 
| 1960 | // | 
| 1961 | int ha_tokudb::estimate_num_rows(DB* db, uint64_t* num_rows, DB_TXN* txn) { | 
| 1962 |     int error = ENOSYS; | 
| 1963 |     bool do_commit = false; | 
| 1964 |     DB_BTREE_STAT64 dict_stats; | 
| 1965 |     DB_TXN* txn_to_use = NULL; | 
| 1966 |  | 
| 1967 |     if (txn == NULL) { | 
| 1968 |         error = txn_begin(db_env, 0, &txn_to_use, DB_READ_UNCOMMITTED, ha_thd()); | 
| 1969 |         if (error) goto cleanup; | 
| 1970 |         do_commit = true; | 
| 1971 |     } | 
| 1972 |     else { | 
| 1973 |         txn_to_use = txn; | 
| 1974 |     } | 
| 1975 |  | 
| 1976 |     error = db->stat64(db, txn_to_use, &dict_stats); | 
| 1977 |     if (error) { goto cleanup; } | 
| 1978 |  | 
| 1979 |     *num_rows = dict_stats.bt_ndata; | 
| 1980 |     error = 0; | 
| 1981 | cleanup: | 
| 1982 |     if (do_commit) { | 
| 1983 |         commit_txn(txn_to_use, 0); | 
| 1984 |         txn_to_use = NULL; | 
| 1985 |     } | 
| 1986 |     return error; | 
| 1987 | } | 
| 1988 |  | 
| 1989 |  | 
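//
// helper functions to write and remove a metadata entry, keyed by
// curr_key_data, in the status dictionary
//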
| 1990 | int ha_tokudb::write_to_status(DB* db, HA_METADATA_KEY curr_key_data, void* data, uint size, DB_TXN* txn ){ | 
| 1991 |     return write_metadata(db, &curr_key_data, sizeof curr_key_data, data, size, txn); | 
| 1992 | } | 
| 1993 |  | 
| 1994 | int ha_tokudb::remove_from_status(DB *db, HA_METADATA_KEY curr_key_data, DB_TXN *txn) { | 
| 1995 |     return remove_metadata(db, &curr_key_data, sizeof curr_key_data, txn); | 
| 1996 | } | 
| 1997 |  | 
| 1998 | int ha_tokudb::remove_metadata(DB* db, void* key_data, uint key_size, DB_TXN* transaction){ | 
| 1999 |     int error; | 
| 2000 |     DBT key; | 
| 2001 |     DB_TXN* txn = NULL; | 
| 2002 |     bool do_commit = false; | 
| 2003 |     // | 
| 2004 |     // transaction to be used for putting metadata into status.tokudb | 
| 2005 |     // | 
| 2006 |     if (transaction == NULL) { | 
| 2007 |         error = txn_begin(db_env, 0, &txn, 0, ha_thd()); | 
| 2008 |         if (error) {  | 
| 2009 |             goto cleanup; | 
| 2010 |         } | 
| 2011 |         do_commit = true; | 
| 2012 |     } | 
| 2013 |     else { | 
| 2014 |         txn = transaction; | 
| 2015 |     } | 
| 2016 |  | 
| 2017 |     memset(&key, 0, sizeof(key)); | 
| 2018 |     key.data = key_data; | 
| 2019 |     key.size = key_size; | 
| 2020 |     error = db->del(db, txn, &key, DB_DELETE_ANY); | 
| 2021 |     if (error) {  | 
| 2022 |         goto cleanup;  | 
| 2023 |     } | 
| 2024 |      | 
| 2025 |     error = 0; | 
| 2026 | cleanup: | 
| 2027 |     if (do_commit && txn) { | 
| 2028 |         if (!error) { | 
| 2029 |             commit_txn(txn, DB_TXN_NOSYNC); | 
| 2030 |         } | 
| 2031 |         else { | 
| 2032 |             abort_txn(txn); | 
| 2033 |         } | 
| 2034 |     } | 
| 2035 |     return error; | 
| 2036 | } | 
| 2037 |  | 
| 2038 | // | 
// helper function to write a piece of metadata into status.tokudb
| 2040 | // | 
| 2041 | int ha_tokudb::write_metadata(DB* db, void* key_data, uint key_size, void* val_data, uint val_size, DB_TXN* transaction ){ | 
| 2042 |     int error; | 
| 2043 |     DBT key; | 
| 2044 |     DBT value; | 
| 2045 |     DB_TXN* txn = NULL; | 
| 2046 |     bool do_commit = false; | 
| 2047 |     // | 
| 2048 |     // transaction to be used for putting metadata into status.tokudb | 
| 2049 |     // | 
| 2050 |     if (transaction == NULL) { | 
| 2051 |         error = txn_begin(db_env, 0, &txn, 0, ha_thd()); | 
| 2052 |         if (error) {  | 
| 2053 |             goto cleanup; | 
| 2054 |         } | 
| 2055 |         do_commit = true; | 
| 2056 |     } | 
| 2057 |     else { | 
| 2058 |         txn = transaction; | 
| 2059 |     } | 
| 2060 |  | 
| 2061 |     memset(&key, 0, sizeof(key)); | 
| 2062 |     memset(&value, 0, sizeof(value)); | 
| 2063 |     key.data = key_data; | 
| 2064 |     key.size = key_size; | 
| 2065 |     value.data = val_data; | 
| 2066 |     value.size = val_size; | 
| 2067 |     error = db->put(db, txn, &key, &value, 0); | 
| 2068 |     if (error) {  | 
| 2069 |         goto cleanup;  | 
| 2070 |     } | 
| 2071 |      | 
| 2072 |     error = 0; | 
| 2073 | cleanup: | 
| 2074 |     if (do_commit && txn) { | 
| 2075 |         if (!error) { | 
| 2076 |             commit_txn(txn, DB_TXN_NOSYNC); | 
| 2077 |         } | 
| 2078 |         else { | 
| 2079 |             abort_txn(txn); | 
| 2080 |         } | 
| 2081 |     } | 
| 2082 |     return error; | 
| 2083 | } | 
| 2084 |  | 
| 2085 | int ha_tokudb::write_frm_data(DB* db, DB_TXN* txn, const char* frm_name) { | 
    TOKUDB_HANDLER_DBUG_ENTER("%p %p %s", db, txn, frm_name);
| 2087 |  | 
| 2088 |     uchar* frm_data = NULL; | 
| 2089 |     size_t frm_len = 0; | 
| 2090 |     int error = 0; | 
| 2091 |  | 
| 2092 | #if 100000 <= MYSQL_VERSION_ID | 
| 2093 |     error = table_share->read_frm_image((const uchar**)&frm_data,&frm_len); | 
| 2094 |     if (error) { goto cleanup; } | 
| 2095 | #else     | 
| 2096 |     error = readfrm(frm_name,&frm_data,&frm_len); | 
| 2097 |     if (error) { goto cleanup; } | 
| 2098 | #endif | 
| 2099 |      | 
| 2100 |     error = write_to_status(db,hatoku_frm_data,frm_data,(uint)frm_len, txn); | 
| 2101 |     if (error) { goto cleanup; } | 
| 2102 |  | 
| 2103 |     error = 0; | 
| 2104 | cleanup: | 
| 2105 |     tokudb::memory::free(frm_data); | 
| 2106 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 2107 | } | 
| 2108 |  | 
| 2109 | int ha_tokudb::remove_frm_data(DB *db, DB_TXN *txn) { | 
| 2110 |     return remove_from_status(db, hatoku_frm_data, txn); | 
| 2111 | } | 
| 2112 |  | 
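// getf_set callback that copies the frm image stored in the status
// dictionary into the DBT passed as context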
static int smart_dbt_callback_verify_frm(DBT const *key, DBT const *row, void *context) {
| 2114 |     DBT* stored_frm = (DBT *)context; | 
| 2115 |     stored_frm->size = row->size; | 
| 2116 |     stored_frm->data = (uchar *)tokudb::memory::malloc(row->size, MYF(MY_WME)); | 
| 2117 |     assert_always(stored_frm->data); | 
| 2118 |     memcpy(stored_frm->data, row->data, row->size); | 
| 2119 |     return 0; | 
| 2120 | } | 
| 2121 |  | 
| 2122 | int ha_tokudb::verify_frm_data(const char* frm_name, DB_TXN* txn) { | 
    TOKUDB_HANDLER_DBUG_ENTER("%s", frm_name);
| 2124 |     uchar* mysql_frm_data = NULL; | 
| 2125 |     size_t mysql_frm_len = 0; | 
| 2126 |     DBT key = {}; | 
| 2127 |     DBT stored_frm = {}; | 
| 2128 |     int error = 0; | 
| 2129 |     HA_METADATA_KEY curr_key = hatoku_frm_data; | 
| 2130 |  | 
| 2131 |     // get the frm data from MySQL | 
| 2132 | #if 100000 <= MYSQL_VERSION_ID | 
| 2133 |     error = table_share->read_frm_image((const uchar**)&mysql_frm_data,&mysql_frm_len); | 
| 2134 |     if (error) {  | 
| 2135 |         goto cleanup; | 
| 2136 |     } | 
| 2137 | #else | 
| 2138 |     error = readfrm(frm_name,&mysql_frm_data,&mysql_frm_len); | 
| 2139 |     if (error) {  | 
| 2140 |         goto cleanup;  | 
| 2141 |     } | 
| 2142 | #endif | 
| 2143 |  | 
| 2144 |     key.data = &curr_key; | 
| 2145 |     key.size = sizeof(curr_key); | 
| 2146 |     error = share->status_block->getf_set( | 
| 2147 |         share->status_block,  | 
| 2148 |         txn, | 
| 2149 |         0, | 
| 2150 |         &key,  | 
| 2151 |         smart_dbt_callback_verify_frm,  | 
| 2152 |         &stored_frm | 
| 2153 |         ); | 
| 2154 |     if (error == DB_NOTFOUND) { | 
| 2155 |         // if not found, write it | 
| 2156 |         error = write_frm_data(share->status_block, txn, frm_name); | 
| 2157 |         goto cleanup; | 
| 2158 |     } else if (error) { | 
| 2159 |         goto cleanup; | 
| 2160 |     } | 
| 2161 |  | 
| 2162 |     if (stored_frm.size != mysql_frm_len || memcmp(stored_frm.data, mysql_frm_data, stored_frm.size)) { | 
| 2163 |         error = HA_ERR_TABLE_DEF_CHANGED; | 
| 2164 |         goto cleanup; | 
| 2165 |     } | 
| 2166 |  | 
| 2167 |     error = 0; | 
| 2168 | cleanup: | 
| 2169 |     tokudb::memory::free(mysql_frm_data); | 
| 2170 |     tokudb::memory::free(stored_frm.data); | 
| 2171 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 2172 | } | 
| 2173 |  | 
| 2174 | // | 
| 2175 | // Updates status.tokudb with a new max value used for the auto increment column | 
| 2176 | // Parameters: | 
| 2177 | //      [in]    db - this will always be status.tokudb | 
| 2178 | //              val - value to store | 
| 2179 | //  Returns: | 
| 2180 | //      0 on success, error otherwise | 
| 2181 | // | 
| 2182 | // | 
| 2183 | int ha_tokudb::update_max_auto_inc(DB* db, ulonglong val){ | 
| 2184 |     return write_to_status(db,hatoku_max_ai,&val,sizeof(val), NULL); | 
| 2185 | } | 
| 2186 |  | 
| 2187 | // | 
| 2188 | // Writes the initial auto increment value, as specified by create table | 
| 2189 | // so if a user does "create table t1 (a int auto_increment, primary key (a)) auto_increment=100", | 
| 2190 | // then the value 100 will be stored here in val | 
| 2191 | // Parameters: | 
| 2192 | //      [in]    db - this will always be status.tokudb | 
| 2193 | //              val - value to store | 
| 2194 | //  Returns: | 
| 2195 | //      0 on success, error otherwise | 
| 2196 | // | 
| 2197 | // | 
| 2198 | int ha_tokudb::write_auto_inc_create(DB* db, ulonglong val, DB_TXN* txn){ | 
| 2199 |     return write_to_status(db,hatoku_ai_create_value,&val,sizeof(val), txn); | 
| 2200 | } | 
| 2201 |  | 
| 2202 |  | 
| 2203 | // | 
| 2204 | // Closes a handle to a table.  | 
| 2205 | // | 
| 2206 | int ha_tokudb::close() { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 2208 |     int r = __close(); | 
| 2209 |     TOKUDB_HANDLER_DBUG_RETURN(r); | 
| 2210 | } | 
| 2211 |  | 
| 2212 | int ha_tokudb::__close() { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
    TOKUDB_HANDLER_TRACE_FOR_FLAGS(TOKUDB_DEBUG_OPEN, "close:%p", this);
| 2215 |     tokudb::memory::free(rec_buff); | 
| 2216 |     tokudb::memory::free(rec_update_buff); | 
| 2217 |     tokudb::memory::free(blob_buff); | 
| 2218 |     tokudb::memory::free(alloc_ptr); | 
| 2219 |     tokudb::memory::free(range_query_buff); | 
| 2220 |     for (uint32_t i = 0; i < sizeof(mult_key_dbt_array)/sizeof(mult_key_dbt_array[0]); i++) { | 
| 2221 |         toku_dbt_array_destroy(&mult_key_dbt_array[i]); | 
| 2222 |     } | 
| 2223 |     for (uint32_t i = 0; i < sizeof(mult_rec_dbt_array)/sizeof(mult_rec_dbt_array[0]); i++) { | 
| 2224 |         toku_dbt_array_destroy(&mult_rec_dbt_array[i]); | 
| 2225 |     } | 
| 2226 |     rec_buff = NULL; | 
| 2227 |     rec_update_buff = NULL; | 
| 2228 |     alloc_ptr = NULL; | 
| 2229 |     ha_tokudb::reset(); | 
| 2230 |     int retval = share->release(); | 
| 2231 |     TOKUDB_HANDLER_DBUG_RETURN(retval); | 
| 2232 | } | 
| 2233 |  | 
| 2234 | // | 
| 2235 | // Reallocate record buffer (rec_buff) if needed | 
| 2236 | // If not needed, does nothing | 
| 2237 | // Parameters: | 
| 2238 | //          length - size of buffer required for rec_buff | 
| 2239 | // | 
| 2240 | bool ha_tokudb::fix_rec_buff_for_blob(ulong length) { | 
| 2241 |     if (!rec_buff || (length > alloced_rec_buff_length)) { | 
| 2242 |         uchar* newptr = (uchar*)tokudb::memory::realloc( | 
| 2243 |             (void*)rec_buff, | 
| 2244 |             length, | 
| 2245 |             MYF(MY_ALLOW_ZERO_PTR)); | 
| 2246 |         if (!newptr) | 
| 2247 |             return 1; | 
| 2248 |         rec_buff = newptr; | 
| 2249 |         alloced_rec_buff_length = length; | 
| 2250 |     } | 
| 2251 |     return 0; | 
| 2252 | } | 
| 2253 |  | 
| 2254 | // | 
| 2255 | // Reallocate record buffer (rec_buff) if needed | 
| 2256 | // If not needed, does nothing | 
| 2257 | // Parameters: | 
| 2258 | //          length - size of buffer required for rec_buff | 
| 2259 | // | 
| 2260 | bool ha_tokudb::fix_rec_update_buff_for_blob(ulong length) { | 
| 2261 |     if (!rec_update_buff || (length > alloced_update_rec_buff_length)) { | 
| 2262 |         uchar* newptr = (uchar*)tokudb::memory::realloc( | 
| 2263 |             (void*)rec_update_buff, | 
| 2264 |             length, | 
| 2265 |             MYF(MY_ALLOW_ZERO_PTR)); | 
| 2266 |         if (!newptr) | 
| 2267 |             return 1; | 
| 2268 |         rec_update_buff= newptr; | 
| 2269 |         alloced_update_rec_buff_length = length; | 
| 2270 |     } | 
| 2271 |     return 0; | 
| 2272 | } | 
| 2273 |  | 
| 2274 | /* Calculate max length needed for row */ | 
| 2275 | ulong ha_tokudb::max_row_length(const uchar * buf) { | 
| 2276 |     ulong length = table_share->reclength + table_share->fields * 2; | 
| 2277 |     uint *ptr, *end; | 
| 2278 |     for (ptr = table_share->blob_field, end = ptr + table_share->blob_fields; ptr != end; ptr++) { | 
| 2279 |         Field_blob *blob = ((Field_blob *) table->field[*ptr]); | 
| 2280 |         length += blob->get_length((uchar *) (buf + field_offset(blob, table))) + 2; | 
| 2281 |     } | 
| 2282 |     return length; | 
| 2283 | } | 
| 2284 |  | 
| 2285 | /* | 
| 2286 | */ | 
| 2287 | // | 
| 2288 | // take the row passed in as a DBT*, and convert it into a row in MySQL format in record | 
| 2289 | // Pack a row for storage. | 
| 2290 | // If the row is of fixed length, just store the  row 'as is'. | 
| 2291 | // If not, we will generate a packed row suitable for storage. | 
| 2292 | // This will only fail if we don't have enough memory to pack the row, | 
| 2293 | // which may only happen in rows with blobs, as the default row length is | 
| 2294 | // pre-allocated. | 
| 2295 | // Parameters: | 
| 2296 | //      [out]   row - row stored in DBT to be converted | 
| 2297 | //      [out]   buf - buffer where row is packed | 
| 2298 | //      [in]    record - row in MySQL format | 
| 2299 | // | 
| 2300 |  | 
| 2301 | int ha_tokudb::pack_row_in_buff( | 
| 2302 |     DBT * row,  | 
| 2303 |     const uchar* record, | 
| 2304 |     uint index, | 
| 2305 |     uchar* row_buff | 
| 2306 |     )  | 
| 2307 | { | 
| 2308 |     uchar* fixed_field_ptr = NULL; | 
| 2309 |     uchar* var_field_offset_ptr = NULL; | 
| 2310 |     uchar* start_field_data_ptr = NULL; | 
| 2311 |     uchar* var_field_data_ptr = NULL; | 
| 2312 |     int r = ENOSYS; | 
| 2313 |     memset((void *) row, 0, sizeof(*row)); | 
| 2314 |  | 
| 2315 |     my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set); | 
| 2316 |      | 
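    // packed row layout:
    //   [null bytes][fixed fields][var field end offsets][var field data][blob data]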
| 2317 |     // Copy null bytes | 
| 2318 |     memcpy(row_buff, record, table_share->null_bytes); | 
| 2319 |     fixed_field_ptr = row_buff + table_share->null_bytes; | 
| 2320 |     var_field_offset_ptr = fixed_field_ptr + share->kc_info.mcp_info[index].fixed_field_size; | 
| 2321 |     start_field_data_ptr = var_field_offset_ptr + share->kc_info.mcp_info[index].len_of_offsets; | 
| 2322 |     var_field_data_ptr = var_field_offset_ptr + share->kc_info.mcp_info[index].len_of_offsets; | 
| 2323 |  | 
    // pack each field that is not already stored as part of the key at this index
| 2325 |     for (uint i = 0; i < table_share->fields; i++) { | 
| 2326 |         Field* field = table->field[i]; | 
| 2327 |         uint curr_field_offset = field_offset(field, table); | 
| 2328 |         if (bitmap_is_set(&share->kc_info.key_filters[index],i)) { | 
| 2329 |             continue; | 
| 2330 |         } | 
| 2331 |         if (is_fixed_field(&share->kc_info, i)) { | 
| 2332 |             fixed_field_ptr = pack_fixed_field( | 
| 2333 |                 fixed_field_ptr, | 
| 2334 |                 record + curr_field_offset,  | 
| 2335 |                 share->kc_info.field_lengths[i] | 
| 2336 |                 ); | 
| 2337 |         } | 
| 2338 |         else if (is_variable_field(&share->kc_info, i)) { | 
| 2339 |             var_field_data_ptr = pack_var_field( | 
| 2340 |                 var_field_offset_ptr, | 
| 2341 |                 var_field_data_ptr, | 
| 2342 |                 start_field_data_ptr, | 
| 2343 |                 record + curr_field_offset, | 
| 2344 |                 share->kc_info.length_bytes[i], | 
| 2345 |                 share->kc_info.num_offset_bytes | 
| 2346 |                 ); | 
| 2347 |             var_field_offset_ptr += share->kc_info.num_offset_bytes; | 
| 2348 |         } | 
| 2349 |     } | 
| 2350 |  | 
| 2351 |     for (uint i = 0; i < share->kc_info.num_blobs; i++) { | 
| 2352 |         Field* field = table->field[share->kc_info.blob_fields[i]]; | 
| 2353 |         var_field_data_ptr = pack_toku_field_blob( | 
| 2354 |             var_field_data_ptr, | 
| 2355 |             record + field_offset(field, table), | 
| 2356 |             field | 
| 2357 |             ); | 
| 2358 |     } | 
| 2359 |  | 
| 2360 |     row->data = row_buff; | 
| 2361 |     row->size = (size_t) (var_field_data_ptr - row_buff); | 
| 2362 |     r = 0; | 
| 2363 |  | 
| 2364 |     dbug_tmp_restore_column_map(table->write_set, old_map); | 
| 2365 |     return r; | 
| 2366 | } | 
| 2367 |  | 
| 2368 |  | 
| 2369 | int ha_tokudb::pack_row( | 
| 2370 |     DBT * row,  | 
| 2371 |     const uchar* record, | 
| 2372 |     uint index | 
| 2373 |     ) | 
| 2374 | { | 
| 2375 |     return pack_row_in_buff(row,record,index,rec_buff); | 
| 2376 | } | 
| 2377 |  | 
| 2378 | int ha_tokudb::pack_old_row_for_update( | 
| 2379 |     DBT * row,  | 
| 2380 |     const uchar* record, | 
| 2381 |     uint index | 
| 2382 |     ) | 
| 2383 | { | 
| 2384 |     return pack_row_in_buff(row,record,index,rec_update_buff); | 
| 2385 | } | 
| 2386 |  | 
| 2387 |  | 
| 2388 | int ha_tokudb::unpack_blobs( | 
| 2389 |     uchar* record, | 
| 2390 |     const uchar* from_tokudb_blob, | 
| 2391 |     uint32_t num_bytes, | 
| 2392 |     bool check_bitmap | 
| 2393 |     ) | 
| 2394 | { | 
| 2395 |     uint error = 0; | 
| 2396 |     uchar* ptr = NULL; | 
| 2397 |     const uchar* buff = NULL; | 
| 2398 |     // | 
| 2399 |     // assert that num_bytes > 0 iff share->num_blobs > 0 | 
| 2400 |     // | 
| 2401 |     assert_always( !((share->kc_info.num_blobs == 0) && (num_bytes > 0)) ); | 
| 2402 |     if (num_bytes > num_blob_bytes) { | 
| 2403 |         ptr = (uchar*)tokudb::memory::realloc( | 
| 2404 |             (void*)blob_buff, num_bytes, | 
| 2405 |             MYF(MY_ALLOW_ZERO_PTR)); | 
| 2406 |         if (ptr == NULL) { | 
| 2407 |             error = ENOMEM; | 
| 2408 |             goto exit; | 
| 2409 |         } | 
| 2410 |         blob_buff = ptr; | 
| 2411 |         num_blob_bytes = num_bytes; | 
| 2412 |     } | 
| 2413 |      | 
| 2414 |     memcpy(blob_buff, from_tokudb_blob, num_bytes); | 
| 2415 |     buff= blob_buff; | 
| 2416 |     for (uint i = 0; i < share->kc_info.num_blobs; i++) { | 
| 2417 |         uint32_t curr_field_index = share->kc_info.blob_fields[i];  | 
| 2418 |         bool skip = check_bitmap ?  | 
| 2419 |             !(bitmap_is_set(table->read_set,curr_field_index) ||  | 
| 2420 |                 bitmap_is_set(table->write_set,curr_field_index)) :  | 
| 2421 |             false; | 
| 2422 |         Field* field = table->field[curr_field_index]; | 
| 2423 |         uint32_t len_bytes = field->row_pack_length(); | 
| 2424 |         const uchar* end_buff = unpack_toku_field_blob( | 
| 2425 |             record + field_offset(field, table), | 
| 2426 |             buff, | 
| 2427 |             len_bytes, | 
| 2428 |             skip | 
| 2429 |             ); | 
| 2430 |         // verify that the pointers to the blobs are all contained within the blob_buff | 
| 2431 |         if (!(blob_buff <= buff && end_buff <= blob_buff + num_bytes)) { | 
| 2432 |             error = -3000000; | 
| 2433 |             goto exit; | 
| 2434 |         } | 
| 2435 |         buff = end_buff; | 
| 2436 |     } | 
| 2437 |     // verify that the entire blob buffer was parsed | 
| 2438 |     if (share->kc_info.num_blobs > 0 && !(num_bytes > 0 && buff == blob_buff + num_bytes)) { | 
| 2439 |         error = -4000000; | 
| 2440 |         goto exit; | 
| 2441 |     } | 
| 2442 |  | 
| 2443 |     error = 0; | 
| 2444 | exit: | 
| 2445 |     return error; | 
| 2446 | } | 
| 2447 |  | 
| 2448 | // | 
| 2449 | // take the row passed in as a DBT*, and convert it into a row in MySQL format in record | 
| 2450 | // Parameters: | 
| 2451 | //      [out]   record - row in MySQL format | 
| 2452 | //      [in]    row - row stored in DBT to be converted | 
| 2453 | // | 
| 2454 | int ha_tokudb::unpack_row( | 
| 2455 |     uchar* record,  | 
| 2456 |     DBT const *row,  | 
| 2457 |     DBT const *key, | 
| 2458 |     uint index | 
| 2459 |     )  | 
| 2460 | { | 
| 2461 |     // | 
| 2462 |     // two cases, fixed length row, and variable length row | 
| 2463 |     // fixed length row is first below | 
| 2464 |     // | 
| 2465 |     /* Copy null bits */ | 
| 2466 |     int error = 0; | 
| 2467 |     const uchar* fixed_field_ptr = (const uchar *) row->data; | 
| 2468 |     const uchar* var_field_offset_ptr = NULL; | 
| 2469 |     const uchar* var_field_data_ptr = NULL; | 
| 2470 |     uint32_t data_end_offset = 0; | 
| 2471 |     memcpy(record, fixed_field_ptr, table_share->null_bytes); | 
| 2472 |     fixed_field_ptr += table_share->null_bytes; | 
| 2473 |  | 
| 2474 |     var_field_offset_ptr = fixed_field_ptr + share->kc_info.mcp_info[index].fixed_field_size; | 
| 2475 |     var_field_data_ptr = var_field_offset_ptr + share->kc_info.mcp_info[index].len_of_offsets; | 
| 2476 |  | 
| 2477 |     // | 
| 2478 |     // unpack the key, if necessary | 
| 2479 |     // | 
| 2480 |     if (!(hidden_primary_key && index == primary_key)) { | 
| 2481 |         unpack_key(record,key,index); | 
| 2482 |     } | 
| 2483 |  | 
| 2484 |     uint32_t last_offset = 0; | 
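    // var field offsets stored in the row are cumulative end offsets, so
    // the length of each variable field is data_end_offset - last_offset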
| 2485 |     // | 
| 2486 |     // we have two methods of unpacking, one if we need to unpack the entire row | 
| 2487 |     // the second if we unpack a subset of the entire row | 
| 2488 |     // first method here is if we unpack the entire row | 
| 2489 |     // | 
| 2490 |     if (unpack_entire_row) { | 
| 2491 |         // | 
| 2492 |         // fill in parts of record that are not part of the key | 
| 2493 |         // | 
| 2494 |         for (uint i = 0; i < table_share->fields; i++) { | 
| 2495 |             Field* field = table->field[i]; | 
| 2496 |             if (bitmap_is_set(&share->kc_info.key_filters[index],i)) { | 
| 2497 |                 continue; | 
| 2498 |             } | 
| 2499 |  | 
| 2500 |             if (is_fixed_field(&share->kc_info, i)) { | 
| 2501 |                 fixed_field_ptr = unpack_fixed_field( | 
| 2502 |                     record + field_offset(field, table), | 
| 2503 |                     fixed_field_ptr, | 
| 2504 |                     share->kc_info.field_lengths[i] | 
| 2505 |                     ); | 
| 2506 |             } | 
| 2507 |             // | 
| 2508 |             // here, we DO modify var_field_data_ptr or var_field_offset_ptr | 
| 2509 |             // as we unpack variable sized fields | 
| 2510 |             // | 
| 2511 |             else if (is_variable_field(&share->kc_info, i)) { | 
| 2512 |                 switch (share->kc_info.num_offset_bytes) { | 
| 2513 |                 case (1): | 
| 2514 |                     data_end_offset = var_field_offset_ptr[0]; | 
| 2515 |                     break; | 
| 2516 |                 case (2): | 
| 2517 |                     data_end_offset = uint2korr(var_field_offset_ptr); | 
| 2518 |                     break; | 
| 2519 |                 default: | 
| 2520 |                     assert_unreachable(); | 
| 2521 |                 } | 
| 2522 |                 unpack_var_field( | 
| 2523 |                     record + field_offset(field, table), | 
| 2524 |                     var_field_data_ptr, | 
| 2525 |                     data_end_offset - last_offset, | 
| 2526 |                     share->kc_info.length_bytes[i] | 
| 2527 |                     ); | 
| 2528 |                 var_field_offset_ptr += share->kc_info.num_offset_bytes; | 
| 2529 |                 var_field_data_ptr += data_end_offset - last_offset; | 
| 2530 |                 last_offset = data_end_offset; | 
| 2531 |             } | 
| 2532 |         } | 
| 2533 |         error = unpack_blobs( | 
| 2534 |             record, | 
| 2535 |             var_field_data_ptr, | 
| 2536 |             row->size - (uint32_t)(var_field_data_ptr - (const uchar *)row->data), | 
| 2537 |             false | 
| 2538 |             ); | 
| 2539 |         if (error) { | 
| 2540 |             goto exit; | 
| 2541 |         } | 
| 2542 |     } | 
| 2543 |     // | 
| 2544 |     // in this case, we unpack only what is specified  | 
| 2545 |     // in fixed_cols_for_query and var_cols_for_query | 
| 2546 |     // | 
| 2547 |     else { | 
| 2548 |         // | 
| 2549 |         // first the fixed fields | 
| 2550 |         // | 
| 2551 |         for (uint32_t i = 0; i < num_fixed_cols_for_query; i++) { | 
| 2552 |             uint field_index = fixed_cols_for_query[i]; | 
| 2553 |             Field* field = table->field[field_index]; | 
| 2554 |             unpack_fixed_field( | 
| 2555 |                 record + field_offset(field, table), | 
| 2556 |                 fixed_field_ptr + share->kc_info.cp_info[index][field_index].col_pack_val, | 
| 2557 |                 share->kc_info.field_lengths[field_index] | 
| 2558 |                 ); | 
| 2559 |         } | 
| 2560 |  | 
| 2561 |         // | 
| 2562 |         // now the var fields | 
| 2563 |         // here, we do NOT modify var_field_data_ptr or var_field_offset_ptr | 
| 2564 |         // | 
| 2565 |         for (uint32_t i = 0; i < num_var_cols_for_query; i++) { | 
| 2566 |             uint field_index = var_cols_for_query[i]; | 
| 2567 |             Field* field = table->field[field_index]; | 
| 2568 |             uint32_t var_field_index = share->kc_info.cp_info[index][field_index].col_pack_val; | 
| 2569 |             uint32_t data_start_offset; | 
| 2570 |             uint32_t field_len; | 
| 2571 |              | 
| 2572 |             get_var_field_info( | 
| 2573 |                 &field_len,  | 
| 2574 |                 &data_start_offset,  | 
| 2575 |                 var_field_index,  | 
| 2576 |                 var_field_offset_ptr,  | 
| 2577 |                 share->kc_info.num_offset_bytes | 
| 2578 |                 ); | 
| 2579 |  | 
| 2580 |             unpack_var_field( | 
| 2581 |                 record + field_offset(field, table), | 
| 2582 |                 var_field_data_ptr + data_start_offset, | 
| 2583 |                 field_len, | 
| 2584 |                 share->kc_info.length_bytes[field_index] | 
| 2585 |                 ); | 
| 2586 |         } | 
| 2587 |  | 
| 2588 |         if (read_blobs) { | 
| 2589 |             // | 
| 2590 |             // now the blobs | 
| 2591 |             // | 
| 2592 |             get_blob_field_info( | 
| 2593 |                 &data_end_offset,  | 
| 2594 |                 share->kc_info.mcp_info[index].len_of_offsets, | 
| 2595 |                 var_field_data_ptr,  | 
| 2596 |                 share->kc_info.num_offset_bytes | 
| 2597 |                 ); | 
| 2598 |  | 
| 2599 |             var_field_data_ptr += data_end_offset; | 
| 2600 |             error = unpack_blobs( | 
| 2601 |                 record, | 
| 2602 |                 var_field_data_ptr, | 
| 2603 |                 row->size - (uint32_t)(var_field_data_ptr - (const uchar *)row->data), | 
| 2604 |                 true | 
| 2605 |                 ); | 
| 2606 |             if (error) { | 
| 2607 |                 goto exit; | 
| 2608 |             } | 
| 2609 |         } | 
| 2610 |     } | 
| 2611 |     error = 0; | 
| 2612 | exit: | 
| 2613 |     return error; | 
| 2614 | } | 
| 2615 |  | 
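//
// Unpacks the key parts at data back into MySQL row format in record,
// restoring the null bit of each nullable key part
// Returns:
//      number of bytes of data consumed
//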
| 2616 | uint32_t ha_tokudb::place_key_into_mysql_buff( | 
| 2617 |     KEY* key_info, | 
| 2618 |     uchar* record, | 
| 2619 |     uchar* data) { | 
| 2620 |  | 
| 2621 |     KEY_PART_INFO* key_part = key_info->key_part; | 
| 2622 |     KEY_PART_INFO* end = key_part + key_info->user_defined_key_parts; | 
| 2623 |     uchar* pos = data; | 
| 2624 |  | 
| 2625 |     for (; key_part != end; key_part++) { | 
| 2626 |         if (key_part->field->null_bit) { | 
| 2627 |             uint null_offset = get_null_offset(table, key_part->field); | 
| 2628 |             if (*pos++ == NULL_COL_VAL) { // Null value | 
| 2629 |                 // | 
| 2630 |                 // We don't need to reset the record data as we will not access it | 
| 2631 |                 // if the null data is set | 
| 2632 |                 //             | 
| 2633 |                 record[null_offset] |= key_part->field->null_bit; | 
| 2634 |                 continue; | 
| 2635 |             } | 
| 2636 |             record[null_offset] &= ~key_part->field->null_bit; | 
| 2637 |         } | 
| 2638 | #if !defined(MARIADB_BASE_VERSION) | 
| 2639 |         // | 
| 2640 |         // HOPEFULLY TEMPORARY | 
| 2641 |         // | 
| 2642 |         assert_always(table->s->db_low_byte_first); | 
| 2643 | #endif | 
| 2644 |         pos = unpack_toku_key_field( | 
| 2645 |             record + field_offset(key_part->field, table), | 
| 2646 |             pos, | 
| 2647 |             key_part->field, | 
| 2648 |             key_part->length | 
| 2649 |             ); | 
| 2650 |     } | 
| 2651 |     return pos-data; | 
| 2652 | } | 
| 2653 |  | 
| 2654 | // | 
| 2655 | // Store the key and the primary key into the row | 
| 2656 | // Parameters: | 
| 2657 | //      [out]   record - key stored in MySQL format | 
| 2658 | //      [in]    key - key stored in DBT to be converted | 
| 2659 | //              index -index into key_file that represents the DB  | 
| 2660 | //                  unpacking a key of | 
| 2661 | // | 
| 2662 | void ha_tokudb::unpack_key(uchar * record, DBT const *key, uint index) { | 
| 2663 |     uint32_t bytes_read; | 
| 2664 |     uchar *pos = (uchar *) key->data + 1; | 
| 2665 |     bytes_read = place_key_into_mysql_buff( | 
| 2666 |         &table->key_info[index],  | 
| 2667 |         record,  | 
| 2668 |         pos | 
| 2669 |         ); | 
| 2670 |     if( (index != primary_key) && !hidden_primary_key) { | 
| 2671 |         // | 
| 2672 |         // also unpack primary key | 
| 2673 |         // | 
| 2674 |         place_key_into_mysql_buff( | 
| 2675 |             &table->key_info[primary_key],  | 
| 2676 |             record,  | 
| 2677 |             pos+bytes_read | 
| 2678 |             ); | 
| 2679 |     } | 
| 2680 | } | 
| 2681 |  | 
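//
// Packs the key parts of record into buff in TokuDB key format, writing a
// NULL/NOT NULL marker byte before each nullable part and setting
// *has_null if any part is NULL
// Returns:
//      number of bytes written to buff
//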
| 2682 | uint32_t ha_tokudb::place_key_into_dbt_buff( | 
| 2683 |     KEY* key_info, | 
| 2684 |     uchar* buff, | 
| 2685 |     const uchar* record, | 
| 2686 |     bool* has_null, | 
| 2687 |     int key_length) { | 
| 2688 |  | 
| 2689 |     KEY_PART_INFO* key_part = key_info->key_part; | 
| 2690 |     KEY_PART_INFO* end = key_part + key_info->user_defined_key_parts; | 
| 2691 |     uchar* curr_buff = buff; | 
| 2692 |     *has_null = false; | 
| 2693 |     for (; key_part != end && key_length > 0; key_part++) { | 
| 2694 |         // | 
        // accessing key_part->field->null_bit instead of key_part->null_bit
| 2696 |         // because key_part->null_bit is not set in add_index | 
| 2697 |         // filed ticket 862 to look into this | 
| 2698 |         // | 
| 2699 |         if (key_part->field->null_bit) { | 
| 2700 |             /* Store 0 if the key part is a NULL part */ | 
| 2701 |             uint null_offset = get_null_offset(table, key_part->field); | 
| 2702 |             if (record[null_offset] & key_part->field->null_bit) { | 
| 2703 |                 *curr_buff++ = NULL_COL_VAL; | 
| 2704 |                 *has_null = true; | 
| 2705 |                 continue; | 
| 2706 |             } | 
| 2707 |             *curr_buff++ = NONNULL_COL_VAL;        // Store NOT NULL marker | 
| 2708 |         } | 
| 2709 | #if !defined(MARIADB_BASE_VERSION) | 
| 2710 |         // | 
| 2711 |         // HOPEFULLY TEMPORARY | 
| 2712 |         // | 
| 2713 |         assert_always(table->s->db_low_byte_first); | 
| 2714 | #endif | 
| 2715 |         // | 
        // accessing field_offset(key_part->field) instead of key_part->offset
| 2717 |         // because key_part->offset is SET INCORRECTLY in add_index | 
| 2718 |         // filed ticket 862 to look into this | 
| 2719 |         // | 
| 2720 |         curr_buff = pack_toku_key_field( | 
| 2721 |             curr_buff, | 
| 2722 |             (uchar *) (record + field_offset(key_part->field, table)), | 
| 2723 |             key_part->field, | 
| 2724 |             key_part->length | 
| 2725 |             ); | 
| 2726 |         key_length -= key_part->length; | 
| 2727 |     } | 
| 2728 |     return curr_buff - buff; | 
| 2729 | } | 
| 2730 |  | 
| 2731 |  | 
| 2732 |  | 
| 2733 | // | 
| 2734 | // Create a packed key from a row. This key will be written as such | 
| 2735 | // to the index tree.  This will never fail as the key buffer is pre-allocated. | 
| 2736 | // Parameters: | 
| 2737 | //      [out]   key - DBT that holds the key | 
//      [in]    key_info - holds data about the key, such as its length and offset into record
| 2739 | //      [out]   buff - buffer that will hold the data for key (unless  | 
| 2740 | //                  we have a hidden primary key) | 
| 2741 | //      [in]    record - row from which to create the key | 
//              key_length - currently set to MAX_KEY_LENGTH, presumably the size of buff
| 2743 | // Returns: | 
| 2744 | //      the parameter key | 
| 2745 | // | 
| 2746 |  | 
| 2747 | DBT* ha_tokudb::create_dbt_key_from_key( | 
| 2748 |     DBT * key, | 
| 2749 |     KEY* key_info,  | 
| 2750 |     uchar * buff, | 
| 2751 |     const uchar * record,  | 
| 2752 |     bool* has_null, | 
| 2753 |     bool dont_pack_pk, | 
| 2754 |     int key_length, | 
| 2755 |     uint8_t inf_byte | 
| 2756 |     )  | 
| 2757 | { | 
| 2758 |     uint32_t size = 0; | 
| 2759 |     uchar* tmp_buff = buff; | 
| 2760 |     my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set); | 
| 2761 |  | 
| 2762 |     key->data = buff; | 
| 2763 |  | 
| 2764 |     // | 
| 2765 |     // first put the "infinity" byte at beginning. States if missing columns are implicitly | 
| 2766 |     // positive infinity or negative infinity or zero. For this, because we are creating key | 
| 2767 |     // from a row, there is no way that columns can be missing, so in practice, | 
| 2768 |     // this will be meaningless. Might as well put in a value | 
| 2769 |     // | 
| 2770 |     *tmp_buff++ = inf_byte; | 
| 2771 |     size++; | 
| 2772 |     size += place_key_into_dbt_buff( | 
| 2773 |         key_info,  | 
| 2774 |         tmp_buff,  | 
| 2775 |         record,  | 
| 2776 |         has_null,  | 
| 2777 |         key_length | 
| 2778 |         ); | 
| 2779 |     if (!dont_pack_pk) { | 
| 2780 |         tmp_buff = buff + size; | 
| 2781 |         if (hidden_primary_key) { | 
| 2782 |             memcpy(tmp_buff, current_ident, TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH); | 
| 2783 |             size += TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH; | 
| 2784 |         } | 
| 2785 |         else { | 
| 2786 |             bool tmp_bool = false; | 
| 2787 |             size += place_key_into_dbt_buff( | 
| 2788 |                 &table->key_info[primary_key],  | 
| 2789 |                 tmp_buff,  | 
| 2790 |                 record,  | 
| 2791 |                 &tmp_bool,  | 
| 2792 |                 MAX_KEY_LENGTH //this parameter does not matter | 
| 2793 |                 ); | 
| 2794 |         } | 
| 2795 |     } | 
| 2796 |  | 
| 2797 |     key->size = size; | 
| 2798 |     DBUG_DUMP("key" , (uchar *) key->data, key->size); | 
| 2799 |     dbug_tmp_restore_column_map(table->write_set, old_map); | 
| 2800 |     return key; | 
| 2801 | } | 
| 2802 |  | 
| 2803 |  | 
| 2804 | // | 
| 2805 | // Create a packed key from a row. This key will be written as such | 
| 2806 | // to the index tree.  This will never fail as the key buffer is pre-allocated. | 
| 2807 | // Parameters: | 
| 2808 | //      [out]   key - DBT that holds the key | 
| 2809 | //              keynr - index for which to create the key | 
| 2810 | //      [out]   buff - buffer that will hold the data for key (unless  | 
| 2811 | //                  we have a hidden primary key) | 
| 2812 | //      [in]    record - row from which to create the key | 
| 2813 | //      [out]   has_null - says if the key has a NULL value for one of its columns | 
//              key_length - currently set to MAX_KEY_LENGTH, presumably the size of buff
| 2815 | // Returns: | 
| 2816 | //      the parameter key | 
| 2817 | // | 
| 2818 | DBT *ha_tokudb::create_dbt_key_from_table( | 
| 2819 |     DBT * key,  | 
| 2820 |     uint keynr,  | 
| 2821 |     uchar * buff,  | 
| 2822 |     const uchar * record,  | 
| 2823 |     bool* has_null,  | 
| 2824 |     int key_length | 
| 2825 |     )  | 
| 2826 | { | 
| 2827 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 2828 |     memset((void *) key, 0, sizeof(*key)); | 
| 2829 |     if (hidden_primary_key && keynr == primary_key) { | 
| 2830 |         key->data = buff; | 
        memcpy(buff, &current_ident, TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
| 2832 |         key->size = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH; | 
| 2833 |         *has_null = false; | 
| 2834 |         DBUG_RETURN(key); | 
| 2835 |     } | 
    DBUG_RETURN(create_dbt_key_from_key(
        key,
        &table->key_info[keynr],
        buff,
        record,
        has_null,
        (keynr == primary_key),
        key_length,
        COL_ZERO));
| 2837 | } | 
| 2838 |  | 
| 2839 | DBT* ha_tokudb::create_dbt_key_for_lookup( | 
| 2840 |     DBT * key,  | 
| 2841 |     KEY* key_info,  | 
| 2842 |     uchar * buff,  | 
| 2843 |     const uchar * record,  | 
| 2844 |     bool* has_null,  | 
| 2845 |     int key_length | 
| 2846 |     ) | 
| 2847 | { | 
| 2848 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 2849 |     // override the infinity byte, needed in case the pk is a string | 
| 2850 |     // to make sure that the cursor that uses this key properly positions | 
| 2851 |     // it at the right location. If the table stores "D", but we look up for "d", | 
| 2852 |     // and the infinity byte is 0, then we will skip the "D", because  | 
| 2853 |     // in bytes, "d" > "D". | 
| 2854 |     DBT* ret = create_dbt_key_from_key(key, key_info, buff, record, has_null, true, key_length, COL_NEG_INF); | 
| 2855 |     DBUG_RETURN(ret);     | 
| 2856 | } | 
| 2857 |  | 
| 2858 | // | 
// Create a packed key from a MySQL unpacked key (like the one that is
// sent from index_read()). This key is to be used to read a row
| 2861 | // Parameters: | 
| 2862 | //      [out]   key - DBT that holds the key | 
| 2863 | //              keynr - index for which to pack the key | 
| 2864 | //      [out]   buff - buffer that will hold the data for key | 
| 2865 | //      [in]    key_ptr - MySQL unpacked key | 
| 2866 | //              key_length - length of key_ptr | 
| 2867 | // Returns: | 
| 2868 | //      the parameter key | 
| 2869 | // | 
| 2870 | DBT* ha_tokudb::pack_key( | 
| 2871 |     DBT* key, | 
| 2872 |     uint keynr, | 
| 2873 |     uchar* buff, | 
| 2874 |     const uchar* key_ptr, | 
| 2875 |     uint key_length, | 
| 2876 |     int8_t inf_byte) { | 
| 2877 |  | 
| 2878 |     TOKUDB_HANDLER_DBUG_ENTER( | 
| 2879 |         "key %p %u:%2.2x inf=%d" , | 
| 2880 |         key_ptr, | 
| 2881 |         key_length, | 
| 2882 |         key_length > 0 ? key_ptr[0] : 0, | 
| 2883 |         inf_byte); | 
| 2884 | #if TOKU_INCLUDE_EXTENDED_KEYS | 
| 2885 |     if (keynr != primary_key && !tokudb_test(hidden_primary_key)) { | 
| 2886 |         DBUG_RETURN(pack_ext_key(key, keynr, buff, key_ptr, key_length, inf_byte)); | 
| 2887 |     } | 
| 2888 | #endif | 
| 2889 |     KEY* key_info = &table->key_info[keynr]; | 
| 2890 |     KEY_PART_INFO* key_part = key_info->key_part; | 
| 2891 |     KEY_PART_INFO* end = key_part + key_info->user_defined_key_parts; | 
| 2892 |     my_bitmap_map* old_map = dbug_tmp_use_all_columns(table, table->write_set); | 
| 2893 |  | 
| 2894 |     memset((void *) key, 0, sizeof(*key)); | 
| 2895 |     key->data = buff; | 
| 2896 |  | 
| 2897 |     // first put the "infinity" byte at beginning. States if missing columns are implicitly | 
| 2898 |     // positive infinity or negative infinity | 
| 2899 |     *buff++ = (uchar)inf_byte; | 
| 2900 |  | 
| 2901 |     for (; key_part != end && (int) key_length > 0; key_part++) { | 
| 2902 |         uint offset = 0; | 
| 2903 |         if (key_part->null_bit) { | 
            if (*key_ptr != 0) {
| 2905 |                 *buff++ = NULL_COL_VAL; | 
| 2906 |                 key_length -= key_part->store_length; | 
| 2907 |                 key_ptr += key_part->store_length; | 
| 2908 |                 continue; | 
| 2909 |             } | 
| 2910 |             *buff++ = NONNULL_COL_VAL; | 
| 2911 |             offset = 1;         // Data is at key_ptr+1 | 
| 2912 |         } | 
| 2913 | #if !defined(MARIADB_BASE_VERSION) | 
| 2914 |         assert_always(table->s->db_low_byte_first); | 
| 2915 | #endif | 
| 2916 |         buff = pack_key_toku_key_field( | 
| 2917 |             buff, | 
| 2918 |             (uchar *) key_ptr + offset, | 
| 2919 |             key_part->field, | 
| 2920 |             key_part->length | 
| 2921 |             ); | 
| 2922 |          | 
| 2923 |         key_ptr += key_part->store_length; | 
| 2924 |         key_length -= key_part->store_length; | 
| 2925 |     } | 
| 2926 |  | 
| 2927 |     key->size = (buff - (uchar *) key->data); | 
| 2928 |     DBUG_DUMP("key" , (uchar *) key->data, key->size); | 
| 2929 |     dbug_tmp_restore_column_map(table->write_set, old_map); | 
| 2930 |     DBUG_RETURN(key); | 
| 2931 | } | 
| 2932 |  | 
| 2933 | #if TOKU_INCLUDE_EXTENDED_KEYS | 
| 2934 | DBT* ha_tokudb::pack_ext_key( | 
| 2935 |     DBT* key, | 
| 2936 |     uint keynr, | 
| 2937 |     uchar* buff, | 
| 2938 |     const uchar* key_ptr, | 
| 2939 |     uint key_length, | 
| 2940 |     int8_t inf_byte) { | 
| 2941 |  | 
| 2942 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 2943 |  | 
| 2944 |     // build a list of PK parts that are in the SK.  we will use this list to build the | 
| 2945 |     // extended key if necessary.  | 
| 2946 |     KEY* pk_key_info = &table->key_info[primary_key]; | 
| 2947 |     uint pk_parts = pk_key_info->user_defined_key_parts; | 
| 2948 |     uint pk_next = 0; | 
| 2949 |     struct { | 
| 2950 |         const uchar *key_ptr; | 
| 2951 |         KEY_PART_INFO *key_part; | 
| 2952 |     } pk_info[pk_parts]; | 
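    // pk_info records, for each SK part that is also a PK column, where that
    // part's value lives in key_ptr, so that the trailing PK parts can later
    // be emitted in PK-declaration order (see the second loop below).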
| 2953 |  | 
| 2954 |     KEY* key_info = &table->key_info[keynr]; | 
| 2955 |     KEY_PART_INFO* key_part = key_info->key_part; | 
| 2956 |     KEY_PART_INFO* end = key_part + key_info->user_defined_key_parts; | 
| 2957 |     my_bitmap_map* old_map = dbug_tmp_use_all_columns(table, table->write_set); | 
| 2958 |  | 
| 2959 |     memset((void *) key, 0, sizeof(*key)); | 
| 2960 |     key->data = buff; | 
| 2961 |  | 
| 2962 |     // first put the "infinity" byte at beginning. States if missing columns are implicitly | 
| 2963 |     // positive infinity or negative infinity | 
| 2964 |     *buff++ = (uchar)inf_byte; | 
| 2965 |  | 
| 2966 |     for (; key_part != end && (int) key_length > 0; key_part++) { | 
| 2967 |         // if the SK part is part of the PK, then append it to the list. | 
| 2968 |         if (key_part->field->part_of_key.is_set(primary_key)) { | 
| 2969 |             assert_always(pk_next < pk_parts); | 
| 2970 |             pk_info[pk_next].key_ptr = key_ptr; | 
| 2971 |             pk_info[pk_next].key_part = key_part; | 
| 2972 |             pk_next++; | 
| 2973 |         } | 
| 2974 |         uint offset = 0; | 
| 2975 |         if (key_part->null_bit) { | 
            if (*key_ptr != 0) {
| 2977 |                 *buff++ = NULL_COL_VAL; | 
| 2978 |                 key_length -= key_part->store_length; | 
| 2979 |                 key_ptr += key_part->store_length; | 
| 2980 |                 continue; | 
| 2981 |             } | 
| 2982 |             *buff++ = NONNULL_COL_VAL; | 
| 2983 |             offset = 1;         // Data is at key_ptr+1 | 
| 2984 |         } | 
| 2985 | #if !defined(MARIADB_BASE_VERSION) | 
| 2986 |         assert_always(table->s->db_low_byte_first); | 
| 2987 | #endif | 
| 2988 |         buff = pack_key_toku_key_field( | 
| 2989 |             buff, | 
| 2990 |             (uchar *) key_ptr + offset, | 
| 2991 |             key_part->field, | 
| 2992 |             key_part->length | 
| 2993 |             ); | 
| 2994 |          | 
| 2995 |         key_ptr += key_part->store_length; | 
| 2996 |         key_length -= key_part->store_length; | 
| 2997 |     } | 
| 2998 |  | 
| 2999 |     if (key_length > 0) { | 
| 3000 |         assert_always(key_part == end); | 
| 3001 |         end = key_info->key_part + get_ext_key_parts(key_info); | 
| 3002 |  | 
| 3003 |         // pack PK in order of PK key parts | 
| 3004 |         for (uint pk_index = 0; | 
| 3005 |              key_part != end && (int) key_length > 0 && pk_index < pk_parts; | 
| 3006 |              pk_index++) { | 
| 3007 |             uint i; | 
| 3008 |             for (i = 0; i < pk_next; i++) { | 
| 3009 |                 if (pk_info[i].key_part->fieldnr == | 
| 3010 |                     pk_key_info->key_part[pk_index].fieldnr) | 
| 3011 |                     break; | 
| 3012 |             } | 
| 3013 |             if (i < pk_next) { | 
| 3014 |                 const uchar *this_key_ptr = pk_info[i].key_ptr; | 
| 3015 |                 KEY_PART_INFO *this_key_part = pk_info[i].key_part; | 
| 3016 |                 buff = pack_key_toku_key_field( | 
| 3017 |                     buff, | 
| 3018 |                     (uchar*)this_key_ptr, | 
| 3019 |                     this_key_part->field, | 
| 3020 |                     this_key_part->length); | 
| 3021 |             } else { | 
| 3022 |                 buff = pack_key_toku_key_field( | 
| 3023 |                     buff, | 
| 3024 |                     (uchar*)key_ptr, | 
| 3025 |                     key_part->field, | 
| 3026 |                     key_part->length); | 
| 3027 |                 key_ptr += key_part->store_length; | 
| 3028 |                 key_length -= key_part->store_length; | 
| 3029 |                 key_part++; | 
| 3030 |             } | 
| 3031 |         } | 
| 3032 |     } | 
| 3033 |  | 
| 3034 |     key->size = (buff - (uchar *) key->data); | 
| 3035 |     DBUG_DUMP("key" , (uchar *) key->data, key->size); | 
| 3036 |     dbug_tmp_restore_column_map(table->write_set, old_map); | 
| 3037 |     DBUG_RETURN(key); | 
| 3038 | } | 
| 3039 | #endif | 
| 3040 |  | 
| 3041 | // | 
| 3042 | // get max used hidden primary key value | 
| 3043 | // | 
| 3044 | void ha_tokudb::init_hidden_prim_key_info(DB_TXN *txn) { | 
| 3045 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 3046 |     if (!(share->status & STATUS_PRIMARY_KEY_INIT)) { | 
| 3047 |         int error = 0; | 
| 3048 |         DBC* c = NULL;         | 
| 3049 |         error = share->key_file[primary_key]->cursor( | 
| 3050 |             share->key_file[primary_key], | 
| 3051 |             txn, | 
| 3052 |             &c, | 
| 3053 |             0); | 
| 3054 |         assert_always(error == 0); | 
| 3055 |         DBT key,val;         | 
| 3056 |         memset(&key, 0, sizeof(key)); | 
| 3057 |         memset(&val, 0, sizeof(val)); | 
| 3058 |         error = c->c_get(c, &key, &val, DB_LAST); | 
| 3059 |         if (error == 0) { | 
| 3060 |             assert_always(key.size == TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH); | 
| 3061 |             share->auto_ident = hpk_char_to_num((uchar *)key.data); | 
| 3062 |         } | 
| 3063 |         error = c->c_close(c); | 
| 3064 |         assert_always(error == 0); | 
| 3065 |         share->status |= STATUS_PRIMARY_KEY_INIT; | 
| 3066 |     } | 
| 3067 |     TOKUDB_HANDLER_DBUG_VOID_RETURN; | 
| 3068 | } | 
| 3069 |  | 
| 3070 |  | 
| 3071 |  | 
| 3072 | /** @brief | 
| 3073 |     Get metadata info stored in status.tokudb | 
| 3074 |     */ | 
| 3075 | int ha_tokudb::get_status(DB_TXN* txn) { | 
| 3076 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 3077 |     DBT key, value; | 
| 3078 |     HA_METADATA_KEY curr_key; | 
| 3079 |     int error; | 
| 3080 |  | 
| 3081 |     // | 
| 3082 |     // open status.tokudb | 
| 3083 |     // | 
| 3084 |     if (!share->status_block) { | 
| 3085 |         error = | 
| 3086 |             open_status_dictionary( | 
| 3087 |                 &share->status_block, | 
| 3088 |                 share->full_table_name(), | 
| 3089 |                 txn); | 
| 3090 |         if (error) {  | 
| 3091 |             goto cleanup;  | 
| 3092 |         } | 
| 3093 |     } | 
| 3094 |      | 
| 3095 |     // | 
    // set up the key/value DBTs used to read metadata from status.tokudb
| 3097 |     // | 
| 3098 |     memset(&key, 0, sizeof(key)); | 
| 3099 |     memset(&value, 0, sizeof(value)); | 
| 3100 |     key.data = &curr_key; | 
| 3101 |     key.size = sizeof(curr_key); | 
| 3102 |     value.flags = DB_DBT_USERMEM; | 
| 3103 |  | 
| 3104 |     assert_always(share->status_block); | 
| 3105 |     // | 
| 3106 |     // get version | 
| 3107 |     // | 
| 3108 |     value.ulen = sizeof(share->version); | 
| 3109 |     value.data = &share->version; | 
| 3110 |     curr_key = hatoku_new_version; | 
| 3111 |     error = share->status_block->get( | 
| 3112 |         share->status_block,  | 
| 3113 |         txn,  | 
| 3114 |         &key,  | 
| 3115 |         &value,  | 
| 3116 |         0 | 
| 3117 |         ); | 
| 3118 |     if (error == DB_NOTFOUND) { | 
| 3119 |         // | 
        // hack to handle the issues of going back and forth
        // between 5.0.3 and 5.0.4
        // the problem with going back and forth
        // is with storing the frm file: 5.0.4 stores it, 5.0.3 does not,
        // so if a user goes back and forth and alters the schema,
        // the stored frm can get out of sync with the schema of the table
        // and cause issues.
        // To take care of this, we do this versioning work here.
        // We change the key that stores the version:
        // in 5.0.3 it is hatoku_old_version, in 5.0.4 it is hatoku_new_version.
        // When we encounter a table that does not have hatoku_new_version
        // set, we give it the right one and overwrite the old one with zero.
        // This ensures that 5.0.3 cannot open the table once it has been
        // opened by 5.0.4.
| 3133 |         // | 
| 3134 |         uint dummy_version = 0; | 
| 3135 |         share->version = HA_TOKU_ORIG_VERSION; | 
| 3136 |         error = write_to_status( | 
| 3137 |             share->status_block,  | 
| 3138 |             hatoku_new_version, | 
| 3139 |             &share->version, | 
| 3140 |             sizeof(share->version),  | 
| 3141 |             txn | 
| 3142 |             ); | 
| 3143 |         if (error) { goto cleanup; } | 
| 3144 |         error = write_to_status( | 
| 3145 |             share->status_block,  | 
| 3146 |             hatoku_old_version, | 
| 3147 |             &dummy_version, | 
| 3148 |             sizeof(dummy_version),  | 
| 3149 |             txn | 
| 3150 |             ); | 
| 3151 |         if (error) { goto cleanup; } | 
| 3152 |     } | 
| 3153 |     else if (error || value.size != sizeof(share->version)) { | 
| 3154 |         if (error == 0) { | 
| 3155 |             error = HA_ERR_INTERNAL_ERROR; | 
| 3156 |         } | 
| 3157 |         goto cleanup; | 
| 3158 |     } | 
| 3159 |     // | 
| 3160 |     // get capabilities | 
| 3161 |     // | 
| 3162 |     curr_key = hatoku_capabilities; | 
| 3163 |     value.ulen = sizeof(share->capabilities); | 
| 3164 |     value.data = &share->capabilities; | 
| 3165 |     error = share->status_block->get( | 
| 3166 |         share->status_block,  | 
| 3167 |         txn,  | 
| 3168 |         &key,  | 
| 3169 |         &value,  | 
| 3170 |         0 | 
| 3171 |         ); | 
| 3172 |     if (error == DB_NOTFOUND) { | 
        share->capabilities = 0;
| 3174 |     } | 
    else if (error || value.size != sizeof(share->capabilities)) {
| 3176 |         if (error == 0) { | 
| 3177 |             error = HA_ERR_INTERNAL_ERROR; | 
| 3178 |         } | 
| 3179 |         goto cleanup; | 
| 3180 |     } | 
| 3181 |      | 
| 3182 |     error = 0; | 
| 3183 | cleanup: | 
| 3184 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 3185 | } | 
| 3186 |  | 
| 3187 | /** @brief | 
    Return an estimate of the number of rows in the table.
| 3189 |     Used when sorting to allocate buffers and by the optimizer. | 
| 3190 |     This is used in filesort.cc.  | 
| 3191 | */ | 
| 3192 | ha_rows ha_tokudb::estimate_rows_upper_bound() { | 
| 3193 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 3194 |     DBUG_RETURN(share->row_count() + HA_TOKUDB_EXTRA_ROWS); | 
| 3195 | } | 
| 3196 |  | 
| 3197 | // | 
| 3198 | // Function that compares two primary keys that were saved as part of rnd_pos | 
| 3199 | // and ::position | 
| 3200 | // | 
| 3201 | int ha_tokudb::cmp_ref(const uchar * ref1, const uchar * ref2) { | 
| 3202 |     int ret_val = 0; | 
| 3203 |     bool read_string = false; | 
| 3204 |     ret_val = tokudb_compare_two_keys( | 
| 3205 |         ref1 + sizeof(uint32_t), | 
| 3206 |         *(uint32_t *)ref1, | 
| 3207 |         ref2 + sizeof(uint32_t), | 
| 3208 |         *(uint32_t *)ref2, | 
| 3209 |         (uchar *)share->file->descriptor->dbt.data + 4, | 
| 3210 |         *(uint32_t *)share->file->descriptor->dbt.data - 4, | 
| 3211 |         false, | 
| 3212 |         &read_string | 
| 3213 |         ); | 
| 3214 |     return ret_val; | 
| 3215 | } | 
| 3216 |  | 
| 3217 | bool ha_tokudb::check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes) { | 
| 3218 |   // | 
| 3219 |   // This is a horrendous hack for now, as copied by InnoDB. | 
| 3220 |   // This states that if the auto increment create field has changed, | 
| 3221 |   // via a "alter table foo auto_increment=new_val", that this | 
| 3222 |   // change is incompatible, and to rebuild the entire table | 
| 3223 |   // This will need to be fixed | 
| 3224 |   // | 
| 3225 |   if ((info->used_fields & HA_CREATE_USED_AUTO) && | 
| 3226 |       info->auto_increment_value != 0) { | 
| 3227 |  | 
| 3228 |     return COMPATIBLE_DATA_NO; | 
| 3229 |   } | 
| 3230 |   if (table_changes != IS_EQUAL_YES) | 
| 3231 |     return COMPATIBLE_DATA_NO; | 
| 3232 |   return COMPATIBLE_DATA_YES; | 
| 3233 | } | 
| 3234 |  | 
| 3235 | // | 
| 3236 | // Method that is called before the beginning of many calls | 
| 3237 | // to insert rows (ha_tokudb::write_row). There is no guarantee | 
| 3238 | // that start_bulk_insert is called, however there is a guarantee | 
| 3239 | // that if start_bulk_insert is called, then end_bulk_insert may be | 
| 3240 | // called as well. | 
| 3241 | // Parameters: | 
| 3242 | //      [in]    rows - an estimate of the number of rows that will be inserted | 
//                     if the number of rows is unknown (such as if doing
//                     "insert into foo select * from bar"), then rows
//                     will be 0
| 3246 | // | 
| 3247 | // | 
| 3248 | // This function returns true if the table MAY be empty. | 
| 3249 | // It is NOT meant to be a 100% check for emptiness. | 
| 3250 | // This is used for a bulk load optimization. | 
| 3251 | // | 
| 3252 | bool ha_tokudb::may_table_be_empty(DB_TXN *txn) { | 
| 3253 |     int error; | 
| 3254 |     bool ret_val = false; | 
| 3255 |     DBC* tmp_cursor = NULL; | 
| 3256 |     DB_TXN* tmp_txn = NULL; | 
| 3257 |  | 
| 3258 |     const int empty_scan = tokudb::sysvars::empty_scan(ha_thd()); | 
| 3259 |     if (empty_scan == tokudb::sysvars::TOKUDB_EMPTY_SCAN_DISABLED) | 
| 3260 |         goto cleanup; | 
| 3261 |  | 
| 3262 |     if (txn == NULL) { | 
| 3263 |         error = txn_begin(db_env, 0, &tmp_txn, 0, ha_thd()); | 
| 3264 |         if (error) { | 
| 3265 |             goto cleanup; | 
| 3266 |         } | 
| 3267 |         txn = tmp_txn; | 
| 3268 |     } | 
| 3269 |  | 
| 3270 |     error = share->file->cursor(share->file, txn, &tmp_cursor, 0); | 
| 3271 |     if (error) | 
| 3272 |         goto cleanup; | 
| 3273 |     tmp_cursor->c_set_check_interrupt_callback(tmp_cursor, tokudb_killed_thd_callback, ha_thd()); | 
| 3274 |     if (empty_scan == tokudb::sysvars::TOKUDB_EMPTY_SCAN_LR) | 
| 3275 |         error = tmp_cursor->c_getf_next(tmp_cursor, 0, smart_dbt_do_nothing, NULL); | 
| 3276 |     else | 
| 3277 |         error = tmp_cursor->c_getf_prev(tmp_cursor, 0, smart_dbt_do_nothing, NULL); | 
| 3278 |     error = map_to_handler_error(error); | 
| 3279 |     if (error == DB_NOTFOUND) | 
| 3280 |         ret_val = true; | 
| 3281 |     else  | 
| 3282 |         ret_val = false; | 
| 3283 |     error = 0; | 
| 3284 |  | 
| 3285 | cleanup: | 
| 3286 |     if (tmp_cursor) { | 
| 3287 |         int r = tmp_cursor->c_close(tmp_cursor); | 
| 3288 |         assert_always(r == 0); | 
| 3289 |         tmp_cursor = NULL; | 
| 3290 |     } | 
| 3291 |     if (tmp_txn) { | 
| 3292 |         commit_txn(tmp_txn, 0); | 
| 3293 |         tmp_txn = NULL; | 
| 3294 |     } | 
| 3295 |     return ret_val; | 
| 3296 | } | 
| 3297 |  | 
| 3298 | #if MYSQL_VERSION_ID >= 100000 | 
| 3299 | void ha_tokudb::start_bulk_insert(ha_rows rows, uint flags) { | 
| 3300 |     TOKUDB_HANDLER_DBUG_ENTER("%llu %u txn %p" , (unsigned long long) rows, flags, transaction); | 
| 3301 | #else | 
| 3302 | void ha_tokudb::start_bulk_insert(ha_rows rows) { | 
| 3303 |     TOKUDB_HANDLER_DBUG_ENTER("%llu txn %p" , (unsigned long long) rows, transaction); | 
| 3304 | #endif | 
| 3305 |     THD* thd = ha_thd(); | 
| 3306 |     tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); | 
| 3307 |     delay_updating_ai_metadata = true; | 
| 3308 |     ai_metadata_update_required = false; | 
| 3309 |     abort_loader = false; | 
| 3310 |  | 
| 3311 |     rwlock_t_lock_read(share->_num_DBs_lock); | 
| 3312 |     uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); | 
| 3313 |     num_DBs_locked_in_bulk = true; | 
| 3314 |     lock_count = 0; | 
| 3315 |  | 
| 3316 |     if ((rows == 0 || rows > 1) && share->try_table_lock) { | 
| 3317 |         if (tokudb::sysvars::prelock_empty(thd) && | 
| 3318 |             may_table_be_empty(transaction) && | 
| 3319 |             transaction != NULL) { | 
| 3320 |             if (using_ignore || is_insert_ignore(thd) || thd->lex->duplicates != DUP_ERROR | 
| 3321 |                 || table->s->next_number_key_offset) { | 
| 3322 |                 acquire_table_lock(transaction, lock_write); | 
| 3323 |             } else { | 
| 3324 |                 mult_dbt_flags[primary_key] = 0; | 
| 3325 |                 if (!thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS) && !hidden_primary_key) { | 
| 3326 |                     mult_put_flags[primary_key] = DB_NOOVERWRITE; | 
| 3327 |                 } | 
| 3328 |                 uint32_t loader_flags = (tokudb::sysvars::load_save_space(thd)) ? | 
| 3329 |                     LOADER_COMPRESS_INTERMEDIATES : 0; | 
| 3330 |  | 
| 3331 |                 int error = db_env->create_loader( | 
| 3332 |                     db_env,  | 
| 3333 |                     transaction,  | 
| 3334 |                     &loader,  | 
| 3335 |                     NULL, // no src_db needed | 
| 3336 |                     curr_num_DBs,  | 
| 3337 |                     share->key_file,  | 
| 3338 |                     mult_put_flags, | 
| 3339 |                     mult_dbt_flags, | 
| 3340 |                     loader_flags | 
| 3341 |                     ); | 
| 3342 |                 if (error) {  | 
| 3343 |                     assert_always(loader == NULL); | 
| 3344 |                     goto exit_try_table_lock; | 
| 3345 |                 } | 
| 3346 |  | 
| 3347 |                 lc.thd = thd; | 
| 3348 |                 lc.ha = this; | 
| 3349 |  | 
| 3350 |                 error = loader->set_poll_function( | 
| 3351 |                     loader, ha_tokudb::bulk_insert_poll, &lc); | 
| 3352 |                 assert_always(!error); | 
| 3353 |  | 
| 3354 |                 error = loader->set_error_callback( | 
| 3355 |                     loader, ha_tokudb::loader_dup, &lc); | 
| 3356 |                 assert_always(!error); | 
| 3357 |  | 
| 3358 |                 trx->stmt_progress.using_loader = true; | 
| 3359 |             } | 
| 3360 |         } | 
| 3361 |     exit_try_table_lock: | 
| 3362 |         share->lock(); | 
| 3363 |         share->try_table_lock = false; | 
| 3364 |         share->unlock(); | 
| 3365 |     } | 
| 3366 |     TOKUDB_HANDLER_DBUG_VOID_RETURN; | 
| 3367 | } | 
int ha_tokudb::bulk_insert_poll(void* extra, float progress) {
    LOADER_CONTEXT context = (LOADER_CONTEXT)extra;
| 3370 |     if (thd_killed(context->thd)) { | 
        sprintf(context->write_status_msg,
                "The process has been killed, aborting bulk load.");
| 3373 |         return ER_ABORTING_CONNECTION; | 
| 3374 |     } | 
| 3375 |     float percentage = progress * 100; | 
    sprintf(context->write_status_msg,
            "Loading of data to %s about %.1f%% done",
            context->ha->share->full_table_name(),
            percentage);
| 3380 |     thd_proc_info(context->thd, context->write_status_msg); | 
| 3381 | #ifdef HA_TOKUDB_HAS_THD_PROGRESS | 
| 3382 |     thd_progress_report(context->thd, (unsigned long long)percentage, 100); | 
| 3383 | #endif | 
| 3384 |     return 0; | 
| 3385 | } | 
| 3386 | void ha_tokudb::loader_add_index_err(DB* db, | 
| 3387 |                                      int i, | 
| 3388 |                                      int err, | 
| 3389 |                                      DBT* key, | 
| 3390 |                                      DBT* val, | 
                                     void* error_extra) {
    LOADER_CONTEXT context = (LOADER_CONTEXT)error_extra;
| 3393 |     assert_always(context->ha); | 
| 3394 |     context->ha->set_loader_error(err); | 
| 3395 | } | 
| 3396 | void ha_tokudb::loader_dup(DB* db, | 
| 3397 |                            int i, | 
| 3398 |                            int err, | 
| 3399 |                            DBT* key, | 
| 3400 |                            DBT* val, | 
                           void* error_extra) {
    LOADER_CONTEXT context = (LOADER_CONTEXT)error_extra;
| 3403 |     assert_always(context->ha); | 
| 3404 |     context->ha->set_loader_error(err); | 
| 3405 |     if (err == DB_KEYEXIST) { | 
| 3406 |         context->ha->set_dup_value_for_pk(key); | 
| 3407 |     } | 
| 3408 | } | 
| 3409 |  | 
| 3410 | // | 
| 3411 | // Method that is called at the end of many calls to insert rows | 
| 3412 | // (ha_tokudb::write_row). If start_bulk_insert is called, then | 
| 3413 | // this is guaranteed to be called. | 
| 3414 | // | 
| 3415 | int ha_tokudb::end_bulk_insert(bool abort) { | 
| 3416 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 3417 |     int error = 0; | 
| 3418 |     THD* thd = ha_thd(); | 
| 3419 |     tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); | 
| 3420 |     bool using_loader = (loader != NULL); | 
| 3421 |     if (ai_metadata_update_required) { | 
| 3422 |         share->lock(); | 
| 3423 |         error = update_max_auto_inc(share->status_block, share->last_auto_increment); | 
| 3424 |         share->unlock(); | 
| 3425 |         if (error) { goto cleanup; } | 
| 3426 |     } | 
| 3427 |     delay_updating_ai_metadata = false; | 
| 3428 |     ai_metadata_update_required = false; | 
| 3429 |     loader_error = 0; | 
| 3430 |     if (loader) { | 
| 3431 |         if (!abort_loader && !thd_kill_level(thd)) { | 
| 3432 |             DBUG_EXECUTE_IF("tokudb_end_bulk_insert_sleep" , { | 
| 3433 |                 const char *orig_proc_info = tokudb_thd_get_proc_info(thd); | 
| 3434 |                 thd_proc_info(thd, "DBUG sleep" ); | 
| 3435 |                 my_sleep(20000000); | 
| 3436 |                 thd_proc_info(thd, orig_proc_info); | 
| 3437 |             }); | 
| 3438 |             error = loader->close(loader); | 
| 3439 |             loader = NULL; | 
| 3440 |             if (error) {  | 
| 3441 |                 if (thd_kill_level(thd)) { | 
| 3442 |                     my_error(ER_QUERY_INTERRUPTED, MYF(0)); | 
| 3443 |                 } | 
| 3444 |                 goto cleanup;  | 
| 3445 |             } | 
| 3446 |  | 
| 3447 |             for (uint i = 0; i < table_share->keys; i++) { | 
| 3448 |                 if (table_share->key_info[i].flags & HA_NOSAME) { | 
| 3449 |                     bool is_unique; | 
| 3450 |                     if (i == primary_key && !share->pk_has_string) { | 
| 3451 |                         continue; | 
| 3452 |                     } | 
| 3453 |                     error = is_index_unique(&is_unique, transaction, share->key_file[i], &table->key_info[i],  | 
| 3454 |                                             DB_PRELOCKED_WRITE); | 
| 3455 |                     if (error) goto cleanup; | 
| 3456 |                     if (!is_unique) { | 
| 3457 |                         error = HA_ERR_FOUND_DUPP_KEY; | 
| 3458 |                         last_dup_key = i; | 
| 3459 |                         goto cleanup; | 
| 3460 |                     } | 
| 3461 |                 } | 
| 3462 |             } | 
| 3463 |         } | 
| 3464 |         else { | 
| 3465 |             error = sprintf(write_status_msg, "aborting bulk load" );  | 
| 3466 |             thd_proc_info(thd, write_status_msg); | 
| 3467 |             loader->abort(loader); | 
| 3468 |             loader = NULL; | 
| 3469 |             share->try_table_lock = true; | 
| 3470 |         } | 
| 3471 |     } | 
| 3472 |  | 
| 3473 | cleanup: | 
| 3474 |     if (num_DBs_locked_in_bulk) { | 
| 3475 |         share->_num_DBs_lock.unlock(); | 
| 3476 |     } | 
| 3477 |     num_DBs_locked_in_bulk = false; | 
| 3478 |     lock_count = 0; | 
| 3479 |     if (loader) { | 
| 3480 |         error = sprintf(write_status_msg, "aborting bulk load" );  | 
| 3481 |         thd_proc_info(thd, write_status_msg); | 
| 3482 |         loader->abort(loader); | 
| 3483 |         loader = NULL; | 
| 3484 |     } | 
| 3485 |     abort_loader = false; | 
| 3486 |     memset(&lc, 0, sizeof(lc)); | 
| 3487 |     if (error || loader_error) { | 
| 3488 |         my_errno = error ? error : loader_error; | 
| 3489 |         if (using_loader) { | 
| 3490 |             share->try_table_lock = true; | 
| 3491 |         } | 
| 3492 |     } | 
| 3493 |     trx->stmt_progress.using_loader = false; | 
| 3494 |     thd_proc_info(thd, 0); | 
| 3495 |     TOKUDB_HANDLER_DBUG_RETURN(error ? error : loader_error); | 
| 3496 | } | 
| 3497 |  | 
| 3498 | int ha_tokudb::end_bulk_insert() { | 
| 3499 |     return end_bulk_insert( false ); | 
| 3500 | } | 
| 3501 |  | 
| 3502 | int ha_tokudb::is_index_unique(bool* is_unique, DB_TXN* txn, DB* db, KEY* key_info, int lock_flags) { | 
| 3503 |     int error; | 
| 3504 |     DBC* tmp_cursor1 = NULL; | 
| 3505 |     DBC* tmp_cursor2 = NULL; | 
| 3506 |     DBT key1, key2, val, packed_key1, packed_key2; | 
| 3507 |     uint64_t cnt = 0; | 
| 3508 |     char status_msg[MAX_ALIAS_NAME + 200]; //buffer of 200 should be a good upper bound. | 
| 3509 |     THD* thd = ha_thd(); | 
| 3510 |     const char *orig_proc_info = tokudb_thd_get_proc_info(thd); | 
| 3511 |     memset(&key1, 0, sizeof(key1)); | 
| 3512 |     memset(&key2, 0, sizeof(key2)); | 
| 3513 |     memset(&val, 0, sizeof(val)); | 
| 3514 |     memset(&packed_key1, 0, sizeof(packed_key1)); | 
| 3515 |     memset(&packed_key2, 0, sizeof(packed_key2)); | 
| 3516 |     *is_unique = true; | 
| 3517 |      | 
| 3518 |     error = db->cursor(db, txn, &tmp_cursor1, DB_SERIALIZABLE); | 
| 3519 |     if (error) { goto cleanup; } | 
| 3520 |  | 
| 3521 |     error = db->cursor(db, txn, &tmp_cursor2, DB_SERIALIZABLE); | 
| 3522 |     if (error) { goto cleanup; } | 
| 3523 |      | 
| 3524 |     error = tmp_cursor1->c_get(tmp_cursor1, &key1, &val, DB_NEXT + lock_flags); | 
| 3525 |     if (error == DB_NOTFOUND) { | 
| 3526 |         *is_unique = true; | 
| 3527 |         error = 0; | 
| 3528 |         goto cleanup; | 
| 3529 |     } | 
| 3530 |     else if (error) { goto cleanup; } | 
| 3531 |     error = tmp_cursor2->c_get(tmp_cursor2, &key2, &val, DB_NEXT + lock_flags); | 
| 3532 |     if (error) { goto cleanup; } | 
| 3533 |  | 
| 3534 |     error = tmp_cursor2->c_get(tmp_cursor2, &key2, &val, DB_NEXT + lock_flags); | 
| 3535 |     if (error == DB_NOTFOUND) { | 
| 3536 |         *is_unique = true; | 
| 3537 |         error = 0; | 
| 3538 |         goto cleanup; | 
| 3539 |     } | 
| 3540 |     else if (error) { goto cleanup; } | 
| 3541 |  | 
| 3542 |     while (error != DB_NOTFOUND) { | 
| 3543 |         bool has_null1; | 
| 3544 |         bool has_null2; | 
| 3545 |         int cmp; | 
| 3546 |         place_key_into_mysql_buff(key_info, table->record[0], (uchar *) key1.data + 1); | 
| 3547 |         place_key_into_mysql_buff(key_info, table->record[1], (uchar *) key2.data + 1); | 
| 3548 |          | 
| 3549 |         create_dbt_key_for_lookup(&packed_key1, key_info, key_buff, table->record[0], &has_null1); | 
| 3550 |         create_dbt_key_for_lookup(&packed_key2, key_info, key_buff2, table->record[1], &has_null2); | 
| 3551 |  | 
| 3552 |         if (!has_null1 && !has_null2) { | 
| 3553 |             cmp = tokudb_prefix_cmp_dbt_key(db, &packed_key1, &packed_key2); | 
| 3554 |             if (cmp == 0) { | 
| 3555 |                 memcpy(key_buff, key1.data, key1.size); | 
| 3556 |                 place_key_into_mysql_buff(key_info, table->record[0], (uchar *) key_buff + 1); | 
| 3557 |                 *is_unique = false; | 
| 3558 |                 break; | 
| 3559 |             } | 
| 3560 |         } | 
| 3561 |  | 
| 3562 |         error = tmp_cursor1->c_get(tmp_cursor1, &key1, &val, DB_NEXT + lock_flags); | 
| 3563 |         if (error) { goto cleanup; } | 
| 3564 |         error = tmp_cursor2->c_get(tmp_cursor2, &key2, &val, DB_NEXT + lock_flags); | 
| 3565 |         if (error && (error != DB_NOTFOUND)) { goto cleanup; } | 
| 3566 |  | 
| 3567 |         cnt++; | 
| 3568 |         if ((cnt % 10000) == 0) { | 
| 3569 |             sprintf( | 
| 3570 |                 status_msg, | 
| 3571 |                 "Verifying index uniqueness: Checked %llu of %llu rows in key-%s." , | 
| 3572 |                 (long long unsigned) cnt, | 
| 3573 |                 share->row_count(), | 
| 3574 |                 key_info->name.str); | 
| 3575 |             thd_proc_info(thd, status_msg); | 
| 3576 |             if (thd_kill_level(thd)) { | 
| 3577 |                 my_error(ER_QUERY_INTERRUPTED, MYF(0)); | 
| 3578 |                 error = ER_QUERY_INTERRUPTED; | 
| 3579 |                 goto cleanup; | 
| 3580 |             } | 
| 3581 |         } | 
| 3582 |     } | 
| 3583 |  | 
| 3584 |     error = 0; | 
| 3585 |  | 
| 3586 | cleanup: | 
| 3587 |     thd_proc_info(thd, orig_proc_info); | 
| 3588 |     if (tmp_cursor1) { | 
| 3589 |         tmp_cursor1->c_close(tmp_cursor1); | 
| 3590 |         tmp_cursor1 = NULL; | 
| 3591 |     } | 
| 3592 |     if (tmp_cursor2) { | 
| 3593 |         tmp_cursor2->c_close(tmp_cursor2); | 
| 3594 |         tmp_cursor2 = NULL; | 
| 3595 |     } | 
| 3596 |     return error; | 
| 3597 | } | 
| 3598 |  | 
| 3599 | int ha_tokudb::is_val_unique(bool* is_unique, const uchar* record, KEY* key_info, uint dict_index, DB_TXN* txn) { | 
| 3600 |     int error = 0; | 
| 3601 |     bool has_null; | 
| 3602 |     DBC* tmp_cursor = NULL; | 
| 3603 |  | 
| 3604 |     DBT key; memset((void *)&key, 0, sizeof(key)); | 
| 3605 |     create_dbt_key_from_key(&key, key_info, key_buff2, record, &has_null, true, MAX_KEY_LENGTH, COL_NEG_INF); | 
| 3606 |     if (has_null) { | 
| 3607 |         error = 0; | 
| 3608 |         *is_unique = true; | 
| 3609 |         goto cleanup; | 
| 3610 |     } | 
| 3611 |      | 
| 3612 |     error = share->key_file[dict_index]->cursor(share->key_file[dict_index], txn, &tmp_cursor, DB_SERIALIZABLE | DB_RMW); | 
| 3613 |     if (error) {  | 
| 3614 |         goto cleanup;  | 
| 3615 |     } else { | 
| 3616 |         // prelock (key,-inf),(key,+inf) so that the subsequent key lookup does not overlock  | 
| 3617 |         uint flags = 0; | 
| 3618 |         DBT key_right; memset(&key_right, 0, sizeof key_right); | 
| 3619 |         create_dbt_key_from_key(&key_right, key_info, key_buff3, record, &has_null, true, MAX_KEY_LENGTH, COL_POS_INF); | 
| 3620 |         error = tmp_cursor->c_set_bounds(tmp_cursor, &key, &key_right, true, DB_NOTFOUND); | 
| 3621 |         if (error == 0) { | 
| 3622 |             flags = DB_PRELOCKED | DB_PRELOCKED_WRITE; | 
| 3623 |         } | 
| 3624 |  | 
| 3625 |         // lookup key and check unique prefix | 
| 3626 |         struct smart_dbt_info info; | 
| 3627 |         info.ha = this; | 
| 3628 |         info.buf = NULL; | 
| 3629 |         info.keynr = dict_index; | 
| 3630 |          | 
| 3631 |         struct index_read_info ir_info; | 
| 3632 |         ir_info.orig_key = &key; | 
| 3633 |         ir_info.smart_dbt_info = info; | 
| 3634 |  | 
| 3635 |         error = tmp_cursor->c_getf_set_range(tmp_cursor, flags, &key, smart_dbt_callback_lookup, &ir_info); | 
| 3636 |         if (error == DB_NOTFOUND) { | 
| 3637 |             *is_unique = true; | 
| 3638 |             error = 0; | 
| 3639 |             goto cleanup; | 
| 3640 |         } | 
| 3641 |         else if (error) { | 
| 3642 |             error = map_to_handler_error(error); | 
| 3643 |             goto cleanup; | 
| 3644 |         } | 
| 3645 |         if (ir_info.cmp) { | 
| 3646 |             *is_unique = true; | 
| 3647 |         } | 
| 3648 |         else { | 
| 3649 |             *is_unique = false; | 
| 3650 |         } | 
| 3651 |     } | 
| 3652 |     error = 0; | 
| 3653 |  | 
| 3654 | cleanup: | 
| 3655 |     if (tmp_cursor) { | 
| 3656 |         int r = tmp_cursor->c_close(tmp_cursor); | 
| 3657 |         assert_always(r==0); | 
| 3658 |         tmp_cursor = NULL; | 
| 3659 |     } | 
| 3660 |     return error; | 
| 3661 | } | 
| 3662 |  | 
| 3663 | static void maybe_do_unique_checks_delay(THD *thd) { | 
| 3664 |     if (thd->slave_thread) { | 
| 3665 |         uint64_t delay_ms = tokudb::sysvars::rpl_unique_checks_delay(thd); | 
| 3666 |         if (delay_ms) | 
| 3667 |             usleep(delay_ms * 1000); | 
| 3668 |     } | 
| 3669 | } | 
| 3670 |  | 
| 3671 | static bool need_read_only(THD *thd) { | 
| 3672 |     return opt_readonly || !tokudb::sysvars::rpl_check_readonly(thd); | 
| 3673 | } | 
| 3674 |  | 
| 3675 | static bool do_unique_checks(THD *thd, bool do_rpl_event) { | 
| 3676 |     if (do_rpl_event && | 
| 3677 |         thd->slave_thread && | 
| 3678 |         need_read_only(thd) && | 
| 3679 |         !tokudb::sysvars::rpl_unique_checks(thd)) { | 
| 3680 |         return false; | 
| 3681 |     } else { | 
| 3682 |         return !thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS); | 
| 3683 |     } | 
| 3684 | } | 
| 3685 |  | 
| 3686 | int ha_tokudb::do_uniqueness_checks(uchar* record, DB_TXN* txn, THD* thd) { | 
| 3687 |     int error = 0; | 
| 3688 |     // | 
| 3689 |     // first do uniqueness checks | 
| 3690 |     // | 
| 3691 |     if (share->has_unique_keys && do_unique_checks(thd, in_rpl_write_rows)) { | 
| 3692 |         DBUG_EXECUTE_IF("tokudb_crash_if_rpl_does_uniqueness_check" , | 
| 3693 |                         DBUG_ASSERT(0);); | 
| 3694 |         for (uint keynr = 0; keynr < table_share->keys; keynr++) { | 
| 3695 |             bool is_unique_key = (table->key_info[keynr].flags & HA_NOSAME) || (keynr == primary_key); | 
| 3696 |             bool is_unique = false; | 
| 3697 |             // | 
| 3698 |             // don't need to do check for primary key that don't have strings | 
| 3699 |             // | 
| 3700 |             if (keynr == primary_key && !share->pk_has_string) { | 
| 3701 |                 continue; | 
| 3702 |             } | 
| 3703 |             if (!is_unique_key) { | 
| 3704 |                 continue; | 
| 3705 |             } | 
| 3706 |  | 
| 3707 |             maybe_do_unique_checks_delay(thd); | 
| 3708 |  | 
| 3709 |             // | 
| 3710 |             // if unique key, check uniqueness constraint | 
| 3711 |             // but, we do not need to check it if the key has a null | 
| 3712 |             // and we do not need to check it if unique_checks is off | 
| 3713 |             // | 
| 3714 |             error = is_val_unique(&is_unique, record, &table->key_info[keynr], keynr, txn); | 
| 3715 |             if (error) {  | 
| 3716 |                 goto cleanup;  | 
| 3717 |             } | 
| 3718 |             if (!is_unique) { | 
| 3719 |                 error = DB_KEYEXIST; | 
| 3720 |                 last_dup_key = keynr; | 
| 3721 |                 goto cleanup; | 
| 3722 |             } | 
| 3723 |         } | 
| 3724 |     }     | 
| 3725 | cleanup: | 
| 3726 |     return error; | 
| 3727 | } | 
| 3728 |  | 
| 3729 | void ha_tokudb::test_row_packing(uchar* record, DBT* pk_key, DBT* pk_val) { | 
| 3730 |     int error; | 
| 3731 |     DBT row, key; | 
| 3732 |     // | 
| 3733 |     // variables for testing key packing, only used in some debug modes | 
| 3734 |     // | 
| 3735 |     uchar* tmp_pk_key_data = NULL; | 
| 3736 |     uchar* tmp_pk_val_data = NULL; | 
| 3737 |     DBT tmp_pk_key; | 
| 3738 |     DBT tmp_pk_val; | 
| 3739 |     bool has_null; | 
| 3740 |     int cmp; | 
| 3741 |  | 
| 3742 |     memset(&tmp_pk_key, 0, sizeof(DBT)); | 
| 3743 |     memset(&tmp_pk_val, 0, sizeof(DBT)); | 
| 3744 |  | 
| 3745 |     // | 
    // used for testing the packing of keys
| 3747 |     // | 
| 3748 |     tmp_pk_key_data = (uchar*)tokudb::memory::malloc(pk_key->size, MYF(MY_WME)); | 
| 3749 |     assert_always(tmp_pk_key_data); | 
| 3750 |     tmp_pk_val_data = (uchar*)tokudb::memory::malloc(pk_val->size, MYF(MY_WME)); | 
| 3751 |     assert_always(tmp_pk_val_data); | 
| 3752 |     memcpy(tmp_pk_key_data, pk_key->data, pk_key->size); | 
| 3753 |     memcpy(tmp_pk_val_data, pk_val->data, pk_val->size); | 
| 3754 |     tmp_pk_key.data = tmp_pk_key_data; | 
| 3755 |     tmp_pk_key.size = pk_key->size; | 
| 3756 |     tmp_pk_val.data = tmp_pk_val_data; | 
| 3757 |     tmp_pk_val.size = pk_val->size; | 
| 3758 |  | 
| 3759 |     for (uint keynr = 0; keynr < table_share->keys; keynr++) { | 
| 3760 |         uint32_t tmp_num_bytes = 0; | 
| 3761 |         uchar* row_desc = NULL; | 
| 3762 |         uint32_t desc_size = 0; | 
| 3763 |          | 
| 3764 |         if (keynr == primary_key) { | 
| 3765 |             continue; | 
| 3766 |         } | 
| 3767 |  | 
| 3768 |         create_dbt_key_from_table(&key, keynr, key_buff2, record, &has_null);  | 
| 3769 |  | 
| 3770 |         // | 
| 3771 |         // TEST | 
| 3772 |         // | 
| 3773 |         row_desc = (uchar *)share->key_file[keynr]->descriptor->dbt.data; | 
| 3774 |         row_desc += (*(uint32_t *)row_desc); | 
| 3775 |         desc_size = (*(uint32_t *)row_desc) - 4; | 
| 3776 |         row_desc += 4; | 
| 3777 |         tmp_num_bytes = pack_key_from_desc( | 
| 3778 |             key_buff3, | 
| 3779 |             row_desc, | 
| 3780 |             desc_size, | 
| 3781 |             &tmp_pk_key, | 
| 3782 |             &tmp_pk_val | 
| 3783 |             ); | 
| 3784 |         assert_always(tmp_num_bytes == key.size); | 
| 3785 |         cmp = memcmp(key_buff3,key_buff2,tmp_num_bytes); | 
| 3786 |         assert_always(cmp == 0); | 
| 3787 |  | 
| 3788 |         // | 
| 3789 |         // test key packing of clustering keys | 
| 3790 |         // | 
| 3791 |         if (key_is_clustering(&table->key_info[keynr])) { | 
| 3792 |             error = pack_row(&row, (const uchar *) record, keynr); | 
| 3793 |             assert_always(error == 0); | 
| 3794 |             uchar* tmp_buff = NULL; | 
| 3795 |             tmp_buff = (uchar*)tokudb::memory::malloc( | 
| 3796 |                 alloced_rec_buff_length, | 
| 3797 |                 MYF(MY_WME)); | 
| 3798 |             assert_always(tmp_buff); | 
| 3799 |             row_desc = (uchar *)share->key_file[keynr]->descriptor->dbt.data; | 
| 3800 |             row_desc += (*(uint32_t *)row_desc); | 
| 3801 |             row_desc += (*(uint32_t *)row_desc); | 
| 3802 |             desc_size = (*(uint32_t *)row_desc) - 4; | 
| 3803 |             row_desc += 4; | 
| 3804 |             tmp_num_bytes = pack_clustering_val_from_desc( | 
| 3805 |                 tmp_buff, | 
| 3806 |                 row_desc, | 
| 3807 |                 desc_size, | 
| 3808 |                 &tmp_pk_val | 
| 3809 |                 ); | 
| 3810 |             assert_always(tmp_num_bytes == row.size); | 
| 3811 |             cmp = memcmp(tmp_buff,rec_buff,tmp_num_bytes); | 
| 3812 |             assert_always(cmp == 0); | 
| 3813 |             tokudb::memory::free(tmp_buff); | 
| 3814 |         } | 
| 3815 |     } | 
| 3816 |  | 
| 3817 |     // | 
| 3818 |     // copy stuff back out | 
| 3819 |     // | 
| 3820 |     error = pack_row(pk_val, (const uchar *) record, primary_key); | 
| 3821 |     assert_always(pk_val->size == tmp_pk_val.size); | 
| 3822 |     cmp = memcmp(pk_val->data, tmp_pk_val_data, pk_val->size);     | 
| 3823 |     assert_always( cmp == 0); | 
| 3824 |  | 
| 3825 |     tokudb::memory::free(tmp_pk_key_data); | 
| 3826 |     tokudb::memory::free(tmp_pk_val_data); | 
| 3827 | } | 
| 3828 |  | 
| 3829 | // set the put flags for the main dictionary | 
| 3830 | void ha_tokudb::set_main_dict_put_flags(THD* thd, bool opt_eligible, uint32_t* put_flags) { | 
| 3831 |     uint32_t old_prelock_flags = 0; | 
| 3832 |     uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); | 
| 3833 |     bool in_hot_index = share->num_DBs > curr_num_DBs; | 
| 3834 |     bool using_ignore_flag_opt = do_ignore_flag_optimization(thd, table, share->replace_into_fast && !using_ignore_no_key); | 
| 3835 |     // | 
| 3836 |     // optimization for "REPLACE INTO..." (and "INSERT IGNORE") command | 
| 3837 |     // if the command is "REPLACE INTO" and the only table | 
| 3838 |     // is the main table (or all indexes are a subset of the pk),  | 
| 3839 |     // then we can simply insert the element | 
| 3840 |     // with DB_YESOVERWRITE. If the element does not exist, | 
| 3841 |     // it will act as a normal insert, and if it does exist, it  | 
| 3842 |     // will act as a replace, which is exactly what REPLACE INTO is supposed | 
| 3843 |     // to do. We cannot do this if otherwise, because then we lose | 
| 3844 |     // consistency between indexes | 
| 3845 |     // | 
| 3846 |     if (hidden_primary_key)  | 
| 3847 |     { | 
| 3848 |         *put_flags = old_prelock_flags; | 
| 3849 |     } | 
| 3850 |     else if (!do_unique_checks(thd, in_rpl_write_rows | in_rpl_update_rows) && !is_replace_into(thd) && !is_insert_ignore(thd)) | 
| 3851 |     { | 
| 3852 |         *put_flags = old_prelock_flags; | 
| 3853 |     } | 
| 3854 |     else if (using_ignore_flag_opt && is_replace_into(thd)  | 
| 3855 |             && !in_hot_index) | 
| 3856 |     { | 
| 3857 |         *put_flags = old_prelock_flags; | 
| 3858 |     } | 
| 3859 |     else if (opt_eligible && using_ignore_flag_opt && is_insert_ignore(thd)  | 
| 3860 |             && !in_hot_index) | 
| 3861 |     { | 
| 3862 |         *put_flags = DB_NOOVERWRITE_NO_ERROR | old_prelock_flags; | 
| 3863 |     } | 
| 3864 |     else  | 
| 3865 |     { | 
| 3866 |         *put_flags = DB_NOOVERWRITE | old_prelock_flags; | 
| 3867 |     } | 
| 3868 | } | 
| 3869 |  | 
| 3870 | int ha_tokudb::insert_row_to_main_dictionary(uchar* record, DBT* pk_key, DBT* pk_val, DB_TXN* txn) { | 
| 3871 |     int error = 0; | 
| 3872 |     uint curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); | 
| 3873 |     assert_always(curr_num_DBs == 1); | 
| 3874 |  | 
| 3875 |     uint32_t put_flags = mult_put_flags[primary_key]; | 
| 3876 |     THD *thd = ha_thd();  | 
| 3877 |     set_main_dict_put_flags(thd, true, &put_flags); | 
| 3878 |  | 
    // for testing, optionally make unique checks take a very long time
| 3880 |     if ((put_flags & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) | 
| 3881 |         maybe_do_unique_checks_delay(thd); | 
| 3882 |  | 
| 3883 |     error = share->file->put(share->file, txn, pk_key, pk_val, put_flags); | 
| 3884 |     if (error) { | 
| 3885 |         last_dup_key = primary_key; | 
| 3886 |         goto cleanup; | 
| 3887 |     } | 
| 3888 |  | 
| 3889 | cleanup: | 
| 3890 |     return error; | 
| 3891 | } | 
| 3892 |  | 
| 3893 | int ha_tokudb::insert_rows_to_dictionaries_mult(DBT* pk_key, DBT* pk_val, DB_TXN* txn, THD* thd) { | 
| 3894 |     int error = 0; | 
| 3895 |     uint curr_num_DBs = share->num_DBs; | 
| 3896 |     set_main_dict_put_flags(thd, true, &mult_put_flags[primary_key]); | 
| 3897 |     uint32_t flags = mult_put_flags[primary_key]; | 
| 3898 |  | 
    // for testing, optionally make unique checks take a very long time
| 3900 |     if ((flags & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) | 
| 3901 |         maybe_do_unique_checks_delay(thd); | 
| 3902 |  | 
| 3903 |     // the insert ignore optimization uses DB_NOOVERWRITE_NO_ERROR,  | 
| 3904 |     // which is not allowed with env->put_multiple.  | 
| 3905 |     // we have to insert the rows one by one in this case. | 
| 3906 |     if (flags & DB_NOOVERWRITE_NO_ERROR) { | 
| 3907 |         DB * src_db = share->key_file[primary_key]; | 
| 3908 |         for (uint32_t i = 0; i < curr_num_DBs; i++) { | 
| 3909 |             DB * db = share->key_file[i]; | 
| 3910 |             if (i == primary_key) { | 
| 3911 |                 // if it's the primary key, insert the rows | 
| 3912 |                 // as they are. | 
| 3913 |                 error = db->put(db, txn, pk_key, pk_val, flags); | 
| 3914 |             } else { | 
| 3915 |                 // generate a row for secondary keys. | 
| 3916 |                 // use our multi put key/rec buffers | 
| 3917 |                 // just as the ydb layer would have in | 
| 3918 |                 // env->put_multiple(), except that | 
| 3919 |                 // we will just do a put() right away. | 
| 3920 |                 error = tokudb_generate_row(db, src_db, | 
| 3921 |                         &mult_key_dbt_array[i].dbts[0], &mult_rec_dbt_array[i].dbts[0],  | 
| 3922 |                         pk_key, pk_val); | 
| 3923 |                 if (error != 0) { | 
| 3924 |                     goto out; | 
| 3925 |                 } | 
| 3926 |                 error = db->put(db, txn, &mult_key_dbt_array[i].dbts[0],  | 
| 3927 |                         &mult_rec_dbt_array[i].dbts[0], flags); | 
| 3928 |             } | 
| 3929 |             if (error != 0) { | 
| 3930 |                 goto out; | 
| 3931 |             } | 
| 3932 |         } | 
| 3933 |     } else { | 
| 3934 |         // not insert ignore, so we can use put multiple | 
| 3935 |         error = db_env->put_multiple( | 
| 3936 |             db_env,  | 
| 3937 |             share->key_file[primary_key],  | 
| 3938 |             txn,  | 
| 3939 |             pk_key,  | 
| 3940 |             pk_val, | 
| 3941 |             curr_num_DBs,  | 
| 3942 |             share->key_file,  | 
| 3943 |             mult_key_dbt_array, | 
| 3944 |             mult_rec_dbt_array, | 
| 3945 |             mult_put_flags | 
| 3946 |             ); | 
| 3947 |     } | 
| 3948 |  | 
| 3949 | out: | 
| 3950 |     // | 
| 3951 |     // We break if we hit an error, unless it is a dup key error | 
| 3952 |     // and MySQL told us to ignore duplicate key errors | 
| 3953 |     // | 
| 3954 |     if (error) { | 
| 3955 |         last_dup_key = primary_key; | 
| 3956 |     } | 
| 3957 |     return error; | 
| 3958 | } | 
| 3959 |  | 
| 3960 | // | 
| 3961 | // Stores a row in the table, called when handling an INSERT query | 
| 3962 | // Parameters: | 
| 3963 | //      [in]    record - a row in MySQL format | 
| 3964 | // Returns: | 
| 3965 | //      0 on success | 
| 3966 | //      error otherwise | 
| 3967 | // | 
| 3968 | int ha_tokudb::write_row(uchar * record) { | 
| 3969 |     TOKUDB_HANDLER_DBUG_ENTER("%p" , record); | 
| 3970 |  | 
| 3971 |     DBT row, prim_key; | 
| 3972 |     int error; | 
| 3973 |     THD *thd = ha_thd(); | 
| 3974 |     bool has_null; | 
| 3975 |     DB_TXN* sub_trans = NULL; | 
| 3976 |     DB_TXN* txn = NULL; | 
| 3977 |     tokudb_trx_data *trx = NULL; | 
| 3978 |     uint curr_num_DBs; | 
| 3979 |     bool create_sub_trans = false; | 
| 3980 |     bool num_DBs_locked = false; | 
| 3981 |  | 
| 3982 |     // | 
| 3983 |     // some crap that needs to be done because MySQL does not properly abstract | 
| 3984 |     // this work away from us, namely filling in auto increment and setting auto timestamp | 
| 3985 |     // | 
| 3986 | #if MYSQL_VERSION_ID < 50600 | 
| 3987 |     if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) { | 
| 3988 |         table->timestamp_field->set_time(); | 
| 3989 |     } | 
| 3990 | #endif | 
| 3991 |     if (table->next_number_field && record == table->record[0]) { | 
| 3992 |         error = update_auto_increment(); | 
| 3993 |         if (error) | 
| 3994 |             goto cleanup; | 
| 3995 |     } | 
| 3996 |  | 
| 3997 |     // | 
| 3998 |     // check to see if some value for the auto increment column that is bigger | 
| 3999 |     // than anything else til now is being used. If so, update the metadata to reflect it | 
| 4000 |     // the goal here is we never want to have a dup key error due to a bad increment | 
| 4001 |     // of the auto inc field. | 
| 4002 |     // | 
| 4003 |     if (share->has_auto_inc && record == table->record[0]) { | 
| 4004 |         share->lock(); | 
| 4005 |         ulonglong curr_auto_inc = retrieve_auto_increment( | 
| 4006 |             table->field[share->ai_field_index]->key_type(), | 
| 4007 |             field_offset(table->field[share->ai_field_index], table), | 
| 4008 |             record); | 
| 4009 |         if (curr_auto_inc > share->last_auto_increment) { | 
| 4010 |             share->last_auto_increment = curr_auto_inc; | 
| 4011 |             if (delay_updating_ai_metadata) { | 
| 4012 |                 ai_metadata_update_required = true; | 
| 4013 |             } else { | 
| 4014 |                 update_max_auto_inc( | 
| 4015 |                     share->status_block, | 
| 4016 |                     share->last_auto_increment); | 
| 4017 |             } | 
| 4018 |         } | 
| 4019 |         share->unlock(); | 
| 4020 |     } | 
| 4021 |  | 
| 4022 |     // | 
| 4023 |     // grab reader lock on numDBs_lock | 
| 4024 |     // | 
| 4025 |     if (!num_DBs_locked_in_bulk) { | 
| 4026 |         rwlock_t_lock_read(share->_num_DBs_lock); | 
| 4027 |         num_DBs_locked = true; | 
| 4028 |     } else { | 
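        // During bulk operations this read lock is held across many rows;
        // every 2000 rows (a heuristic) we briefly release and retake it,
        // presumably so that a waiter on the write lock (e.g. an operation
        // that changes num_DBs, such as an index build) is not starved for
        // the entire bulk load.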
| 4029 |         lock_count++; | 
| 4030 |         if (lock_count >= 2000) { | 
| 4031 |             share->_num_DBs_lock.unlock(); | 
| 4032 |             rwlock_t_lock_read(share->_num_DBs_lock); | 
| 4033 |             lock_count = 0; | 
| 4034 |         } | 
| 4035 |     } | 
| 4036 |     curr_num_DBs = share->num_DBs; | 
| 4037 |      | 
| 4038 |     if (hidden_primary_key) { | 
| 4039 |         get_auto_primary_key(current_ident); | 
| 4040 |     } | 
| 4041 |  | 
| 4042 |     if (table_share->blob_fields) { | 
| 4043 |         if (fix_rec_buff_for_blob(max_row_length(record))) { | 
| 4044 |             error = HA_ERR_OUT_OF_MEM; | 
| 4045 |             goto cleanup; | 
| 4046 |         } | 
| 4047 |     } | 
| 4048 |  | 
| 4049 |     create_dbt_key_from_table(&prim_key, primary_key, primary_key_buff, record, &has_null); | 
| 4050 |     if ((error = pack_row(&row, (const uchar *) record, primary_key))){ | 
| 4051 |         goto cleanup; | 
| 4052 |     } | 
| 4053 |  | 
| 4054 |     create_sub_trans = (using_ignore && !(do_ignore_flag_optimization(thd,table,share->replace_into_fast && !using_ignore_no_key))); | 
| 4055 |     if (create_sub_trans) { | 
| 4056 |         error = txn_begin(db_env, transaction, &sub_trans, DB_INHERIT_ISOLATION, thd); | 
| 4057 |         if (error) { | 
| 4058 |             goto cleanup; | 
| 4059 |         } | 
| 4060 |     } | 
| 4061 |     txn = create_sub_trans ? sub_trans : transaction; | 
| 4062 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS(TOKUDB_DEBUG_TXN, "txn %p" , txn); | 
| 4063 |     if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_CHECK_KEY))) { | 
| 4064 |         test_row_packing(record,&prim_key,&row); | 
| 4065 |     } | 
| 4066 |     if (loader) { | 
| 4067 |         error = loader->put(loader, &prim_key, &row); | 
| 4068 |         if (error) { | 
| 4069 |             abort_loader = true; | 
| 4070 |             goto cleanup; | 
| 4071 |         } | 
| 4072 |     } else { | 
| 4073 |         error = do_uniqueness_checks(record, txn, thd); | 
| 4074 |         if (error) { | 
| 4075 |             // for #4633 | 
| 4076 |             // if we have a duplicate key error, let's check the primary key to see | 
| 4077 |             // if there is a duplicate there. If so, set last_dup_key to the pk | 
| 4078 |             if (error == DB_KEYEXIST && !tokudb_test(hidden_primary_key) && last_dup_key != primary_key) { | 
| 4079 |                 int r = share->file->getf_set(share->file, txn, DB_SERIALIZABLE, &prim_key, smart_dbt_do_nothing, NULL); | 
| 4080 |                 if (r == 0) { | 
| 4081 |                     // if we get no error, that means the row | 
| 4082 |                     // was found and this is a duplicate key, | 
| 4083 |                     // so we set last_dup_key | 
| 4084 |                     last_dup_key = primary_key; | 
| 4085 |                 } else if (r != DB_NOTFOUND) { | 
| 4086 |                     // if some other error is returned, return that to the user. | 
| 4087 |                     error = r; | 
| 4088 |                 } | 
| 4089 |             } | 
| 4090 |             goto cleanup;  | 
| 4091 |         } | 
| 4092 |         if (curr_num_DBs == 1) { | 
| 4093 |             error = insert_row_to_main_dictionary(record, &prim_key, &row, txn); | 
| 4094 |             if (error) { goto cleanup; } | 
| 4095 |         } else { | 
| 4096 |             error = insert_rows_to_dictionaries_mult(&prim_key, &row, txn, thd); | 
| 4097 |             if (error) { goto cleanup; } | 
| 4098 |         } | 
| 4099 |         if (error == 0) { | 
| 4100 |             uint64_t full_row_size = prim_key.size + row.size; | 
| 4101 |             toku_hton_update_primary_key_bytes_inserted(full_row_size); | 
| 4102 |         } | 
| 4103 |     } | 
| 4104 |  | 
| 4105 |     trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); | 
| 4106 |     if (!error) { | 
| 4107 |         added_rows++; | 
| 4108 |         trx->stmt_progress.inserted++; | 
| 4109 |         track_progress(thd); | 
| 4110 |     } | 
| 4111 | cleanup: | 
| 4112 |     if (num_DBs_locked) { | 
| 4113 |        share->_num_DBs_lock.unlock(); | 
| 4114 |     } | 
| 4115 |     if (error == DB_KEYEXIST) { | 
| 4116 |         error = HA_ERR_FOUND_DUPP_KEY; | 
| 4117 |     } | 
| 4118 |     if (sub_trans) { | 
| 4119 |         // no point in recording error value of abort. | 
| 4120 |         // nothing we can do about it anyway and it is not what | 
| 4121 |         // we want to return. | 
| 4122 |         if (error) { | 
| 4123 |             abort_txn(sub_trans); | 
| 4124 |         } | 
| 4125 |         else { | 
| 4126 |             commit_txn(sub_trans, DB_TXN_NOSYNC); | 
| 4127 |         } | 
| 4128 |     } | 
| 4129 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4130 | } | 
| 4131 |  | 
| 4132 | /* Compare if a key in a row has changed */ | 
| 4133 | bool ha_tokudb::key_changed(uint keynr, const uchar * old_row, const uchar * new_row) { | 
| 4134 |     DBT old_key; | 
| 4135 |     DBT new_key; | 
| 4136 |     memset((void *) &old_key, 0, sizeof(old_key)); | 
| 4137 |     memset((void *) &new_key, 0, sizeof(new_key)); | 
| 4138 |  | 
| 4139 |     bool has_null; | 
| 4140 |     create_dbt_key_from_table(&new_key, keynr, key_buff2, new_row, &has_null); | 
| 4141 |     create_dbt_key_for_lookup(&old_key,&table->key_info[keynr], key_buff3, old_row, &has_null); | 
| 4142 |     return tokudb_prefix_cmp_dbt_key(share->key_file[keynr], &old_key, &new_key); | 
| 4143 | } | 
| 4144 |  | 
| 4145 | // | 
| 4146 | // Updates a row in the table, called when handling an UPDATE query | 
| 4147 | // Parameters: | 
| 4148 | //      [in]    old_row - row to be updated, in MySQL format | 
| 4149 | //      [in]    new_row - new row, in MySQL format | 
| 4150 | // Returns: | 
| 4151 | //      0 on success | 
| 4152 | //      error otherwise | 
| 4153 | // | 
| 4154 | int ha_tokudb::update_row(const uchar * old_row, const uchar * new_row) { | 
| 4155 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 4156 |     DBT prim_key, old_prim_key, prim_row, old_prim_row; | 
| 4157 |     int UNINIT_VAR(error); | 
| 4158 |     bool has_null; | 
| 4159 |     THD* thd = ha_thd(); | 
| 4160 |     DB_TXN* sub_trans = NULL; | 
| 4161 |     DB_TXN* txn = NULL; | 
| 4162 |     tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); | 
| 4163 |     uint curr_num_DBs; | 
| 4164 |  | 
| 4165 |     memset((void *) &prim_key, 0, sizeof(prim_key)); | 
| 4166 |     memset((void *) &old_prim_key, 0, sizeof(old_prim_key)); | 
| 4167 |     memset((void *) &prim_row, 0, sizeof(prim_row)); | 
| 4168 |     memset((void *) &old_prim_row, 0, sizeof(old_prim_row)); | 
| 4169 |  | 
| 4170 | #if MYSQL_VERSION_ID < 50600 | 
| 4171 |     if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) { | 
| 4172 |         table->timestamp_field->set_time(); | 
| 4173 |     } | 
| 4174 | #endif | 
| 4175 |     // | 
| 4176 |     // check to see if some value for the auto increment column that is bigger | 
| 4177 |     // than anything else til now is being used. If so, update the metadata to reflect it | 
| 4178 |     // the goal here is we never want to have a dup key error due to a bad increment | 
| 4179 |     // of the auto inc field. | 
| 4180 |     // | 
| 4181 |     if (share->has_auto_inc && new_row == table->record[0]) { | 
| 4182 |         share->lock(); | 
| 4183 |         ulonglong curr_auto_inc = retrieve_auto_increment( | 
| 4184 |             table->field[share->ai_field_index]->key_type(),  | 
| 4185 |             field_offset(table->field[share->ai_field_index], table), | 
| 4186 |             new_row | 
| 4187 |             ); | 
| 4188 |         if (curr_auto_inc > share->last_auto_increment) { | 
| 4189 |             error = update_max_auto_inc(share->status_block, curr_auto_inc); | 
| 4190 |             if (!error) { | 
| 4191 |                 share->last_auto_increment = curr_auto_inc; | 
| 4192 |             } | 
| 4193 |         } | 
| 4194 |         share->unlock(); | 
| 4195 |     } | 
| 4196 |  | 
| 4197 |     // | 
| 4198 |     // grab reader lock on numDBs_lock | 
| 4199 |     // | 
| 4200 |     bool num_DBs_locked = false; | 
| 4201 |     if (!num_DBs_locked_in_bulk) { | 
| 4202 |         rwlock_t_lock_read(share->_num_DBs_lock); | 
| 4203 |         num_DBs_locked = true; | 
| 4204 |     } | 
| 4205 |     curr_num_DBs = share->num_DBs; | 
| 4206 |  | 
| 4207 |     if (using_ignore) { | 
| 4208 |         error = txn_begin(db_env, transaction, &sub_trans, DB_INHERIT_ISOLATION, thd); | 
| 4209 |         if (error) { | 
| 4210 |             goto cleanup; | 
| 4211 |         } | 
| 4212 |     } | 
| 4213 |     txn = using_ignore ? sub_trans : transaction; | 
| 4214 |  | 
| 4215 |     if (hidden_primary_key) { | 
| 4216 |         memset((void *) &prim_key, 0, sizeof(prim_key)); | 
| 4217 |         prim_key.data = (void *) current_ident; | 
| 4218 |         prim_key.size = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH; | 
| 4219 |         old_prim_key = prim_key; | 
| 4220 |     }  | 
| 4221 |     else { | 
| 4222 |         create_dbt_key_from_table(&prim_key, primary_key, key_buff, new_row, &has_null); | 
| 4223 |         create_dbt_key_from_table(&old_prim_key, primary_key, primary_key_buff, old_row, &has_null); | 
| 4224 |     } | 
| 4225 |  | 
| 4226 |     // do uniqueness checks | 
| 4227 |     if (share->has_unique_keys && do_unique_checks(thd, in_rpl_update_rows)) { | 
| 4228 |         for (uint keynr = 0; keynr < table_share->keys; keynr++) { | 
| 4229 |             bool is_unique_key = (table->key_info[keynr].flags & HA_NOSAME) || (keynr == primary_key); | 
| 4230 |             if (keynr == primary_key && !share->pk_has_string) { | 
| 4231 |                 continue; | 
| 4232 |             } | 
| 4233 |             if (is_unique_key) { | 
| 4234 |                 bool key_ch = key_changed(keynr, old_row, new_row); | 
| 4235 |                 if (key_ch) { | 
| 4236 |                     bool is_unique; | 
| 4237 |                     error = is_val_unique(&is_unique, new_row, &table->key_info[keynr], keynr, txn); | 
| 4238 |                     if (error) goto cleanup; | 
| 4239 |                     if (!is_unique) { | 
| 4240 |                         error = DB_KEYEXIST; | 
| 4241 |                         last_dup_key = keynr; | 
| 4242 |                         goto cleanup; | 
| 4243 |                     } | 
| 4244 |                 } | 
| 4245 |             } | 
| 4246 |         } | 
| 4247 |     } | 
| 4248 |      | 
| 4249 |     if (table_share->blob_fields) { | 
| 4250 |         if (fix_rec_buff_for_blob(max_row_length(new_row))) { | 
| 4251 |             error = HA_ERR_OUT_OF_MEM; | 
| 4252 |             goto cleanup; | 
| 4253 |         } | 
| 4254 |         if (fix_rec_update_buff_for_blob(max_row_length(old_row))) { | 
| 4255 |             error = HA_ERR_OUT_OF_MEM; | 
| 4256 |             goto cleanup; | 
| 4257 |         } | 
| 4258 |     } | 
| 4259 |  | 
| 4260 |     error = pack_row(&prim_row, new_row, primary_key); | 
| 4261 |     if (error) { goto cleanup; } | 
| 4262 |  | 
| 4263 |     error = pack_old_row_for_update(&old_prim_row, old_row, primary_key); | 
| 4264 |     if (error) { goto cleanup; } | 
| 4265 |  | 
| 4266 |     set_main_dict_put_flags(thd, false, &mult_put_flags[primary_key]); | 
| 4267 |  | 
| 4268 |     // for test, make unique checks have a very long duration | 
| 4269 |     if ((mult_put_flags[primary_key] & DB_OPFLAGS_MASK) == DB_NOOVERWRITE) | 
| 4270 |         maybe_do_unique_checks_delay(thd); | 
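
    // update_multiple appears to need scratch key DBTs for both the old
    // row's key (the delete half) and the new row's key (the insert half)
    // in every dictionary, hence the 2*curr_num_DBs key DBTs passed below,
    // while only one generated record per dictionary is needed
    // (curr_num_DBs rec DBTs).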
| 4271 |  | 
| 4272 |     error = db_env->update_multiple( | 
| 4273 |         db_env,  | 
| 4274 |         share->key_file[primary_key],  | 
| 4275 |         txn, | 
| 4276 |         &old_prim_key,  | 
| 4277 |         &old_prim_row, | 
| 4278 |         &prim_key,  | 
| 4279 |         &prim_row, | 
| 4280 |         curr_num_DBs,  | 
| 4281 |         share->key_file, | 
| 4282 |         mult_put_flags, | 
| 4283 |         2*curr_num_DBs,  | 
| 4284 |         mult_key_dbt_array, | 
| 4285 |         curr_num_DBs,  | 
| 4286 |         mult_rec_dbt_array | 
| 4287 |         ); | 
| 4288 |      | 
| 4289 |     if (error == DB_KEYEXIST) { | 
| 4290 |         last_dup_key = primary_key; | 
| 4291 |     }     | 
| 4292 |     else if (!error) { | 
| 4293 |         updated_rows++; | 
| 4294 |         trx->stmt_progress.updated++; | 
| 4295 |         track_progress(thd); | 
| 4296 |     } | 
| 4297 |  | 
| 4298 |  | 
| 4299 | cleanup: | 
| 4300 |     if (num_DBs_locked) { | 
| 4301 |         share->_num_DBs_lock.unlock(); | 
| 4302 |     } | 
| 4303 |     if (error == DB_KEYEXIST) { | 
| 4304 |         error = HA_ERR_FOUND_DUPP_KEY; | 
| 4305 |     } | 
| 4306 |     if (sub_trans) { | 
| 4307 |         // no point in recording error value of abort. | 
| 4308 |         // nothing we can do about it anyway and it is not what | 
| 4309 |         // we want to return. | 
| 4310 |         if (error) { | 
| 4311 |             abort_txn(sub_trans); | 
| 4312 |         } | 
| 4313 |         else { | 
| 4314 |             commit_txn(sub_trans, DB_TXN_NOSYNC); | 
| 4315 |         } | 
| 4316 |     } | 
| 4317 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4318 | } | 
| 4319 |  | 
| 4320 | // | 
| 4321 | // Deletes a row in the table, called when handling a DELETE query | 
| 4322 | // Parameters: | 
| 4323 | //      [in]    record - row to be deleted, in MySQL format | 
| 4324 | // Returns: | 
| 4325 | //      0 on success | 
| 4326 | //      error otherwise | 
| 4327 | // | 
| 4328 | int ha_tokudb::delete_row(const uchar * record) { | 
| 4329 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 4330 |     int error = ENOSYS; | 
| 4331 |     DBT row, prim_key; | 
| 4332 |     bool has_null; | 
| 4333 |     THD* thd = ha_thd(); | 
| 4334 |     uint curr_num_DBs; | 
| 4335 |     tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); | 
| 4336 |  | 
| 4337 |     // | 
| 4338 |     // grab reader lock on numDBs_lock | 
| 4339 |     // | 
| 4340 |     bool num_DBs_locked = false; | 
| 4341 |     if (!num_DBs_locked_in_bulk) { | 
| 4342 |         rwlock_t_lock_read(share->_num_DBs_lock); | 
| 4343 |         num_DBs_locked = true; | 
| 4344 |     } | 
| 4345 |     curr_num_DBs = share->num_DBs; | 
| 4346 |  | 
| 4347 |     create_dbt_key_from_table(&prim_key, primary_key, key_buff, record, &has_null); | 
| 4348 |     if (table_share->blob_fields) { | 
| 4349 |         if (fix_rec_buff_for_blob(max_row_length(record))) { | 
| 4350 |             error = HA_ERR_OUT_OF_MEM; | 
| 4351 |             goto cleanup; | 
| 4352 |         } | 
| 4353 |     } | 
| 4354 |     if ((error = pack_row(&row, (const uchar *) record, primary_key))){ | 
| 4355 |         goto cleanup; | 
| 4356 |     } | 
| 4357 |  | 
| 4358 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 4359 |         TOKUDB_DEBUG_TXN, | 
| 4360 |         "all %p stmt %p sub_sp_level %p transaction %p" , | 
| 4361 |         trx->all, | 
| 4362 |         trx->stmt, | 
| 4363 |         trx->sub_sp_level, | 
| 4364 |         transaction); | 
| 4365 |  | 
| 4366 |     error = | 
| 4367 |         db_env->del_multiple( | 
| 4368 |             db_env, | 
| 4369 |             share->key_file[primary_key], | 
| 4370 |             transaction, | 
| 4371 |             &prim_key, | 
| 4372 |             &row, | 
| 4373 |             curr_num_DBs, | 
| 4374 |             share->key_file, | 
| 4375 |             mult_key_dbt_array, | 
| 4376 |             mult_del_flags); | 
| 4377 |  | 
| 4378 |     if (error) { | 
| 4379 |         DBUG_PRINT("error" , ("Got error %d" , error)); | 
| 4380 |     } else { | 
| 4381 |         deleted_rows++; | 
| 4382 |         trx->stmt_progress.deleted++; | 
| 4383 |         track_progress(thd); | 
| 4384 |     } | 
| 4385 | cleanup: | 
| 4386 |     if (num_DBs_locked) { | 
| 4387 |         share->_num_DBs_lock.unlock(); | 
| 4388 |     } | 
| 4389 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4390 | } | 
| 4391 |  | 
| 4392 | // | 
| 4393 | // takes as input table->read_set and table->write_set | 
| 4394 | // and puts list of field indexes that need to be read in | 
| 4395 | // unpack_row in the member variables fixed_cols_for_query | 
| 4396 | // and var_cols_for_query | 
| 4397 | // | 
| 4398 | void ha_tokudb::set_query_columns(uint keynr) { | 
| 4399 |     uint32_t curr_fixed_col_index = 0; | 
| 4400 |     uint32_t curr_var_col_index = 0; | 
| 4401 |     read_key = false; | 
| 4402 |     read_blobs = false; | 
| 4403 |     // | 
| 4404 |     // i know this is probably confusing and will need to be explained better | 
| 4405 |     // | 
| 4406 |     uint key_index = 0; | 
| 4407 |  | 
| 4408 |     if (keynr == primary_key || keynr == MAX_KEY) { | 
| 4409 |         key_index = primary_key; | 
| 4410 |     } | 
| 4411 |     else { | 
| 4412 |         key_index = (key_is_clustering(&table->key_info[keynr]) ? keynr : primary_key); | 
| 4413 |     } | 
| 4414 |     for (uint i = 0; i < table_share->fields; i++) { | 
| 4415 |         if (bitmap_is_set(table->read_set,i) ||  | 
| 4416 |             bitmap_is_set(table->write_set,i) | 
| 4417 |             )  | 
| 4418 |         { | 
| 4419 |             if (bitmap_is_set(&share->kc_info.key_filters[key_index],i)) { | 
| 4420 |                 read_key = true; | 
| 4421 |             } | 
| 4422 |             else { | 
| 4423 |                 // | 
| 4424 |                 // if fixed field length | 
| 4425 |                 // | 
| 4426 |                 if (is_fixed_field(&share->kc_info, i)) { | 
| 4427 |                     // | 
| 4428 |                     // save the offset into the list | 
| 4429 |                     // | 
| 4430 |                     fixed_cols_for_query[curr_fixed_col_index] = i; | 
| 4431 |                     curr_fixed_col_index++; | 
| 4432 |                 } | 
| 4433 |                 // | 
| 4434 |                 // varchar or varbinary | 
| 4435 |                 // | 
| 4436 |                 else if (is_variable_field(&share->kc_info, i)) { | 
| 4437 |                     var_cols_for_query[curr_var_col_index] = i; | 
| 4438 |                     curr_var_col_index++; | 
| 4439 |                 } | 
| 4440 |                 // | 
| 4441 |                 // it is a blob | 
| 4442 |                 // | 
| 4443 |                 else { | 
| 4444 |                     read_blobs = true; | 
| 4445 |                 } | 
| 4446 |             } | 
| 4447 |         } | 
| 4448 |     } | 
| 4449 |     num_fixed_cols_for_query = curr_fixed_col_index; | 
| 4450 |     num_var_cols_for_query = curr_var_col_index; | 
| 4451 | } | 
| 4452 |  | 
| 4453 | void ha_tokudb::column_bitmaps_signal() { | 
| 4454 |     // | 
| 4455 |     // if we have max number of indexes, then MAX_KEY == primary_key | 
| 4456 |     // | 
| 4457 |     if (tokudb_active_index != MAX_KEY || tokudb_active_index == primary_key) { | 
| 4458 |         set_query_columns(tokudb_active_index); | 
| 4459 |     } | 
| 4460 | } | 
| 4461 |  | 
| 4462 | // | 
// Notification that a scan of the entire secondary table is about
// to take place. Will pre-acquire a table read lock
| 4465 | // Returns: | 
| 4466 | //      0 on success | 
| 4467 | //      error otherwise | 
| 4468 | // | 
| 4469 | int ha_tokudb::prepare_index_scan() { | 
| 4470 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 4471 |     int error = 0; | 
| 4472 |     HANDLE_INVALID_CURSOR(); | 
| 4473 |     error = prelock_range(NULL, NULL); | 
| 4474 |     if (error) { last_cursor_error = error; goto cleanup; } | 
| 4475 |  | 
| 4476 |     range_lock_grabbed = true; | 
| 4477 |     error = 0; | 
| 4478 | cleanup: | 
| 4479 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4480 | } | 
| 4481 |  | 
| 4482 | static bool index_key_is_null( | 
| 4483 |     TABLE* table, | 
| 4484 |     uint keynr, | 
| 4485 |     const uchar* key, | 
| 4486 |     uint key_len) { | 
| 4487 |  | 
| 4488 |     bool key_can_be_null = false; | 
| 4489 |     KEY* key_info = &table->key_info[keynr]; | 
| 4490 |     KEY_PART_INFO* key_part = key_info->key_part; | 
| 4491 |     KEY_PART_INFO* end = key_part + key_info->user_defined_key_parts; | 
| 4492 |     for (; key_part != end; key_part++) { | 
| 4493 |         if (key_part->null_bit) { | 
| 4494 |             key_can_be_null = true; | 
| 4495 |             break; | 
| 4496 |         } | 
| 4497 |     } | 
| 4498 |     return key_can_be_null && key_len > 0 && key[0] != 0; | 
| 4499 | } | 
| 4500 |  | 
| 4501 | // Return true if bulk fetch can be used | 
| 4502 | static bool tokudb_do_bulk_fetch(THD *thd) { | 
| 4503 |     switch (thd_sql_command(thd)) { | 
| 4504 |     case SQLCOM_SELECT: | 
| 4505 |     case SQLCOM_CREATE_TABLE: | 
| 4506 |     case SQLCOM_INSERT_SELECT: | 
| 4507 |     case SQLCOM_REPLACE_SELECT: | 
| 4508 |     case SQLCOM_DELETE: | 
| 4509 |         return tokudb::sysvars::bulk_fetch(thd) != 0; | 
| 4510 |     default: | 
| 4511 |         return false; | 
| 4512 |     } | 
| 4513 | } | 
| 4514 |  | 
| 4515 | // | 
// Notification that a range query retrieving all elements that equal a key
// is about to take place. Will pre-acquire a read lock
| 4518 | // Returns: | 
| 4519 | //      0 on success | 
| 4520 | //      error otherwise | 
| 4521 | // | 
| 4522 | int ha_tokudb::prepare_index_key_scan(const uchar * key, uint key_len) { | 
| 4523 |     TOKUDB_HANDLER_DBUG_ENTER("%p %u" , key, key_len); | 
| 4524 |     int error = 0; | 
| 4525 |     DBT start_key, end_key; | 
| 4526 |     THD* thd = ha_thd(); | 
| 4527 |     HANDLE_INVALID_CURSOR(); | 
| 4528 |     pack_key(&start_key, tokudb_active_index, prelocked_left_range, key, key_len, COL_NEG_INF); | 
| 4529 |     prelocked_left_range_size = start_key.size; | 
| 4530 |     pack_key(&end_key, tokudb_active_index, prelocked_right_range, key, key_len, COL_POS_INF); | 
| 4531 |     prelocked_right_range_size = end_key.size; | 
| 4532 |  | 
| 4533 |     error = cursor->c_set_bounds( | 
| 4534 |         cursor,  | 
| 4535 |         &start_key,  | 
| 4536 |         &end_key, | 
| 4537 |         true, | 
| 4538 |         (cursor_flags & DB_SERIALIZABLE) != 0 ? DB_NOTFOUND : 0 | 
| 4539 |         ); | 
| 4540 |  | 
| 4541 |     if (error){  | 
| 4542 |         goto cleanup;  | 
| 4543 |     } | 
| 4544 |  | 
| 4545 |     range_lock_grabbed = true; | 
| 4546 |     range_lock_grabbed_null = index_key_is_null(table, tokudb_active_index, key, key_len); | 
| 4547 |     doing_bulk_fetch = tokudb_do_bulk_fetch(thd); | 
| 4548 |     bulk_fetch_iteration = 0; | 
| 4549 |     rows_fetched_using_bulk_fetch = 0; | 
| 4550 |     error = 0; | 
| 4551 | cleanup: | 
| 4552 |     if (error) { | 
| 4553 |         error = map_to_handler_error(error); | 
| 4554 |         last_cursor_error = error; | 
| 4555 |         // | 
| 4556 |         // cursor should be initialized here, but in case it is not,  | 
| 4557 |         // we still check | 
| 4558 |         // | 
| 4559 |         if (cursor) { | 
| 4560 |             int r = cursor->c_close(cursor); | 
| 4561 |             assert_always(r==0); | 
| 4562 |             cursor = NULL; | 
| 4563 |             remove_from_trx_handler_list(); | 
| 4564 |         } | 
| 4565 |     } | 
| 4566 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4567 | } | 
| 4568 |  | 
| 4569 | void ha_tokudb::invalidate_bulk_fetch() { | 
| 4570 |     bytes_used_in_range_query_buff= 0; | 
| 4571 |     curr_range_query_buff_offset = 0; | 
| 4572 |     icp_went_out_of_range = false;     | 
| 4573 | } | 
| 4574 |  | 
| 4575 | void ha_tokudb::invalidate_icp() { | 
| 4576 |     toku_pushed_idx_cond = NULL; | 
| 4577 |     toku_pushed_idx_cond_keyno = MAX_KEY; | 
| 4578 |     icp_went_out_of_range = false;     | 
| 4579 | } | 
| 4580 |  | 
| 4581 | // | 
| 4582 | // Initializes local cursor on DB with index keynr | 
| 4583 | // Parameters: | 
| 4584 | //          keynr - key (index) number | 
| 4585 | //          sorted - 1 if result MUST be sorted according to index | 
| 4586 | // Returns: | 
| 4587 | //      0 on success | 
| 4588 | //      error otherwise | 
| 4589 | // | 
| 4590 | int ha_tokudb::index_init(uint keynr, bool sorted) { | 
| 4591 |     TOKUDB_HANDLER_DBUG_ENTER("%d %u txn %p" , keynr, sorted, transaction); | 
| 4592 |  | 
| 4593 |     int error; | 
| 4594 |     THD* thd = ha_thd();  | 
| 4595 |     DBUG_PRINT("enter" , ("table: '%s'  key: %d" , table_share->table_name.str, keynr)); | 
| 4596 |  | 
| 4597 |     /* | 
| 4598 |        Under some very rare conditions (like full joins) we may already have | 
| 4599 |        an active cursor at this point | 
| 4600 |      */ | 
| 4601 |     if (cursor) { | 
| 4602 |         DBUG_PRINT("note" , ("Closing active cursor" )); | 
| 4603 |         int r = cursor->c_close(cursor); | 
| 4604 |         assert_always(r==0); | 
| 4605 |         remove_from_trx_handler_list(); | 
| 4606 |     } | 
| 4607 |     active_index = keynr; | 
| 4608 |  | 
| 4609 |     if (active_index < MAX_KEY) { | 
| 4610 |         DBUG_ASSERT(keynr <= table->s->keys); | 
| 4611 |     } else { | 
| 4612 |         DBUG_ASSERT(active_index == MAX_KEY); | 
| 4613 |         keynr = primary_key; | 
| 4614 |     } | 
| 4615 |     tokudb_active_index = keynr; | 
| 4616 |  | 
| 4617 | #if TOKU_CLUSTERING_IS_COVERING | 
| 4618 |     if (keynr < table->s->keys && table->key_info[keynr].option_struct->clustering) | 
| 4619 |         key_read = false; | 
| 4620 | #endif | 
| 4621 |  | 
| 4622 |     last_cursor_error = 0; | 
| 4623 |     range_lock_grabbed = false; | 
| 4624 |     range_lock_grabbed_null = false; | 
| 4625 |     DBUG_ASSERT(share->key_file[keynr]); | 
| 4626 |     cursor_flags = get_cursor_isolation_flags(lock.type, thd); | 
| 4627 |     if (use_write_locks) { | 
| 4628 |         cursor_flags |= DB_RMW; | 
| 4629 |     } | 
| 4630 |     if (tokudb::sysvars::disable_prefetching(thd)) { | 
| 4631 |         cursor_flags |= DBC_DISABLE_PREFETCHING; | 
| 4632 |     } | 
| 4633 |     if (lock.type == TL_READ_WITH_SHARED_LOCKS) { | 
| 4634 |        cursor_flags |= DB_LOCKING_READ; | 
| 4635 |     } | 
| 4636 |     if ((error = share->key_file[keynr]->cursor(share->key_file[keynr], | 
| 4637 |                                                 transaction, &cursor, | 
| 4638 |                                                 cursor_flags))) { | 
| 4639 |         if (error == TOKUDB_MVCC_DICTIONARY_TOO_NEW) { | 
| 4640 |             error = HA_ERR_TABLE_DEF_CHANGED; | 
| 4641 |             my_error(ER_TABLE_DEF_CHANGED, MYF(0)); | 
| 4642 |         } | 
| 4643 |         if (error == DB_LOCK_NOTGRANTED) { | 
| 4644 |             error = HA_ERR_LOCK_WAIT_TIMEOUT; | 
| 4645 |             my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); | 
| 4646 |         } | 
| 4647 |         table->status = STATUS_NOT_FOUND; | 
| 4648 |         error = map_to_handler_error(error); | 
| 4649 |         last_cursor_error = error; | 
| 4650 |         cursor = NULL;             // Safety | 
| 4651 |         goto exit; | 
| 4652 |     } | 
| 4653 |     cursor->c_set_check_interrupt_callback(cursor, tokudb_killed_thd_callback, thd); | 
| 4654 |     memset((void *) &last_key, 0, sizeof(last_key)); | 
| 4655 |  | 
| 4656 |     add_to_trx_handler_list(); | 
| 4657 |  | 
| 4658 |     if (thd_sql_command(thd) == SQLCOM_SELECT) { | 
| 4659 |         set_query_columns(keynr); | 
| 4660 |         unpack_entire_row = false; | 
| 4661 |     } | 
| 4662 |     else { | 
| 4663 |         unpack_entire_row = true; | 
| 4664 |     } | 
| 4665 |     invalidate_bulk_fetch(); | 
| 4666 |     doing_bulk_fetch = false; | 
| 4667 |     maybe_index_scan = false; | 
| 4668 |     error = 0; | 
| 4669 | exit: | 
| 4670 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4671 | } | 
| 4672 |  | 
| 4673 | // | 
| 4674 | // closes the local cursor | 
| 4675 | // | 
| 4676 | int ha_tokudb::index_end() { | 
| 4677 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 4678 |     range_lock_grabbed = false; | 
| 4679 |     range_lock_grabbed_null = false; | 
| 4680 |     if (cursor) { | 
| 4681 |         DBUG_PRINT("enter" , ("table: '%s'" , table_share->table_name.str)); | 
| 4682 |         int r = cursor->c_close(cursor); | 
| 4683 |         assert_always(r==0); | 
| 4684 |         cursor = NULL; | 
| 4685 |         remove_from_trx_handler_list(); | 
| 4686 |         last_cursor_error = 0; | 
| 4687 |     } | 
| 4688 |     active_index = tokudb_active_index = MAX_KEY; | 
| 4689 |  | 
| 4690 |     // | 
| 4691 |     // reset query variables | 
| 4692 |     // | 
| 4693 |     unpack_entire_row = true; | 
| 4694 |     read_blobs = true; | 
| 4695 |     read_key = true; | 
| 4696 |     num_fixed_cols_for_query = 0; | 
| 4697 |     num_var_cols_for_query = 0; | 
| 4698 |  | 
| 4699 |     invalidate_bulk_fetch(); | 
| 4700 |     invalidate_icp(); | 
| 4701 |     doing_bulk_fetch = false; | 
| 4702 |     close_dsmrr(); | 
| 4703 |      | 
| 4704 |     TOKUDB_HANDLER_DBUG_RETURN(0); | 
| 4705 | } | 
| 4706 |  | 
| 4707 |  | 
| 4708 | int ha_tokudb::handle_cursor_error(int error, int err_to_return, uint keynr) { | 
| 4709 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 4710 |     if (error) { | 
| 4711 |         error = map_to_handler_error(error); | 
| 4712 |         last_cursor_error = error; | 
| 4713 |         table->status = STATUS_NOT_FOUND; | 
| 4714 |         if (error == DB_NOTFOUND) { | 
| 4715 |             error = err_to_return; | 
| 4716 |         } | 
| 4717 |     } | 
| 4718 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4719 | } | 
| 4720 |  | 
| 4721 |  | 
| 4722 | // | 
| 4723 | // Helper function for read_row and smart_dbt_callback_xxx functions | 
| 4724 | // When using a hidden primary key, upon reading a row,  | 
| 4725 | // we set the current_ident field to whatever the primary key we retrieved | 
| 4726 | // was | 
| 4727 | // | 
void ha_tokudb::extract_hidden_primary_key(uint keynr, DBT const *found_key) {
| 4729 |     // | 
| 4730 |     // extract hidden primary key to current_ident | 
| 4731 |     // | 
| 4732 |     if (hidden_primary_key) { | 
| 4733 |         if (keynr == primary_key) { | 
| 4734 |             memcpy(current_ident, (char *) found_key->data, TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH); | 
| 4735 |         } | 
| 4736 |         // | 
| 4737 |         // if secondary key, hidden primary key is at end of found_key | 
| 4738 |         // | 
| 4739 |         else { | 
| 4740 |             memcpy( | 
| 4741 |                 current_ident,  | 
| 4742 |                 (char *) found_key->data + found_key->size - TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH,  | 
| 4743 |                 TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH | 
| 4744 |                 ); | 
| 4745 |         } | 
| 4746 |     } | 
| 4747 | } | 
| 4748 |  | 
| 4749 |  | 
| 4750 | int ha_tokudb::read_row_callback (uchar * buf, uint keynr, DBT const *row, DBT const *found_key) { | 
| 4751 |     assert_always(keynr == primary_key); | 
| 4752 |     return unpack_row(buf, row,found_key, keynr); | 
| 4753 | } | 
| 4754 |  | 
| 4755 | // | 
| 4756 | // Reads the contents of row and found_key, DBT's retrieved from the DB associated to keynr, into buf | 
| 4757 | // This function assumes that we are using a covering index, as a result, if keynr is the primary key, | 
| 4758 | // we do not read row into buf | 
| 4759 | // Parameters: | 
| 4760 | //      [out]   buf - buffer for the row, in MySQL format | 
| 4761 | //              keynr - index into key_file that represents DB we are currently operating on. | 
| 4762 | //      [in]    row - the row that has been read from the preceding DB call | 
| 4763 | //      [in]    found_key - key used to retrieve the row | 
| 4764 | // | 
| 4765 | void ha_tokudb::read_key_only(uchar * buf, uint keynr, DBT const *found_key) { | 
| 4766 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 4767 |     table->status = 0; | 
| 4768 |     // | 
| 4769 |     // only case when we do not unpack the key is if we are dealing with the main dictionary | 
| 4770 |     // of a table with a hidden primary key | 
| 4771 |     // | 
| 4772 |     if (!(hidden_primary_key && keynr == primary_key)) { | 
| 4773 |         unpack_key(buf, found_key, keynr); | 
| 4774 |     } | 
| 4775 |     TOKUDB_HANDLER_DBUG_VOID_RETURN; | 
| 4776 | } | 
| 4777 |  | 
| 4778 | // | 
| 4779 | // Helper function used to try to retrieve the entire row | 
| 4780 | // If keynr is associated with the main table, reads contents of found_key and row into buf, otherwise, | 
| 4781 | // makes copy of primary key and saves it to last_key. This can later be used to retrieve the entire row | 
| 4782 | // Parameters: | 
| 4783 | //      [out]   buf - buffer for the row, in MySQL format | 
| 4784 | //              keynr - index into key_file that represents DB we are currently operating on. | 
| 4785 | //      [in]    row - the row that has been read from the preceding DB call | 
| 4786 | //      [in]    found_key - key used to retrieve the row | 
| 4787 | // | 
| 4788 | int ha_tokudb::read_primary_key(uchar * buf, uint keynr, DBT const *row, DBT const *found_key) { | 
| 4789 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 4790 |     int error = 0; | 
| 4791 |     table->status = 0; | 
| 4792 |     // | 
| 4793 |     // case where we read from secondary table that is not clustered | 
| 4794 |     // | 
| 4795 |     if (keynr != primary_key && !key_is_clustering(&table->key_info[keynr])) { | 
| 4796 |         bool has_null; | 
| 4797 |         // | 
| 4798 |         // create a DBT that has the same data as row, this is inefficient | 
| 4799 |         // extract_hidden_primary_key MUST have been called before this | 
| 4800 |         // | 
| 4801 |         memset((void *) &last_key, 0, sizeof(last_key)); | 
| 4802 |         if (!hidden_primary_key) { | 
| 4803 |             unpack_key(buf, found_key, keynr); | 
| 4804 |         } | 
| 4805 |         create_dbt_key_from_table( | 
| 4806 |             &last_key,  | 
| 4807 |             primary_key, | 
| 4808 |             key_buff, | 
| 4809 |             buf, | 
| 4810 |             &has_null | 
| 4811 |             ); | 
| 4812 |     } | 
| 4813 |     // | 
| 4814 |     // else read from clustered/primary key | 
| 4815 |     // | 
| 4816 |     else { | 
| 4817 |         error = unpack_row(buf, row, found_key, keynr); | 
| 4818 |         if (error) { goto exit; } | 
| 4819 |     } | 
| 4820 |     if (found_key) { DBUG_DUMP("read row key" , (uchar *) found_key->data, found_key->size); } | 
| 4821 |     error = 0; | 
| 4822 | exit: | 
| 4823 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4824 | } | 
| 4825 |  | 
| 4826 | // | 
| 4827 | // This function reads an entire row into buf. This function also assumes that | 
| 4828 | // the key needed to retrieve the row is stored in the member variable last_key | 
| 4829 | // Parameters: | 
| 4830 | //      [out]   buf - buffer for the row, in MySQL format | 
| 4831 | // Returns: | 
| 4832 | //      0 on success, error otherwise | 
| 4833 | // | 
| 4834 | int ha_tokudb::read_full_row(uchar * buf) { | 
| 4835 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 4836 |     int error = 0; | 
| 4837 |     struct smart_dbt_info info; | 
| 4838 |     info.ha = this; | 
| 4839 |     info.buf = buf; | 
| 4840 |     info.keynr = primary_key; | 
| 4841 |     // | 
| 4842 |     // assumes key is stored in this->last_key | 
| 4843 |     // | 
| 4844 |  | 
| 4845 |     error = share->file->getf_set( | 
| 4846 |         share->file,  | 
| 4847 |         transaction,  | 
| 4848 |         cursor_flags,  | 
| 4849 |         &last_key,  | 
| 4850 |         smart_dbt_callback_rowread_ptquery,  | 
| 4851 |         &info | 
| 4852 |         ); | 
| 4853 |  | 
| 4854 |     if (error) { | 
| 4855 |         if (error == DB_LOCK_NOTGRANTED) { | 
| 4856 |             error = HA_ERR_LOCK_WAIT_TIMEOUT; | 
| 4857 |         } | 
| 4858 |         table->status = STATUS_NOT_FOUND; | 
| 4859 |         TOKUDB_HANDLER_DBUG_RETURN(error == DB_NOTFOUND ? HA_ERR_CRASHED : error); | 
| 4860 |     } | 
| 4861 |  | 
| 4862 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4863 | } | 
| 4864 |  | 
| 4865 |  | 
| 4866 | //  | 
| 4867 | // Reads the next row matching to the key, on success, advances cursor  | 
| 4868 | // Parameters:  | 
| 4869 | //      [out]   buf - buffer for the next row, in MySQL format  | 
| 4870 | //      [in]     key - key value  | 
| 4871 | //                keylen - length of key  | 
| 4872 | // Returns:  | 
| 4873 | //      0 on success  | 
| 4874 | //      HA_ERR_END_OF_FILE if not found  | 
| 4875 | //      error otherwise  | 
| 4876 | //  | 
| 4877 | int ha_tokudb::index_next_same(uchar* buf, const uchar* key, uint keylen) { | 
| 4878 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 4879 |  | 
| 4880 |     DBT curr_key; | 
| 4881 |     DBT found_key; | 
| 4882 |     bool has_null; | 
| 4883 |     int cmp; | 
| 4884 |     // create the key that will be used to compare with what is found | 
| 4885 |     // in order to figure out if we should return an error | 
| 4886 |     pack_key(&curr_key, tokudb_active_index, key_buff2, key, keylen, COL_ZERO); | 
| 4887 |     int error = get_next(buf, 1, &curr_key, key_read); | 
| 4888 |     if (error) { | 
| 4889 |         goto cleanup; | 
| 4890 |     } | 
| 4891 |     // | 
| 4892 |     // now do the comparison | 
| 4893 |     // | 
| 4894 |     create_dbt_key_from_table( | 
| 4895 |         &found_key, | 
| 4896 |         tokudb_active_index, | 
| 4897 |         key_buff3,buf, | 
| 4898 |         &has_null); | 
| 4899 |     cmp = | 
| 4900 |         tokudb_prefix_cmp_dbt_key( | 
| 4901 |             share->key_file[tokudb_active_index], | 
| 4902 |             &curr_key, | 
| 4903 |             &found_key); | 
| 4904 |     if (cmp) { | 
| 4905 |         error = HA_ERR_END_OF_FILE;  | 
| 4906 |     } | 
| 4907 |  | 
| 4908 | cleanup: | 
| 4909 |     error = handle_cursor_error(error, HA_ERR_END_OF_FILE, tokudb_active_index); | 
| 4910 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 4911 | }  | 
| 4912 |  | 
| 4913 |  | 
| 4914 | // | 
// According to the InnoDB handlerton: positions an index cursor on the index
// specified in keynr and fetches the row, if any.
// Parameters:
//      [out]       buf - buffer for the returned row
//      [in]        key - key value; according to InnoDB, if NULL,
//                              position the cursor at the start or end of the index;
//                              not sure if this is done now
//                  key_len - length of key
//                  find_flag - according to InnoDB, search flags from my_base.h
// Returns:
//      0 on success
//      HA_ERR_KEY_NOT_FOUND if not found (per InnoDB);
//          we seem to return HA_ERR_END_OF_FILE if find_flag != HA_READ_KEY_EXACT,
| 4928 | //          TODO: investigate this for correctness | 
| 4929 | //      error otherwise | 
| 4930 | // | 
| 4931 | int ha_tokudb::index_read( | 
| 4932 |     uchar* buf, | 
| 4933 |     const uchar* key, | 
| 4934 |     uint key_len, | 
| 4935 |     enum ha_rkey_function find_flag) { | 
| 4936 |  | 
| 4937 |     TOKUDB_HANDLER_DBUG_ENTER( | 
| 4938 |         "key %p %u:%2.2x find=%u" , | 
| 4939 |         key, | 
| 4940 |         key_len, | 
| 4941 |         key ? key[0] : 0, | 
| 4942 |         find_flag); | 
| 4943 |     invalidate_bulk_fetch(); | 
| 4944 |     if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_INDEX_KEY))) { | 
| 4945 |         TOKUDB_DBUG_DUMP("mysql key=" , key, key_len); | 
| 4946 |     } | 
| 4947 |     DBT row; | 
| 4948 |     DBT lookup_key; | 
| 4949 |     int error = 0;     | 
| 4950 |     uint32_t flags = 0; | 
| 4951 |     THD* thd = ha_thd(); | 
| 4952 |     tokudb_trx_data* trx = (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton); | 
| 4953 |     struct smart_dbt_info info; | 
| 4954 |     struct index_read_info ir_info; | 
| 4955 |  | 
| 4956 |     HANDLE_INVALID_CURSOR(); | 
| 4957 |  | 
| 4958 |     // if we locked a non-null key range and we now have a null key, then | 
| 4959 |     // remove the bounds from the cursor | 
| 4960 |     if (range_lock_grabbed && | 
| 4961 |         !range_lock_grabbed_null && | 
| 4962 |         index_key_is_null(table, tokudb_active_index, key, key_len)) { | 
| 4963 |         range_lock_grabbed = range_lock_grabbed_null = false; | 
| 4964 |         cursor->c_remove_restriction(cursor); | 
| 4965 |     } | 
| 4966 |  | 
| 4967 |     memset((void *) &row, 0, sizeof(row)); | 
| 4968 |  | 
| 4969 |     info.ha = this; | 
| 4970 |     info.buf = buf; | 
| 4971 |     info.keynr = tokudb_active_index; | 
| 4972 |  | 
| 4973 |     ir_info.smart_dbt_info = info; | 
| 4974 |     ir_info.cmp = 0; | 
| 4975 |  | 
| 4976 |     flags = SET_PRELOCK_FLAG(0); | 
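    // In the cases below, pack_key()'s infinity argument pads the (possibly
    // partial) search key: COL_NEG_INF makes the packed key sort before
    // every full key sharing the prefix and COL_POS_INF after it, which is
    // how each ha_rkey_function is turned into the appropriate inclusive or
    // exclusive range probe.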
| 4977 |     switch (find_flag) { | 
| 4978 |     case HA_READ_KEY_EXACT: /* Find first record else error */ { | 
| 4979 |         pack_key(&lookup_key, tokudb_active_index, key_buff3, key, key_len, COL_NEG_INF); | 
| 4980 |         DBT lookup_bound; | 
| 4981 |         pack_key(&lookup_bound, tokudb_active_index, key_buff4, key, key_len, COL_POS_INF); | 
| 4982 |         if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_INDEX_KEY))) { | 
| 4983 |             TOKUDB_DBUG_DUMP("tokudb key=" , lookup_key.data, lookup_key.size); | 
| 4984 |         } | 
| 4985 |         ir_info.orig_key = &lookup_key; | 
| 4986 |         error = cursor->c_getf_set_range_with_bound(cursor, flags, &lookup_key, &lookup_bound, SMART_DBT_IR_CALLBACK(key_read), &ir_info); | 
| 4987 |         if (ir_info.cmp) { | 
| 4988 |             error = DB_NOTFOUND; | 
| 4989 |         } | 
| 4990 |         break; | 
| 4991 |     } | 
| 4992 |     case HA_READ_AFTER_KEY: /* Find next rec. after key-record */ | 
| 4993 |         pack_key(&lookup_key, tokudb_active_index, key_buff3, key, key_len, COL_POS_INF); | 
| 4994 |         error = cursor->c_getf_set_range(cursor, flags, &lookup_key, SMART_DBT_CALLBACK(key_read), &info); | 
| 4995 |         break; | 
| 4996 |     case HA_READ_BEFORE_KEY: /* Find next rec. before key-record */ | 
| 4997 |         pack_key(&lookup_key, tokudb_active_index, key_buff3, key, key_len, COL_NEG_INF); | 
| 4998 |         error = cursor->c_getf_set_range_reverse(cursor, flags, &lookup_key, SMART_DBT_CALLBACK(key_read), &info); | 
| 4999 |         break; | 
| 5000 |     case HA_READ_KEY_OR_NEXT: /* Record or next record */ | 
| 5001 |         pack_key(&lookup_key, tokudb_active_index, key_buff3, key, key_len, COL_NEG_INF); | 
| 5002 |         error = cursor->c_getf_set_range(cursor, flags, &lookup_key, SMART_DBT_CALLBACK(key_read), &info); | 
| 5003 |         break; | 
| 5004 |     // | 
| 5005 |     // This case does not seem to ever be used, it is ok for it to be slow | 
| 5006 |     // | 
| 5007 |     case HA_READ_KEY_OR_PREV: /* Record or previous */ | 
| 5008 |         pack_key(&lookup_key, tokudb_active_index, key_buff3, key, key_len, COL_NEG_INF); | 
| 5009 |         ir_info.orig_key = &lookup_key; | 
| 5010 |         error = cursor->c_getf_set_range(cursor, flags, &lookup_key, SMART_DBT_IR_CALLBACK(key_read), &ir_info); | 
| 5011 |         if (error == DB_NOTFOUND) { | 
| 5012 |             error = cursor->c_getf_last(cursor, flags, SMART_DBT_CALLBACK(key_read), &info); | 
| 5013 |         } | 
| 5014 |         else if (ir_info.cmp) { | 
| 5015 |             error = cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK(key_read), &info); | 
| 5016 |         } | 
| 5017 |         break; | 
| 5018 |     case HA_READ_PREFIX_LAST_OR_PREV: /* Last or prev key with the same prefix */ | 
| 5019 |         pack_key(&lookup_key, tokudb_active_index, key_buff3, key, key_len, COL_POS_INF); | 
| 5020 |         error = cursor->c_getf_set_range_reverse(cursor, flags, &lookup_key, SMART_DBT_CALLBACK(key_read), &info); | 
| 5021 |         break; | 
| 5022 |     case HA_READ_PREFIX_LAST: | 
| 5023 |         pack_key(&lookup_key, tokudb_active_index, key_buff3, key, key_len, COL_POS_INF); | 
| 5024 |         ir_info.orig_key = &lookup_key; | 
| 5025 |         error = cursor->c_getf_set_range_reverse(cursor, flags, &lookup_key, SMART_DBT_IR_CALLBACK(key_read), &ir_info); | 
| 5026 |         if (ir_info.cmp) { | 
| 5027 |             error = DB_NOTFOUND; | 
| 5028 |         } | 
| 5029 |         break; | 
| 5030 |     default: | 
| 5031 |         TOKUDB_HANDLER_TRACE("unsupported:%d" , find_flag); | 
| 5032 |         error = HA_ERR_UNSUPPORTED; | 
| 5033 |         break; | 
| 5034 |     } | 
| 5035 |     error = handle_cursor_error(error,HA_ERR_KEY_NOT_FOUND,tokudb_active_index); | 
| 5036 |     if (!error && !key_read && tokudb_active_index != primary_key && !key_is_clustering(&table->key_info[tokudb_active_index])) { | 
| 5037 |         error = read_full_row(buf); | 
| 5038 |     } | 
| 5039 |  | 
| 5040 |     if (TOKUDB_UNLIKELY(error && TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ERROR))) { | 
| 5041 |         TOKUDB_HANDLER_TRACE("error:%d:%d" , error, find_flag); | 
| 5042 |     } | 
| 5043 |     trx->stmt_progress.queried++; | 
| 5044 |     track_progress(thd); | 
| 5045 |  | 
| 5046 | cleanup: | 
| 5047 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 5048 | } | 
| 5049 |  | 
| 5050 |  | 
| 5051 | int ha_tokudb::read_data_from_range_query_buff(uchar* buf, bool need_val, bool do_key_read) { | 
| 5052 |     // buffer has the next row, get it from there | 
| 5053 |     int error; | 
| 5054 |     uchar* curr_pos = range_query_buff+curr_range_query_buff_offset; | 
| 5055 |     DBT curr_key; | 
| 5056 |     memset((void *) &curr_key, 0, sizeof(curr_key)); | 
| 5057 |      | 
| 5058 |     // get key info | 
| 5059 |     uint32_t key_size = *(uint32_t *)curr_pos; | 
| 5060 |     curr_pos += sizeof(key_size); | 
| 5061 |     uchar* curr_key_buff = curr_pos; | 
| 5062 |     curr_pos += key_size; | 
| 5063 |      | 
| 5064 |     curr_key.data = curr_key_buff; | 
| 5065 |     curr_key.size = key_size; | 
| 5066 |      | 
| 5067 |     // if this is a covering index, this is all we need | 
| 5068 |     if (do_key_read) { | 
| 5069 |         assert_always(!need_val); | 
| 5070 |         extract_hidden_primary_key(tokudb_active_index, &curr_key); | 
| 5071 |         read_key_only(buf, tokudb_active_index, &curr_key); | 
| 5072 |         error = 0; | 
| 5073 |     } | 
| 5074 |     // we need to get more data | 
| 5075 |     else { | 
| 5076 |         DBT curr_val; | 
| 5077 |         memset((void *) &curr_val, 0, sizeof(curr_val)); | 
| 5078 |         uchar* curr_val_buff = NULL; | 
| 5079 |         uint32_t val_size = 0; | 
| 5080 |         // in this case, we don't have a val, we are simply extracting the pk | 
| 5081 |         if (!need_val) { | 
| 5082 |             curr_val.data = curr_val_buff; | 
| 5083 |             curr_val.size = val_size; | 
| 5084 |             extract_hidden_primary_key(tokudb_active_index, &curr_key); | 
| 5085 |             error = read_primary_key( buf, tokudb_active_index, &curr_val, &curr_key); | 
| 5086 |         } | 
| 5087 |         else { | 
| 5088 |             extract_hidden_primary_key(tokudb_active_index, &curr_key); | 
| 5089 |             // need to extract a val and place it into buf | 
| 5090 |             if (unpack_entire_row) { | 
| 5091 |                 // get val info | 
| 5092 |                 val_size = *(uint32_t *)curr_pos; | 
| 5093 |                 curr_pos += sizeof(val_size); | 
| 5094 |                 curr_val_buff = curr_pos; | 
| 5095 |                 curr_pos += val_size; | 
| 5096 |                 curr_val.data = curr_val_buff; | 
| 5097 |                 curr_val.size = val_size; | 
| 5098 |                 error = unpack_row(buf,&curr_val, &curr_key, tokudb_active_index); | 
| 5099 |             } | 
| 5100 |             else { | 
| 5101 |                 if (!(hidden_primary_key && tokudb_active_index == primary_key)) { | 
| 5102 |                     unpack_key(buf,&curr_key,tokudb_active_index); | 
| 5103 |                 } | 
| 5104 |                 // read rows we care about | 
| 5105 |  | 
                // first the null bytes
| 5107 |                 memcpy(buf, curr_pos, table_share->null_bytes); | 
| 5108 |                 curr_pos += table_share->null_bytes; | 
| 5109 |  | 
| 5110 |                 // now the fixed sized rows                 | 
| 5111 |                 for (uint32_t i = 0; i < num_fixed_cols_for_query; i++) { | 
| 5112 |                     uint field_index = fixed_cols_for_query[i]; | 
| 5113 |                     Field* field = table->field[field_index]; | 
| 5114 |                     unpack_fixed_field( | 
| 5115 |                         buf + field_offset(field, table), | 
| 5116 |                         curr_pos, | 
| 5117 |                         share->kc_info.field_lengths[field_index] | 
| 5118 |                         ); | 
| 5119 |                     curr_pos += share->kc_info.field_lengths[field_index]; | 
| 5120 |                 } | 
| 5121 |                 // now the variable sized rows | 
| 5122 |                 for (uint32_t i = 0; i < num_var_cols_for_query; i++) { | 
| 5123 |                     uint field_index = var_cols_for_query[i]; | 
| 5124 |                     Field* field = table->field[field_index]; | 
| 5125 |                     uint32_t field_len = *(uint32_t *)curr_pos; | 
| 5126 |                     curr_pos += sizeof(field_len); | 
| 5127 |                     unpack_var_field( | 
| 5128 |                         buf + field_offset(field, table), | 
| 5129 |                         curr_pos, | 
| 5130 |                         field_len, | 
| 5131 |                         share->kc_info.length_bytes[field_index] | 
| 5132 |                         ); | 
| 5133 |                     curr_pos += field_len; | 
| 5134 |                 } | 
| 5135 |                 // now the blobs | 
| 5136 |                 if (read_blobs) { | 
| 5137 |                     uint32_t blob_size = *(uint32_t *)curr_pos; | 
| 5138 |                     curr_pos += sizeof(blob_size); | 
| 5139 |                     error = unpack_blobs( | 
| 5140 |                         buf, | 
| 5141 |                         curr_pos, | 
| 5142 |                         blob_size, | 
| 5143 |                         true | 
| 5144 |                         ); | 
| 5145 |                     curr_pos += blob_size; | 
| 5146 |                     if (error) { | 
| 5147 |                         invalidate_bulk_fetch(); | 
| 5148 |                         goto exit; | 
| 5149 |                     } | 
| 5150 |                 } | 
| 5151 |                 error = 0; | 
| 5152 |             } | 
| 5153 |         } | 
| 5154 |     } | 
| 5155 |      | 
| 5156 |     curr_range_query_buff_offset = curr_pos - range_query_buff; | 
| 5157 | exit: | 
| 5158 |     return error; | 
| 5159 | } | 
| 5160 |  | 
| 5161 | static int smart_dbt_bf_callback( | 
| 5162 |     DBT const* key, | 
| 5163 |     DBT const* row, | 
| 5164 |     void* context) { | 
| 5165 |     SMART_DBT_BF_INFO info = (SMART_DBT_BF_INFO)context; | 
| 5166 |     return | 
| 5167 |         info->ha->fill_range_query_buf( | 
| 5168 |             info->need_val, | 
| 5169 |             key, | 
| 5170 |             row, | 
| 5171 |             info->direction, | 
| 5172 |             info->thd, | 
| 5173 |             info->buf, | 
| 5174 |             info->key_to_compare); | 
| 5175 | } | 
| 5176 |  | 
| 5177 | enum icp_result ha_tokudb::toku_handler_index_cond_check( | 
| 5178 |     Item* pushed_idx_cond) { | 
| 5179 |  | 
| 5180 |     enum icp_result res; | 
| 5181 |     if (end_range) { | 
| 5182 |         int cmp; | 
| 5183 | #ifdef MARIADB_BASE_VERSION | 
| 5184 |         cmp = compare_key2(end_range); | 
| 5185 | #else | 
| 5186 |         cmp = compare_key_icp(end_range); | 
| 5187 | #endif | 
| 5188 |         if (cmp > 0) { | 
| 5189 |             return ICP_OUT_OF_RANGE; | 
| 5190 |         } | 
| 5191 |     } | 
| 5192 |     res = pushed_idx_cond->val_int() ? ICP_MATCH : ICP_NO_MATCH; | 
| 5193 |     return res; | 
| 5194 | } | 
| 5195 |  | 
| 5196 | // fill in the range query buf for bulk fetch | 
| 5197 | int ha_tokudb::fill_range_query_buf( | 
| 5198 |     bool need_val,  | 
| 5199 |     DBT const* key, | 
| 5200 |     DBT const* row, | 
| 5201 |     int direction, | 
| 5202 |     THD* thd, | 
| 5203 |     uchar* buf, | 
| 5204 |     DBT* key_to_compare) { | 
| 5205 |  | 
| 5206 |     int error; | 
| 5207 |     // | 
| 5208 |     // first put the value into range_query_buf | 
| 5209 |     // | 
| 5210 |     uint32_t size_remaining = | 
| 5211 |         size_range_query_buff - bytes_used_in_range_query_buff; | 
| 5212 |     uint32_t size_needed; | 
| 5213 |     uint32_t user_defined_size = tokudb::sysvars::read_buf_size(thd); | 
| 5214 |     uchar* curr_pos = NULL; | 
| 5215 |  | 
| 5216 |     if (key_to_compare) { | 
| 5217 |         int cmp = tokudb_prefix_cmp_dbt_key( | 
| 5218 |             share->key_file[tokudb_active_index],  | 
| 5219 |             key_to_compare,  | 
| 5220 |             key); | 
| 5221 |         if (cmp) { | 
| 5222 |             icp_went_out_of_range = true; | 
| 5223 |             error = 0; | 
| 5224 |             goto cleanup; | 
| 5225 |         } | 
| 5226 |     } | 
| 5227 |  | 
| 5228 |     // if we have an index condition pushed down, we check it | 
| 5229 |     if (toku_pushed_idx_cond && | 
| 5230 |         (tokudb_active_index == toku_pushed_idx_cond_keyno)) { | 
| 5231 |         unpack_key(buf, key, tokudb_active_index); | 
| 5232 |         enum icp_result result = | 
| 5233 |             toku_handler_index_cond_check(toku_pushed_idx_cond); | 
| 5234 |  | 
| 5235 |         // If we have reason to stop, we set icp_went_out_of_range and get out | 
| 5236 |         // otherwise, if we simply see that the current key is no match, | 
| 5237 |         // we tell the cursor to continue and don't store | 
| 5238 |         // the key locally | 
| 5239 |         if (result == ICP_OUT_OF_RANGE || thd_kill_level(thd)) { | 
| 5240 |             icp_went_out_of_range = true; | 
| 5241 |             error = 0; | 
| 5242 |             DEBUG_SYNC(ha_thd(), "tokudb_icp_asc_scan_out_of_range" ); | 
| 5243 |             goto cleanup; | 
| 5244 |         } else if (result == ICP_NO_MATCH) { | 
            // An optimizer change made for MyRocks also benefits TokuDB here:
            // opt_range.cc QUICK_SELECT::get_next now sets end_range during
            // descending scans. We should never hit this condition, but the
            // code stays in to prevent a descending scan from running all
            // the way to the beginning of the index, and the assertion
            // catches any such case in debug builds.
| 5251 |             assert_debug(!(!end_range && direction < 0)); | 
| 5252 |             if (!end_range && | 
| 5253 |                 direction < 0) { | 
| 5254 |                 cancel_pushed_idx_cond(); | 
| 5255 |             } | 
| 5256 |             error = TOKUDB_CURSOR_CONTINUE; | 
| 5257 |             goto cleanup; | 
| 5258 |         } | 
| 5259 |     } | 
| 5260 |  | 
| 5261 |     // at this point, if ICP is on, we have verified that the key is one | 
| 5262 |     // we are interested in, so we proceed with placing the data  | 
| 5263 |     // into the range query buffer | 
| 5264 |      | 
| 5265 |     if (need_val) { | 
| 5266 |         if (unpack_entire_row) { | 
| 5267 |             size_needed = 2*sizeof(uint32_t) + key->size + row->size; | 
| 5268 |         } else { | 
| 5269 |             // this is an upper bound | 
| 5270 |             size_needed = | 
| 5271 |                 // size of key length | 
| 5272 |                 sizeof(uint32_t) + | 
| 5273 |                 // key and row | 
| 5274 |                 key->size + row->size + | 
| 5275 |                 // lengths of varchars stored | 
| 5276 |                 num_var_cols_for_query * (sizeof(uint32_t)) + | 
| 5277 |                 // length of blobs | 
| 5278 |                 sizeof(uint32_t); | 
| 5279 |         } | 
| 5280 |     } else { | 
| 5281 |         size_needed = sizeof(uint32_t) + key->size; | 
| 5282 |     } | 
| 5283 |     if (size_remaining < size_needed) { | 
| 5284 |         range_query_buff = | 
| 5285 |             static_cast<uchar*>(tokudb::memory::realloc( | 
| 5286 |                 static_cast<void*>(range_query_buff), | 
| 5287 |                 bytes_used_in_range_query_buff + size_needed, | 
| 5288 |                 MYF(MY_WME))); | 
| 5289 |         if (range_query_buff == NULL) { | 
| 5290 |             error = ENOMEM; | 
| 5291 |             invalidate_bulk_fetch(); | 
| 5292 |             goto cleanup; | 
| 5293 |         } | 
| 5294 |         size_range_query_buff = bytes_used_in_range_query_buff + size_needed; | 
| 5295 |     } | 
| 5296 |     // | 
| 5297 |     // now we know we have the size, let's fill the buffer, starting with the key | 
| 5298 |     // | 
| 5299 |     curr_pos = range_query_buff + bytes_used_in_range_query_buff; | 
| 5300 |  | 
| 5301 |     *reinterpret_cast<uint32_t*>(curr_pos) = key->size; | 
| 5302 |     curr_pos += sizeof(uint32_t); | 
| 5303 |     memcpy(curr_pos, key->data, key->size); | 
| 5304 |     curr_pos += key->size; | 
| 5305 |     if (need_val) { | 
| 5306 |         if (unpack_entire_row) { | 
| 5307 |             *reinterpret_cast<uint32_t*>(curr_pos) = row->size; | 
| 5308 |             curr_pos += sizeof(uint32_t); | 
| 5309 |             memcpy(curr_pos, row->data, row->size); | 
| 5310 |             curr_pos += row->size; | 
| 5311 |         } else { | 
| 5312 |             // need to unpack just the data we care about | 
| 5313 |             const uchar* fixed_field_ptr = static_cast<const uchar*>(row->data); | 
| 5314 |             fixed_field_ptr += table_share->null_bytes; | 
| 5315 |  | 
| 5316 |             const uchar* var_field_offset_ptr = NULL; | 
| 5317 |             const uchar* var_field_data_ptr = NULL; | 
| 5318 |              | 
| 5319 |             var_field_offset_ptr = | 
| 5320 |                 fixed_field_ptr + | 
| 5321 |                 share->kc_info.mcp_info[tokudb_active_index].fixed_field_size; | 
| 5322 |             var_field_data_ptr = | 
| 5323 |                 var_field_offset_ptr + | 
| 5324 |                 share->kc_info.mcp_info[tokudb_active_index].len_of_offsets; | 
| 5325 |  | 
| 5326 |             // first the null bytes | 
| 5327 |             memcpy(curr_pos, row->data, table_share->null_bytes); | 
| 5328 |             curr_pos += table_share->null_bytes; | 
            //
            // now the fixed fields
            //
| 5333 |             for (uint32_t i = 0; i < num_fixed_cols_for_query; i++) { | 
| 5334 |                 uint field_index = fixed_cols_for_query[i]; | 
| 5335 |                 memcpy( | 
| 5336 |                     curr_pos,  | 
| 5337 |                     fixed_field_ptr + share->kc_info.cp_info[tokudb_active_index][field_index].col_pack_val, | 
| 5338 |                     share->kc_info.field_lengths[field_index]); | 
| 5339 |                 curr_pos += share->kc_info.field_lengths[field_index]; | 
| 5340 |             } | 
| 5341 |              | 
| 5342 |             // | 
| 5343 |             // now the var fields | 
| 5344 |             // | 
| 5345 |             for (uint32_t i = 0; i < num_var_cols_for_query; i++) { | 
| 5346 |                 uint field_index = var_cols_for_query[i]; | 
| 5347 |                 uint32_t var_field_index = | 
| 5348 |                     share->kc_info.cp_info[tokudb_active_index][field_index].col_pack_val; | 
| 5349 |                 uint32_t data_start_offset; | 
| 5350 |                 uint32_t field_len; | 
| 5351 |                  | 
| 5352 |                 get_var_field_info( | 
| 5353 |                     &field_len,  | 
| 5354 |                     &data_start_offset,  | 
| 5355 |                     var_field_index,  | 
| 5356 |                     var_field_offset_ptr,  | 
| 5357 |                     share->kc_info.num_offset_bytes); | 
| 5358 |                 memcpy(curr_pos, &field_len, sizeof(field_len)); | 
| 5359 |                 curr_pos += sizeof(field_len); | 
| 5360 |                 memcpy( | 
| 5361 |                     curr_pos, | 
| 5362 |                     var_field_data_ptr + data_start_offset, | 
| 5363 |                     field_len); | 
| 5364 |                 curr_pos += field_len; | 
| 5365 |             } | 
| 5366 |              | 
| 5367 |             if (read_blobs) { | 
| 5368 |                 uint32_t blob_offset = 0; | 
| 5369 |                 uint32_t data_size = 0; | 
| 5370 |                 // | 
| 5371 |                 // now the blobs | 
| 5372 |                 // | 
| 5373 |                 get_blob_field_info( | 
| 5374 |                     &blob_offset,  | 
| 5375 |                     share->kc_info.mcp_info[tokudb_active_index].len_of_offsets, | 
| 5376 |                     var_field_data_ptr,  | 
| 5377 |                     share->kc_info.num_offset_bytes); | 
| 5378 |                 data_size = | 
| 5379 |                     row->size - | 
| 5380 |                     blob_offset - | 
| 5381 |                     static_cast<uint32_t>((var_field_data_ptr - | 
| 5382 |                         static_cast<const uchar*>(row->data))); | 
| 5383 |                 memcpy(curr_pos, &data_size, sizeof(data_size)); | 
| 5384 |                 curr_pos += sizeof(data_size); | 
| 5385 |                 memcpy(curr_pos, var_field_data_ptr + blob_offset, data_size); | 
| 5386 |                 curr_pos += data_size; | 
| 5387 |             } | 
| 5388 |         } | 
| 5389 |     } | 
| 5390 |  | 
| 5391 |     bytes_used_in_range_query_buff = curr_pos - range_query_buff; | 
| 5392 |     assert_always(bytes_used_in_range_query_buff <= size_range_query_buff); | 
| 5393 |  | 
| 5394 |     // | 
| 5395 |     // now determine if we should continue with the bulk fetch | 
| 5396 |     // we want to stop under these conditions: | 
| 5397 |     //  - we overran the prelocked range | 
| 5398 |     //  - we are close to the end of the buffer | 
| 5399 |     //  - we have fetched an exponential amount of rows with | 
| 5400 |     //  respect to the bulk fetch iteration, which is initialized  | 
| 5401 |     //  to 0 in index_init() and prelock_range(). | 
| 5402 |  | 
| 5403 |     rows_fetched_using_bulk_fetch++; | 
    // if the iteration is less than the number of possible shifts on
    // a 64 bit integer, check that we haven't exceeded this iteration's
    // row fetch upper bound.
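    // For example, with bulk_fetch_iteration starting at 0 the per-refill
    // bounds are 1, 2, 4, 8, ... rows; get_next() increments
    // bulk_fetch_iteration after each refill, so long scans buffer
    // exponentially more rows per cursor call while short scans stay cheap.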
| 5407 |     if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) { | 
| 5408 |         uint64_t row_fetch_upper_bound = 1LLU << bulk_fetch_iteration; | 
| 5409 |         assert_always(row_fetch_upper_bound > 0); | 
| 5410 |         if (rows_fetched_using_bulk_fetch >= row_fetch_upper_bound) {  | 
| 5411 |             error = 0; | 
| 5412 |             goto cleanup; | 
| 5413 |         } | 
| 5414 |     } | 
| 5415 |  | 
| 5416 |     if (bytes_used_in_range_query_buff + | 
| 5417 |         table_share->rec_buff_length > | 
| 5418 |         user_defined_size) { | 
| 5419 |         error = 0; | 
| 5420 |         goto cleanup; | 
| 5421 |     } | 
| 5422 |     if (direction > 0) { | 
| 5423 |         // compare what we got to the right endpoint of prelocked range | 
| 5424 |         // because we are searching keys in ascending order | 
| 5425 |         if (prelocked_right_range_size == 0) { | 
| 5426 |             error = TOKUDB_CURSOR_CONTINUE; | 
| 5427 |             goto cleanup; | 
| 5428 |         } | 
| 5429 |         DBT right_range; | 
| 5430 |         memset(&right_range, 0, sizeof(right_range)); | 
| 5431 |         right_range.size = prelocked_right_range_size; | 
| 5432 |         right_range.data = prelocked_right_range; | 
| 5433 |         int cmp = tokudb_cmp_dbt_key( | 
| 5434 |             share->key_file[tokudb_active_index],  | 
| 5435 |             key,  | 
| 5436 |             &right_range); | 
| 5437 |         error = (cmp > 0) ? 0 : TOKUDB_CURSOR_CONTINUE; | 
| 5438 |     } else { | 
| 5439 |         // compare what we got to the left endpoint of prelocked range | 
| 5440 |         // because we are searching keys in descending order | 
| 5441 |         if (prelocked_left_range_size == 0) { | 
| 5442 |             error = TOKUDB_CURSOR_CONTINUE; | 
| 5443 |             goto cleanup; | 
| 5444 |         } | 
| 5445 |         DBT left_range; | 
| 5446 |         memset(&left_range, 0, sizeof(left_range)); | 
| 5447 |         left_range.size = prelocked_left_range_size; | 
| 5448 |         left_range.data = prelocked_left_range; | 
| 5449 |         int cmp = tokudb_cmp_dbt_key( | 
| 5450 |             share->key_file[tokudb_active_index],  | 
| 5451 |             key,  | 
| 5452 |             &left_range); | 
| 5453 |         error = (cmp < 0) ? 0 : TOKUDB_CURSOR_CONTINUE; | 
| 5454 |     } | 
| 5455 | cleanup: | 
| 5456 |     return error; | 
| 5457 | } | 
| 5458 |  | 
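// get_next: the common row-fetch path behind index_next/index_prev/rnd_next.
// The next row comes from one of three places, in order:
//   1) data already buffered in range_query_buff (bulk fetch),
//   2) nothing buffered but ICP went out of range -> HA_ERR_END_OF_FILE,
//   3) a fresh cursor callback (a bulk-fetch refill, or a single-row fetch
//      when bulk fetch is off).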
| 5459 | int ha_tokudb::get_next( | 
| 5460 |     uchar* buf, | 
| 5461 |     int direction, | 
| 5462 |     DBT* key_to_compare, | 
| 5463 |     bool do_key_read) { | 
| 5464 |  | 
| 5465 |     int error = 0; | 
| 5466 |     HANDLE_INVALID_CURSOR(); | 
| 5467 |  | 
| 5468 |     if (maybe_index_scan) { | 
| 5469 |         maybe_index_scan = false; | 
| 5470 |         if (!range_lock_grabbed) { | 
| 5471 |             error = prepare_index_scan(); | 
| 5472 |         } | 
| 5473 |     } | 
| 5474 |      | 
| 5475 |     if (!error) { | 
| 5476 |         uint32_t flags = SET_PRELOCK_FLAG(0); | 
| 5477 |  | 
        // we need to read the val of what we retrieve if
        // we are NOT doing a covering key read AND the active index is
        // the primary key or a clustering secondary key
| 5481 |         bool need_val = | 
| 5482 |             (do_key_read == 0) && | 
| 5483 |             (tokudb_active_index == primary_key || | 
| 5484 |              key_is_clustering(&table->key_info[tokudb_active_index])); | 
| 5485 |  | 
| 5486 |         if ((bytes_used_in_range_query_buff - | 
| 5487 |              curr_range_query_buff_offset) > 0) { | 
| 5488 |             error = read_data_from_range_query_buff(buf, need_val, do_key_read); | 
| 5489 |         } else if (icp_went_out_of_range) { | 
| 5490 |             icp_went_out_of_range = false; | 
| 5491 |             error = HA_ERR_END_OF_FILE; | 
| 5492 |         } else { | 
| 5493 |             invalidate_bulk_fetch(); | 
| 5494 |             if (doing_bulk_fetch) { | 
| 5495 |                 struct smart_dbt_bf_info bf_info; | 
| 5496 |                 bf_info.ha = this; | 
| 5498 |                 bf_info.direction = direction; | 
| 5499 |                 bf_info.thd = ha_thd(); | 
| 5500 |                 bf_info.need_val = need_val; | 
| 5501 |                 bf_info.buf = buf; | 
| 5502 |                 bf_info.key_to_compare = key_to_compare; | 
| 5503 |                 // | 
| 5504 |                 // call c_getf_next with purpose of filling in range_query_buff | 
| 5505 |                 // | 
| 5506 |                 rows_fetched_using_bulk_fetch = 0; | 
                // it is expected that we can do ICP in the
                // smart_dbt_bf_callback. As a result, it's possible we don't
                // return any data because none of the rows matched the index
                // condition; therefore, we need this while loop.
                // icp_went_out_of_range will be set if we hit a row that the
                // index condition states is out of our range. When that hits,
                // we know all the data in the buffer is the last data we will
                // retrieve
| 5513 |                 while (bytes_used_in_range_query_buff == 0 && | 
| 5514 |                        !icp_went_out_of_range && error == 0) { | 
| 5515 |                     if (direction > 0) { | 
| 5516 |                         error = | 
| 5517 |                             cursor->c_getf_next( | 
| 5518 |                                 cursor, | 
| 5519 |                                 flags, | 
| 5520 |                                 smart_dbt_bf_callback, | 
| 5521 |                                 &bf_info); | 
| 5522 |                     } else { | 
| 5523 |                         error = | 
| 5524 |                             cursor->c_getf_prev( | 
| 5525 |                                 cursor, | 
| 5526 |                                 flags, | 
| 5527 |                                 smart_dbt_bf_callback, | 
| 5528 |                                 &bf_info); | 
| 5529 |                     } | 
| 5530 |                 } | 
| 5531 |                 // if there is no data set and we went out of range,  | 
| 5532 |                 // then there is nothing to return | 
| 5533 |                 if (bytes_used_in_range_query_buff == 0 && | 
| 5534 |                     icp_went_out_of_range) { | 
| 5535 |                     icp_went_out_of_range = false; | 
| 5536 |                     error = HA_ERR_END_OF_FILE; | 
| 5537 |                 } | 
| 5538 |                 if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) { | 
| 5539 |                     bulk_fetch_iteration++; | 
| 5540 |                 } | 
| 5541 |  | 
| 5542 |                 error = | 
| 5543 |                     handle_cursor_error( | 
| 5544 |                         error, | 
| 5545 |                         HA_ERR_END_OF_FILE, | 
| 5546 |                         tokudb_active_index); | 
| 5547 |                 if (error) { | 
| 5548 |                     goto cleanup; | 
| 5549 |                 } | 
| 5550 |              | 
| 5551 |                 // | 
| 5552 |                 // now that range_query_buff is filled, read an element | 
| 5553 |                 // | 
| 5554 |                 error = | 
| 5555 |                     read_data_from_range_query_buff(buf, need_val, do_key_read); | 
| 5556 |             } else { | 
| 5557 |                 struct smart_dbt_info info; | 
| 5558 |                 info.ha = this; | 
| 5559 |                 info.buf = buf; | 
| 5560 |                 info.keynr = tokudb_active_index; | 
| 5561 |                  | 
| 5562 |                 if (direction > 0) { | 
| 5563 |                     error = | 
| 5564 |                         cursor->c_getf_next( | 
| 5565 |                             cursor, | 
| 5566 |                             flags, | 
| 5567 |                             SMART_DBT_CALLBACK(do_key_read), | 
| 5568 |                             &info); | 
| 5569 |                 } else { | 
| 5570 |                     error = | 
| 5571 |                         cursor->c_getf_prev( | 
| 5572 |                             cursor, | 
| 5573 |                             flags, | 
| 5574 |                             SMART_DBT_CALLBACK(do_key_read), | 
| 5575 |                             &info); | 
| 5576 |                 } | 
| 5577 |                 error = | 
| 5578 |                     handle_cursor_error( | 
| 5579 |                         error, | 
| 5580 |                         HA_ERR_END_OF_FILE, | 
| 5581 |                         tokudb_active_index); | 
| 5582 |             } | 
| 5583 |         } | 
| 5584 |     } | 
| 5585 |  | 
| 5586 |     // | 
| 5587 |     // at this point, one of two things has happened | 
| 5588 |     // either we have unpacked the data into buf, and we  | 
| 5589 |     // are done, or we have unpacked the primary key | 
| 5590 |     // into last_key, and we use the code below to | 
| 5591 |     // read the full row by doing a point query into the  | 
| 5592 |     // main table. | 
| 5593 |     // | 
| 5594 |     if (!error && | 
| 5595 |         !do_key_read && | 
| 5596 |         (tokudb_active_index != primary_key) && | 
| 5597 |         !key_is_clustering(&table->key_info[tokudb_active_index])) { | 
| 5598 |         error = read_full_row(buf); | 
| 5599 |     } | 
| 5600 |  | 
| 5601 |     if (!error) { | 
| 5602 |         THD *thd = ha_thd(); | 
| 5603 |         tokudb_trx_data* trx = | 
| 5604 |             static_cast<tokudb_trx_data*>(thd_get_ha_data(thd, tokudb_hton)); | 
| 5605 |         trx->stmt_progress.queried++; | 
| 5606 |         track_progress(thd); | 
| 5607 |         if (thd_kill_level(thd)) | 
| 5608 |             error = ER_ABORTING_CONNECTION; | 
| 5609 |     } | 
| 5610 | cleanup: | 
| 5611 |     return error; | 
| 5612 | } | 
| 5613 |  | 
| 5614 |  | 
| 5615 | // | 
| 5616 | // Reads the next row from the active index (cursor) into buf, and advances cursor | 
| 5617 | // Parameters: | 
| 5618 | //      [out]   buf - buffer for the next row, in MySQL format | 
| 5619 | // Returns: | 
| 5620 | //      0 on success | 
| 5621 | //      HA_ERR_END_OF_FILE if not found | 
| 5622 | //      error otherwise | 
| 5623 | // | 
| 5624 | int ha_tokudb::index_next(uchar * buf) { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 5626 |     int error = get_next(buf, 1, NULL, key_read); | 
| 5627 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 5628 | } | 
| 5629 |  | 
| 5630 |  | 
| 5631 | int ha_tokudb::index_read_last(uchar * buf, const uchar * key, uint key_len) { | 
    return index_read(buf, key, key_len, HA_READ_PREFIX_LAST);
| 5633 | } | 
| 5634 |  | 
| 5635 |  | 
| 5636 | // | 
| 5637 | // Reads the previous row from the active index (cursor) into buf, and advances cursor | 
| 5638 | // Parameters: | 
| 5639 | //      [out]   buf - buffer for the next row, in MySQL format | 
| 5640 | // Returns: | 
| 5641 | //      0 on success | 
| 5642 | //      HA_ERR_END_OF_FILE if not found | 
| 5643 | //      error otherwise | 
| 5644 | // | 
| 5645 | int ha_tokudb::index_prev(uchar * buf) { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 5647 |     int error = get_next(buf, -1, NULL, key_read); | 
| 5648 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 5649 | } | 
| 5650 |  | 
| 5651 | // | 
| 5652 | // Reads the first row from the active index (cursor) into buf, and advances cursor | 
| 5653 | // Parameters: | 
| 5654 | //      [out]   buf - buffer for the next row, in MySQL format | 
| 5655 | // Returns: | 
| 5656 | //      0 on success | 
| 5657 | //      HA_ERR_END_OF_FILE if not found | 
| 5658 | //      error otherwise | 
| 5659 | // | 
| 5660 | int ha_tokudb::index_first(uchar * buf) { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
    invalidate_bulk_fetch();
    int error = 0;
    struct smart_dbt_info info;
    uint32_t flags = SET_PRELOCK_FLAG(0);
    THD* thd = ha_thd();
    tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);
| 5668 |     HANDLE_INVALID_CURSOR(); | 
| 5669 |  | 
| 5670 |     info.ha = this; | 
| 5671 |     info.buf = buf; | 
| 5672 |     info.keynr = tokudb_active_index; | 
| 5673 |  | 
| 5674 |     error = cursor->c_getf_first(cursor, flags, SMART_DBT_CALLBACK(key_read), &info); | 
| 5675 |     error = handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index); | 
| 5676 |  | 
| 5677 |     // | 
| 5678 |     // still need to get entire contents of the row if operation done on | 
| 5679 |     // secondary DB and it was NOT a covering index | 
| 5680 |     // | 
| 5681 |     if (!error && !key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) { | 
| 5682 |         error = read_full_row(buf); | 
| 5683 |     } | 
| 5684 |     if (trx) { | 
| 5685 |         trx->stmt_progress.queried++; | 
| 5686 |     } | 
| 5687 |     track_progress(thd); | 
| 5688 |     maybe_index_scan = true;     | 
| 5689 | cleanup: | 
| 5690 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 5691 | } | 
| 5692 |  | 
| 5693 | // | 
| 5694 | // Reads the last row from the active index (cursor) into buf, and advances cursor | 
| 5695 | // Parameters: | 
| 5696 | //      [out]   buf - buffer for the next row, in MySQL format | 
| 5697 | // Returns: | 
| 5698 | //      0 on success | 
| 5699 | //      HA_ERR_END_OF_FILE if not found | 
| 5700 | //      error otherwise | 
| 5701 | // | 
| 5702 | int ha_tokudb::index_last(uchar * buf) { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
    invalidate_bulk_fetch();
    int error = 0;
    struct smart_dbt_info info;
    uint32_t flags = SET_PRELOCK_FLAG(0);
    THD* thd = ha_thd();
    tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);
| 5710 |     HANDLE_INVALID_CURSOR(); | 
| 5711 |  | 
| 5712 |     info.ha = this; | 
| 5713 |     info.buf = buf; | 
| 5714 |     info.keynr = tokudb_active_index; | 
| 5715 |  | 
| 5716 |     error = cursor->c_getf_last(cursor, flags, SMART_DBT_CALLBACK(key_read), &info); | 
| 5717 |     error = handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index); | 
| 5718 |     // | 
| 5719 |     // still need to get entire contents of the row if operation done on | 
| 5720 |     // secondary DB and it was NOT a covering index | 
| 5721 |     // | 
| 5722 |     if (!error && !key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) { | 
| 5723 |         error = read_full_row(buf); | 
| 5724 |     } | 
| 5725 |  | 
| 5726 |     if (trx) { | 
| 5727 |         trx->stmt_progress.queried++; | 
| 5728 |     } | 
| 5729 |     track_progress(thd); | 
| 5730 |     maybe_index_scan = true; | 
| 5731 | cleanup: | 
| 5732 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 5733 | } | 
| 5734 |  | 
| 5735 | // | 
// Initialize a scan of the table (which is why index_init is called with MAX_KEY, mapping to the main table)
| 5737 | // Parameters: | 
| 5738 | //          scan - unused | 
| 5739 | // Returns: | 
| 5740 | //      0 on success | 
| 5741 | //      error otherwise | 
| 5742 | // | 
| 5743 | int ha_tokudb::rnd_init(bool scan) { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 5745 |     int error = 0; | 
| 5746 |     range_lock_grabbed = false; | 
| 5747 |     error = index_init(MAX_KEY, 0); | 
| 5748 |     if (error) { goto cleanup;} | 
| 5749 |  | 
| 5750 |     if (scan) { | 
| 5751 |         error = prelock_range(NULL, NULL); | 
| 5752 |         if (error) { goto cleanup; } | 
| 5753 |  | 
        // only want to set range_lock_grabbed to true after index_init and
        // prelock_range have successfully executed, for two reasons:
        // 1) index_init will reset it to false anyway
        // 2) if prelock_range fails, we don't want prelocking on
| 5758 |         range_lock_grabbed = true; | 
| 5759 |     } | 
| 5760 |  | 
| 5761 |     error = 0; | 
| 5762 | cleanup: | 
| 5763 |     if (error) {  | 
| 5764 |         index_end(); | 
| 5765 |         last_cursor_error = error;  | 
| 5766 |     } | 
| 5767 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 5768 | } | 
| 5769 |  | 
| 5770 | // | 
| 5771 | // End a scan of the table | 
| 5772 | // | 
| 5773 | int ha_tokudb::rnd_end() { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 5775 |     range_lock_grabbed = false; | 
| 5776 |     TOKUDB_HANDLER_DBUG_RETURN(index_end()); | 
| 5777 | } | 
| 5778 |  | 
| 5779 |  | 
| 5780 | // | 
| 5781 | // Read the next row in a table scan | 
| 5782 | // Parameters: | 
| 5783 | //      [out]   buf - buffer for the next row, in MySQL format | 
| 5784 | // Returns: | 
| 5785 | //      0 on success | 
| 5786 | //      HA_ERR_END_OF_FILE if not found | 
| 5787 | //      error otherwise | 
| 5788 | // | 
| 5789 | int ha_tokudb::rnd_next(uchar * buf) { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 5791 |     int error = get_next(buf, 1, NULL, false); | 
| 5792 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 5793 | } | 
| 5794 |  | 
| 5795 |  | 
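// Periodically publish statement progress through thd_proc_info(). With the
// read/write status frequency sysvars this produces processlist messages
// such as (illustrative example only):
//   Queried about 10000 rows, Inserted about 5000 rows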
| 5796 | void ha_tokudb::track_progress(THD* thd) { | 
| 5797 |     tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); | 
| 5798 |     if (trx) { | 
| 5799 |         ulonglong num_written = trx->stmt_progress.inserted + | 
| 5800 |             trx->stmt_progress.updated + | 
| 5801 |             trx->stmt_progress.deleted; | 
| 5802 |         bool update_status =  | 
| 5803 |             (trx->stmt_progress.queried && | 
| 5804 |              tokudb::sysvars::read_status_frequency && | 
| 5805 |              (trx->stmt_progress.queried % | 
| 5806 |                 tokudb::sysvars::read_status_frequency) == 0) || | 
| 5807 |              (num_written && tokudb::sysvars::write_status_frequency && | 
| 5808 |               (num_written % tokudb::sysvars::write_status_frequency) == 0); | 
| 5809 |         if (update_status) { | 
| 5810 |             char *next_status = write_status_msg; | 
| 5811 |             bool first = true; | 
| 5812 |             int r; | 
            if (trx->stmt_progress.queried) {
                r = sprintf(
                    next_status,
                    "Queried about %llu row%s",
                    trx->stmt_progress.queried,
                    trx->stmt_progress.queried == 1 ? "" : "s");
                assert_always(r >= 0);
                next_status += r;
                first = false;
            }
            if (trx->stmt_progress.inserted) {
                if (trx->stmt_progress.using_loader) {
                    r = sprintf(
                        next_status,
                        "%sFetched about %llu row%s, loading data still remains",
                        first ? "" : ", ",
                        trx->stmt_progress.inserted,
                        trx->stmt_progress.inserted == 1 ? "" : "s");
                } else {
                    r = sprintf(
                        next_status,
                        "%sInserted about %llu row%s",
                        first ? "" : ", ",
                        trx->stmt_progress.inserted,
                        trx->stmt_progress.inserted == 1 ? "" : "s");
                }
                assert_always(r >= 0);
                next_status += r;
                first = false;
            }
            if (trx->stmt_progress.updated) {
                r = sprintf(
                    next_status,
                    "%sUpdated about %llu row%s",
                    first ? "" : ", ",
                    trx->stmt_progress.updated,
                    trx->stmt_progress.updated == 1 ? "" : "s");
                assert_always(r >= 0);
                next_status += r;
                first = false;
            }
            if (trx->stmt_progress.deleted) {
                r = sprintf(
                    next_status,
                    "%sDeleted about %llu row%s",
                    first ? "" : ", ",
                    trx->stmt_progress.deleted,
                    trx->stmt_progress.deleted == 1 ? "" : "s");
                assert_always(r >= 0);
                next_status += r;
                first = false;
            }
            if (!first)
                thd_proc_info(thd, write_status_msg);
| 5867 |         } | 
| 5868 |     } | 
| 5869 | } | 
| 5870 |  | 
| 5871 |  | 
| 5872 | DBT *ha_tokudb::get_pos(DBT * to, uchar * pos) { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
    /* We don't need to set app_data here */
    memset((void *) to, 0, sizeof(*to));
    to->data = pos + sizeof(uint32_t);
    to->size = *(uint32_t *)pos;
    DBUG_DUMP("key", (const uchar *) to->data, to->size);
| 5879 |     DBUG_RETURN(to); | 
| 5880 | } | 
| 5881 |  | 
// Retrieves a row based on the primary key saved in pos
| 5883 | // Returns: | 
| 5884 | //      0 on success | 
| 5885 | //      HA_ERR_KEY_NOT_FOUND if not found | 
| 5886 | //      error otherwise | 
| 5887 | int ha_tokudb::rnd_pos(uchar * buf, uchar * pos) { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 5889 |     DBT db_pos; | 
| 5890 |     int error = 0; | 
| 5891 |     struct smart_dbt_info info; | 
| 5892 |     bool old_unpack_entire_row = unpack_entire_row; | 
| 5893 |     DBT* key = get_pos(&db_pos, pos);  | 
| 5894 |  | 
| 5895 |     unpack_entire_row = true; | 
| 5896 |     tokudb_active_index = MAX_KEY; | 
| 5897 |  | 
| 5898 |     // test rpl slave by inducing a delay before the point query | 
| 5899 |     THD *thd = ha_thd(); | 
| 5900 |     if (thd->slave_thread && (in_rpl_delete_rows || in_rpl_update_rows)) { | 
        DBUG_EXECUTE_IF("tokudb_crash_if_rpl_looks_up_row", DBUG_ASSERT(0););
| 5902 |         uint64_t delay_ms = tokudb::sysvars::rpl_lookup_rows_delay(thd); | 
| 5903 |         if (delay_ms) | 
| 5904 |             usleep(delay_ms * 1000); | 
| 5905 |     } | 
| 5906 |  | 
| 5907 |     info.ha = this; | 
| 5908 |     info.buf = buf; | 
| 5909 |     info.keynr = primary_key; | 
| 5910 |  | 
| 5911 |     error = share->file->getf_set(share->file, transaction,  | 
| 5912 |             get_cursor_isolation_flags(lock.type, thd), | 
| 5913 |             key, smart_dbt_callback_rowread_ptquery, &info); | 
| 5914 |  | 
| 5915 |     if (error == DB_NOTFOUND) { | 
| 5916 |         error = HA_ERR_KEY_NOT_FOUND; | 
| 5917 |         goto cleanup; | 
| 5918 |     } | 
| 5919 | cleanup: | 
| 5920 |     unpack_entire_row = old_unpack_entire_row; | 
| 5921 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 5922 | } | 
| 5923 |  | 
| 5924 | int ha_tokudb::prelock_range(const key_range *start_key, const key_range *end_key) { | 
    TOKUDB_HANDLER_DBUG_ENTER("%p %p", start_key, end_key);
| 5926 |     THD* thd = ha_thd();  | 
| 5927 |  | 
| 5928 |     int error = 0; | 
| 5929 |     DBT start_dbt_key; | 
| 5930 |     DBT end_dbt_key; | 
| 5931 |     uchar* start_key_buff  = prelocked_left_range; | 
| 5932 |     uchar* end_key_buff = prelocked_right_range; | 
| 5933 |  | 
| 5934 |     memset((void *) &start_dbt_key, 0, sizeof(start_dbt_key)); | 
| 5935 |     memset((void *) &end_dbt_key, 0, sizeof(end_dbt_key)); | 
| 5936 |  | 
| 5937 |     HANDLE_INVALID_CURSOR(); | 
| 5938 |     if (start_key) { | 
| 5939 |         switch (start_key->flag) { | 
| 5940 |         case HA_READ_AFTER_KEY: | 
| 5941 |             pack_key(&start_dbt_key, tokudb_active_index, start_key_buff, start_key->key, start_key->length, COL_POS_INF); | 
| 5942 |             break; | 
| 5943 |         default: | 
| 5944 |             pack_key(&start_dbt_key, tokudb_active_index, start_key_buff, start_key->key, start_key->length, COL_NEG_INF); | 
| 5945 |             break; | 
| 5946 |         } | 
| 5947 |         prelocked_left_range_size = start_dbt_key.size; | 
| 5948 |     } | 
| 5949 |     else { | 
| 5950 |         prelocked_left_range_size = 0; | 
| 5951 |     } | 
| 5952 |  | 
| 5953 |     if (end_key) { | 
| 5954 |         switch (end_key->flag) { | 
| 5955 |         case HA_READ_BEFORE_KEY: | 
| 5956 |             pack_key(&end_dbt_key, tokudb_active_index, end_key_buff, end_key->key, end_key->length, COL_NEG_INF); | 
| 5957 |             break; | 
| 5958 |         default: | 
| 5959 |             pack_key(&end_dbt_key, tokudb_active_index, end_key_buff, end_key->key, end_key->length, COL_POS_INF); | 
| 5960 |             break; | 
| 5961 |         }         | 
| 5962 |         prelocked_right_range_size = end_dbt_key.size; | 
| 5963 |     } | 
| 5964 |     else { | 
| 5965 |         prelocked_right_range_size = 0; | 
| 5966 |     } | 
| 5967 |  | 
| 5968 |     error = cursor->c_set_bounds( | 
| 5969 |         cursor,  | 
| 5970 |         start_key ? &start_dbt_key : share->key_file[tokudb_active_index]->dbt_neg_infty(),  | 
| 5971 |         end_key ? &end_dbt_key : share->key_file[tokudb_active_index]->dbt_pos_infty(), | 
| 5972 |         true, | 
| 5973 |         (cursor_flags & DB_SERIALIZABLE) != 0 ? DB_NOTFOUND : 0 | 
| 5974 |         ); | 
| 5975 |     if (error) {  | 
| 5976 |         error = map_to_handler_error(error); | 
| 5977 |         last_cursor_error = error; | 
| 5978 |         // | 
| 5979 |         // cursor should be initialized here, but in case it is not, we still check | 
| 5980 |         // | 
| 5981 |         if (cursor) { | 
| 5982 |             int r = cursor->c_close(cursor); | 
| 5983 |             assert_always(r==0); | 
| 5984 |             cursor = NULL; | 
| 5985 |             remove_from_trx_handler_list(); | 
| 5986 |         } | 
| 5987 |         goto cleanup;  | 
| 5988 |     } | 
| 5989 |  | 
| 5990 |     // at this point, determine if we will be doing bulk fetch | 
| 5991 |     doing_bulk_fetch = tokudb_do_bulk_fetch(thd); | 
| 5992 |     bulk_fetch_iteration = 0; | 
| 5993 |     rows_fetched_using_bulk_fetch = 0; | 
| 5994 |  | 
| 5995 | cleanup: | 
| 5996 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 5997 | } | 
| 5998 |  | 
| 5999 | // | 
| 6000 | // Prelock range if possible, start_key is leftmost, end_key is rightmost | 
| 6001 | // whether scanning forward or backward.  This function is called by MySQL | 
| 6002 | // for backward range queries (in QUICK_SELECT_DESC::get_next).  | 
| 6003 | // Forward scans use read_range_first()/read_range_next(). | 
| 6004 | // | 
| 6005 | int ha_tokudb::prepare_range_scan( const key_range *start_key, const key_range *end_key) { | 
    TOKUDB_HANDLER_DBUG_ENTER("%p %p", start_key, end_key);
| 6007 |     int error = prelock_range(start_key, end_key); | 
| 6008 |     if (!error) { | 
| 6009 |         range_lock_grabbed = true; | 
| 6010 |     } | 
| 6011 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 6012 | } | 
| 6013 |  | 
| 6014 | int ha_tokudb::read_range_first( | 
| 6015 |     const key_range *start_key, | 
| 6016 |     const key_range *end_key, | 
| 6017 |     bool eq_range,  | 
| 6018 |     bool sorted)  | 
| 6019 | { | 
    TOKUDB_HANDLER_DBUG_ENTER("%p %p %u %u", start_key, end_key, eq_range, sorted);
| 6021 |     int error = prelock_range(start_key, end_key); | 
| 6022 |     if (error) { goto cleanup; } | 
| 6023 |     range_lock_grabbed = true; | 
| 6024 |      | 
| 6025 |     error = handler::read_range_first(start_key, end_key, eq_range, sorted); | 
| 6026 | cleanup: | 
| 6027 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 6028 | } | 
| 6029 |  | 
| 6030 | int ha_tokudb::read_range_next() | 
| 6031 | { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 6033 |     int error; | 
| 6034 |     error = handler::read_range_next(); | 
| 6035 |     if (error) { | 
| 6036 |         range_lock_grabbed = false; | 
| 6037 |     } | 
| 6038 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 6039 | } | 
| 6040 |  | 
| 6041 |  | 
| 6042 |  | 
| 6043 | /* | 
| 6044 |   Set a reference to the current record in (ref,ref_length). | 
| 6045 |  | 
| 6046 |   SYNOPSIS | 
| 6047 |   ha_tokudb::position() | 
| 6048 |   record                      The current record buffer | 
| 6049 |  | 
| 6050 |   DESCRIPTION | 
| 6051 |   The BDB handler stores the primary key in (ref,ref_length). | 
| 6052 |   There is either an explicit primary key, or an implicit (hidden) | 
| 6053 |   primary key. | 
| 6054 |   During open(), 'ref_length' is calculated as the maximum primary | 
| 6055 |   key length. When an actual key is shorter than that, the rest of | 
| 6056 |   the buffer must be cleared out. The row cannot be identified, if | 
| 6057 |   garbage follows behind the end of the key. There is no length | 
| 6058 |   field for the current key, so that the whole ref_length is used | 
| 6059 |   for comparison. | 
| 6060 |  | 
| 6061 |   RETURN | 
| 6062 |   nothing | 
| 6063 | */ | 
| 6064 | void ha_tokudb::position(const uchar * record) { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 6066 |     DBT key; | 
| 6067 |     if (hidden_primary_key) { | 
| 6068 |         DBUG_ASSERT(ref_length == (TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH + sizeof(uint32_t))); | 
| 6069 |         memcpy(ref + sizeof(uint32_t), current_ident, TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH); | 
| 6070 |         *(uint32_t *)ref = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH; | 
| 6071 |     }  | 
| 6072 |     else { | 
| 6073 |         bool has_null; | 
| 6074 |         // | 
| 6075 |         // save the data | 
| 6076 |         // | 
| 6077 |         create_dbt_key_from_table(&key, primary_key, ref + sizeof(uint32_t), record, &has_null); | 
| 6078 |         // | 
| 6079 |         // save the size of data in the first four bytes of ref | 
| 6080 |         // | 
| 6081 |         memcpy(ref, &key.size, sizeof(uint32_t)); | 
| 6082 |     } | 
| 6083 |     TOKUDB_HANDLER_DBUG_VOID_RETURN; | 
| 6084 | } | 
| 6085 |  | 
| 6086 | // | 
| 6087 | // Per InnoDB: Returns statistics information of the table to the MySQL interpreter, | 
| 6088 | // in various fields of the handle object.  | 
| 6089 | // Return: | 
| 6090 | //      0, always success | 
| 6091 | // | 
| 6092 | int ha_tokudb::info(uint flag) { | 
    TOKUDB_HANDLER_DBUG_ENTER("%d", flag);
| 6094 |     int error = 0; | 
| 6095 | #if TOKU_CLUSTERING_IS_COVERING | 
| 6096 |     for (uint i=0; i < table->s->keys; i++) | 
| 6097 |         if (key_is_clustering(&table->key_info[i])) | 
| 6098 |             table->covering_keys.set_bit(i); | 
| 6099 | #endif | 
| 6100 |     DB_TXN* txn = NULL; | 
| 6101 |     if (flag & HA_STATUS_VARIABLE) { | 
| 6102 |         stats.records = share->row_count() + share->rows_from_locked_table; | 
| 6103 |         stats.deleted = 0; | 
| 6104 |         if (!(flag & HA_STATUS_NO_LOCK)) { | 
| 6105 |  | 
| 6106 |             error = txn_begin(db_env, NULL, &txn, DB_READ_UNCOMMITTED, ha_thd()); | 
| 6107 |             if (error) { | 
| 6108 |                 goto cleanup; | 
| 6109 |             } | 
| 6110 |  | 
| 6111 |             // we should always have a primary key | 
| 6112 |             assert_always(share->file != NULL); | 
| 6113 |  | 
| 6114 |             DB_BTREE_STAT64 dict_stats; | 
| 6115 |             error = share->file->stat64(share->file, txn, &dict_stats); | 
| 6116 |             if (error) { | 
| 6117 |                 goto cleanup; | 
| 6118 |             } | 
| 6119 |             share->set_row_count(dict_stats.bt_ndata, false); | 
| 6120 |             stats.records = dict_stats.bt_ndata; | 
| 6121 |             stats.create_time = dict_stats.bt_create_time_sec; | 
| 6122 |             stats.update_time = dict_stats.bt_modify_time_sec; | 
| 6123 |             stats.check_time = dict_stats.bt_verify_time_sec; | 
| 6124 |             stats.data_file_length = dict_stats.bt_dsize; | 
| 6125 |             stats.delete_length = dict_stats.bt_fsize - dict_stats.bt_dsize; | 
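            // The stat64 fields are used above on the assumption that
            // bt_dsize is the size of the user data and bt_fsize the size of
            // the underlying file, so their difference is reported as
            // reclaimable space (delete_length).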
| 6126 |             if (hidden_primary_key) { | 
| 6127 |                 // | 
| 6128 |                 // in this case, we have a hidden primary key, do not | 
| 6129 |                 // want to report space taken up by the hidden primary key to the user | 
| 6130 |                 // | 
| 6131 |                 uint64_t hpk_space = | 
| 6132 |                     TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH * dict_stats.bt_ndata; | 
| 6133 |                 stats.data_file_length = | 
| 6134 |                     (hpk_space > stats.data_file_length) ? | 
| 6135 |                         0 : stats.data_file_length - hpk_space; | 
| 6136 |             } else { | 
| 6137 |                 // | 
| 6138 |                 // one infinity byte per key needs to be subtracted | 
| 6139 |                 // | 
| 6140 |                 uint64_t inf_byte_space = dict_stats.bt_ndata; | 
| 6141 |                 stats.data_file_length = | 
| 6142 |                     (inf_byte_space > stats.data_file_length) ? | 
| 6143 |                         0 : stats.data_file_length - inf_byte_space; | 
| 6144 |             } | 
| 6145 |  | 
| 6146 |             stats.mean_rec_length = | 
| 6147 |                 stats.records ? | 
| 6148 |                     (ulong)(stats.data_file_length/stats.records) : 0; | 
| 6149 |             stats.index_file_length = 0; | 
| 6150 |             // curr_num_DBs is the number of keys we have, according | 
| 6151 |             // to the mysql layer. if drop index is running concurrently | 
| 6152 |             // with info() (it can, because info does not take table locks), | 
| 6153 |             // then it could be the case that one of the dbs was dropped | 
| 6154 |             // and set to NULL before mysql was able to set table->s->keys | 
| 6155 |             // accordingly.  | 
| 6156 |             // | 
| 6157 |             // we should just ignore any DB * that is NULL.  | 
| 6158 |             // | 
| 6159 |             // this solution is much simpler than trying to maintain an  | 
| 6160 |             // accurate number of valid keys at the handlerton layer. | 
| 6161 |             uint curr_num_DBs = | 
| 6162 |                 table->s->keys + tokudb_test(hidden_primary_key); | 
| 6163 |             for (uint i = 0; i < curr_num_DBs; i++) { | 
| 6164 |                 // skip the primary key, skip dropped indexes | 
| 6165 |                 if (i == primary_key || share->key_file[i] == NULL) { | 
| 6166 |                     continue; | 
| 6167 |                 } | 
| 6168 |                 error = share->key_file[i]->stat64( | 
| 6169 |                     share->key_file[i], txn, &dict_stats); | 
| 6170 |                 if (error) { | 
| 6171 |                     goto cleanup; | 
| 6172 |                 } | 
| 6173 |                 stats.index_file_length += dict_stats.bt_dsize; | 
| 6174 |                 stats.delete_length += | 
| 6175 |                     dict_stats.bt_fsize - dict_stats.bt_dsize; | 
| 6176 |             } | 
| 6177 |         } | 
| 6178 |  | 
| 6179 |         /* | 
| 6180 |         The following comment and logic has been taken from InnoDB and  | 
| 6181 |         an old hack was removed that forced to always set stats.records > 0 | 
| 6182 |         --- | 
| 6183 |         The MySQL optimizer seems to assume in a left join that n_rows | 
| 6184 |         is an accurate estimate if it is zero. Of course, it is not, | 
| 6185 |         since we do not have any locks on the rows yet at this phase. | 
| 6186 |         Since SHOW TABLE STATUS seems to call this function with the | 
| 6187 |         HA_STATUS_TIME flag set, while the left join optimizer does not | 
| 6188 |         set that flag, we add one to a zero value if the flag is not | 
| 6189 |         set. That way SHOW TABLE STATUS will show the best estimate, | 
| 6190 |         while the optimizer never sees the table empty. */ | 
| 6191 |         if (stats.records == 0 && !(flag & HA_STATUS_TIME)) { | 
| 6192 |             stats.records++; | 
| 6193 |         } | 
| 6194 |     } | 
| 6195 |     if ((flag & HA_STATUS_CONST)) { | 
| 6196 |         stats.max_data_file_length = 9223372036854775807ULL; | 
| 6197 |     } | 
| 6198 |     if (flag & (HA_STATUS_VARIABLE | HA_STATUS_CONST)) { | 
| 6199 |         share->set_cardinality_counts_in_table(table); | 
| 6200 |     } | 
| 6201 |  | 
| 6202 |     /* Don't return key if we got an error for the internal primary key */ | 
| 6203 |     if (flag & HA_STATUS_ERRKEY && last_dup_key < table_share->keys) { | 
| 6204 |         errkey = last_dup_key; | 
| 6205 |     } | 
| 6206 |  | 
| 6207 |     if (flag & HA_STATUS_AUTO && table->found_next_number_field) { | 
| 6208 |         THD* thd = table->in_use; | 
| 6209 |         struct system_variables* variables = &thd->variables; | 
| 6210 |         stats.auto_increment_value = | 
| 6211 |             share->last_auto_increment + variables->auto_increment_increment; | 
| 6212 |     } | 
| 6213 |     error = 0; | 
| 6214 | cleanup: | 
| 6215 |     if (txn != NULL) { | 
| 6216 |         commit_txn(txn, DB_TXN_NOSYNC); | 
| 6217 |         txn = NULL; | 
| 6218 |     } | 
| 6219 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 6220 | } | 
| 6221 |  | 
| 6222 | // | 
| 6223 | //  Per InnoDB: Tells something additional to the handler about how to do things. | 
| 6224 | // | 
int ha_tokudb::extra(enum ha_extra_function operation) {
    TOKUDB_HANDLER_DBUG_ENTER("%d", operation);
| 6227 |     switch (operation) { | 
| 6228 |     case HA_EXTRA_RESET_STATE: | 
| 6229 |         reset(); | 
| 6230 |         break; | 
| 6231 |     case HA_EXTRA_KEYREAD: | 
| 6232 |         key_read = true;           // Query satisfied with key | 
| 6233 |         break; | 
| 6234 |     case HA_EXTRA_NO_KEYREAD: | 
| 6235 |         key_read = false; | 
| 6236 |         break; | 
| 6237 |     case HA_EXTRA_IGNORE_DUP_KEY: | 
| 6238 |         using_ignore = true; | 
| 6239 |         break; | 
| 6240 |     case HA_EXTRA_NO_IGNORE_DUP_KEY: | 
| 6241 |         using_ignore = false; | 
| 6242 |         break; | 
| 6243 |     case HA_EXTRA_IGNORE_NO_KEY: | 
| 6244 |         using_ignore_no_key = true; | 
| 6245 |         break; | 
| 6246 |     case HA_EXTRA_NO_IGNORE_NO_KEY: | 
| 6247 |         using_ignore_no_key = false; | 
| 6248 |         break; | 
| 6249 |     case HA_EXTRA_NOT_USED: | 
| 6250 |     case HA_EXTRA_PREPARE_FOR_RENAME: | 
| 6251 |         break; // must do nothing and return 0 | 
| 6252 |     default: | 
| 6253 |         break; | 
| 6254 |     } | 
| 6255 |     TOKUDB_HANDLER_DBUG_RETURN(0); | 
| 6256 | } | 
| 6257 |  | 
| 6258 | int ha_tokudb::reset() { | 
    TOKUDB_HANDLER_DBUG_ENTER("");
| 6260 |     key_read = false; | 
| 6261 |     using_ignore = false; | 
| 6262 |     using_ignore_no_key = false; | 
| 6263 |     reset_dsmrr(); | 
| 6264 |     invalidate_icp(); | 
| 6265 |     TOKUDB_HANDLER_DBUG_RETURN(0); | 
| 6266 | } | 
| 6267 |  | 
| 6268 | // | 
| 6269 | // helper function that iterates through all DB's  | 
| 6270 | // and grabs a lock (either read or write, but not both) | 
| 6271 | // Parameters: | 
| 6272 | //      [in]    trans - transaction to be used to pre acquire the lock | 
| 6273 | //              lt - type of lock to get, either lock_read or lock_write | 
| 6274 | //  Returns: | 
| 6275 | //      0 on success | 
| 6276 | //      error otherwise | 
| 6277 | // | 
| 6278 | int ha_tokudb::acquire_table_lock (DB_TXN* trans, TABLE_LOCK_TYPE lt) { | 
    TOKUDB_HANDLER_DBUG_ENTER("%p %s", trans, lt == lock_read ? "r" : "w");
| 6280 |     int error = ENOSYS; | 
| 6281 |     if (!num_DBs_locked_in_bulk) { | 
| 6282 |         rwlock_t_lock_read(share->_num_DBs_lock); | 
| 6283 |     } | 
| 6284 |     uint curr_num_DBs = share->num_DBs; | 
| 6285 |     if (lt == lock_read) { | 
| 6286 |         error = 0; | 
| 6287 |         goto cleanup; | 
| 6288 |     } else if (lt == lock_write) { | 
| 6289 |         for (uint i = 0; i < curr_num_DBs; i++) { | 
| 6290 |             DB* db = share->key_file[i]; | 
| 6291 |             error = db->pre_acquire_table_lock(db, trans); | 
            if (error == EINVAL)
                TOKUDB_HANDLER_TRACE("%d db=%p trans=%p", i, db, trans);
            if (error) break;
        }
        TOKUDB_HANDLER_TRACE_FOR_FLAGS(TOKUDB_DEBUG_LOCK, "error=%d", error);
| 6297 |         if (error) goto cleanup; | 
| 6298 |     } else { | 
| 6299 |         error = ENOSYS; | 
| 6300 |         goto cleanup; | 
| 6301 |     } | 
| 6302 |  | 
| 6303 |     error = 0; | 
| 6304 | cleanup: | 
| 6305 |     if (!num_DBs_locked_in_bulk) { | 
| 6306 |         share->_num_DBs_lock.unlock(); | 
| 6307 |     } | 
| 6308 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 6309 | } | 
| 6310 |  | 
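// create_txn: build the transaction stack for this statement. The
// tokudb_trx_data members used below form a hierarchy (as set up here):
//   trx->all          - master txn; only created when autocommit is off
//   trx->sp_level     - current savepoint level, initialized to trx->all
//   trx->stmt         - per-statement txn, begun as a child of sp_level
//   trx->sub_sp_level - follows trx->stmt; what external_lock() later
//                       assigns to `transaction`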
| 6311 | int ha_tokudb::create_txn(THD* thd, tokudb_trx_data* trx) { | 
| 6312 |     int error; | 
| 6313 |     ulong tx_isolation = thd_tx_isolation(thd); | 
| 6314 |     HA_TOKU_ISO_LEVEL toku_iso_level = tx_to_toku_iso(tx_isolation); | 
| 6315 |     bool is_autocommit = !thd_test_options( | 
| 6316 |             thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN); | 
| 6317 |  | 
| 6318 |     /* First table lock, start transaction */ | 
| 6319 |     if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) &&  | 
| 6320 |          !trx->all && | 
| 6321 |          (thd_sql_command(thd) != SQLCOM_CREATE_TABLE) && | 
| 6322 |          (thd_sql_command(thd) != SQLCOM_DROP_TABLE) && | 
| 6323 |          (thd_sql_command(thd) != SQLCOM_DROP_INDEX) && | 
| 6324 |          (thd_sql_command(thd) != SQLCOM_CREATE_INDEX) && | 
| 6325 |          (thd_sql_command(thd) != SQLCOM_ALTER_TABLE)) { | 
| 6326 |         /* QQQ We have to start a master transaction */ | 
| 6327 |         // DBUG_PRINT("trans", ("starting transaction all ")); | 
| 6328 |         uint32_t txn_begin_flags = toku_iso_to_txn_flag(toku_iso_level); | 
| 6329 | #if 50614 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699 | 
| 6330 |         if (thd_tx_is_read_only(thd)) { | 
| 6331 |             txn_begin_flags |= DB_TXN_READ_ONLY; | 
| 6332 |         } | 
| 6333 | #endif | 
| 6334 |         if ((error = txn_begin(db_env, NULL, &trx->all, txn_begin_flags, thd))) { | 
| 6335 |             goto cleanup; | 
| 6336 |         } | 
| 6337 |         TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 6338 |             TOKUDB_DEBUG_TXN, | 
| 6339 |             "created master %p" , | 
| 6340 |             trx->all); | 
| 6341 |         trx->sp_level = trx->all; | 
| 6342 |         trans_register_ha(thd, true, tokudb_hton); | 
| 6343 |     } | 
    DBUG_PRINT("trans", ("starting transaction stmt"));
| 6345 |     if (trx->stmt) {  | 
| 6346 |         TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 6347 |             TOKUDB_DEBUG_TXN, | 
| 6348 |             "warning:stmt=%p" , | 
| 6349 |             trx->stmt); | 
| 6350 |     } | 
| 6351 |     uint32_t txn_begin_flags; | 
| 6352 |     if (trx->all == NULL) { | 
| 6353 |         txn_begin_flags = toku_iso_to_txn_flag(toku_iso_level); | 
| 6354 |         // | 
| 6355 |         // if the isolation level that the user has set is serializable, | 
| 6356 |         // but autocommit is on and this is just a select, | 
| 6357 |         // then we can go ahead and set the isolation level to | 
| 6358 |         // be a snapshot read, because we can serialize | 
| 6359 |         // the transaction to be the point in time at which the snapshot began. | 
| 6360 |         //  | 
| 6361 |         if (txn_begin_flags == 0 && is_autocommit && thd_sql_command(thd) == SQLCOM_SELECT) { | 
| 6362 |             txn_begin_flags = DB_TXN_SNAPSHOT; | 
| 6363 |         } | 
| 6364 |         if (is_autocommit && thd_sql_command(thd) == SQLCOM_SELECT && | 
| 6365 |             !thd->in_sub_stmt && lock.type <= TL_READ_NO_INSERT && | 
| 6366 |             !thd->lex->uses_stored_routines()) { | 
| 6367 |             txn_begin_flags |= DB_TXN_READ_ONLY; | 
| 6368 |         } | 
| 6369 |     } else { | 
| 6370 |         txn_begin_flags = DB_INHERIT_ISOLATION; | 
| 6371 |     } | 
| 6372 |     error = txn_begin(db_env, trx->sp_level, &trx->stmt, txn_begin_flags, thd); | 
| 6373 |     if (error) { | 
| 6374 |         /* We leave the possible master transaction open */ | 
| 6375 |         goto cleanup; | 
| 6376 |     } | 
| 6377 |     trx->sub_sp_level = trx->stmt; | 
| 6378 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 6379 |         TOKUDB_DEBUG_TXN, | 
| 6380 |         "created stmt %p sp_level %p" , | 
| 6381 |         trx->sp_level, | 
| 6382 |         trx->stmt); | 
| 6383 |     reset_stmt_progress(&trx->stmt_progress); | 
| 6384 |     trans_register_ha(thd, false, tokudb_hton); | 
| 6385 | cleanup: | 
| 6386 |     return error; | 
| 6387 | } | 
| 6388 |  | 
| 6389 | static const char *lock_type_str(int lock_type) { | 
    if (lock_type == F_RDLCK) return "F_RDLCK";
    if (lock_type == F_WRLCK) return "F_WRLCK";
    if (lock_type == F_UNLCK) return "F_UNLCK";
    return "?";
| 6394 | } | 
| 6395 |  | 
| 6396 | /* | 
  As MySQL will execute an external lock for every new table it uses,
  we can use this to start the transactions.
| 6399 |   If we are in auto_commit mode we just need to start a transaction | 
| 6400 |   for the statement to be able to rollback the statement. | 
| 6401 |   If not, we have to start a master transaction if there doesn't exist | 
| 6402 |   one from before. | 
| 6403 | */ | 
| 6404 | // | 
| 6405 | // Parameters: | 
| 6406 | //      [in]    thd - handle to the user thread | 
| 6407 | //              lock_type - the type of lock | 
| 6408 | // Returns: | 
| 6409 | //      0 on success | 
| 6410 | //      error otherwise | 
| 6411 | // | 
| 6412 | int ha_tokudb::external_lock(THD * thd, int lock_type) { | 
| 6413 |     TOKUDB_HANDLER_DBUG_ENTER( | 
| 6414 |         "cmd %d lock %d %s %s" , | 
| 6415 |         thd_sql_command(thd), | 
| 6416 |         lock_type, | 
| 6417 |         lock_type_str(lock_type), | 
| 6418 |         share->full_table_name()); | 
| 6419 |     if (TOKUDB_UNLIKELY(!TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ENTER) && | 
| 6420 |         TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_LOCK))) { | 
| 6421 |         TOKUDB_HANDLER_TRACE( | 
| 6422 |             "cmd %d lock %d %s %s" , | 
| 6423 |             thd_sql_command(thd), | 
| 6424 |             lock_type, | 
| 6425 |             lock_type_str(lock_type), | 
| 6426 |             share->full_table_name()); | 
| 6427 |     } | 
    TOKUDB_HANDLER_TRACE_FOR_FLAGS(TOKUDB_DEBUG_LOCK, "q %s", thd->query());
| 6429 |  | 
| 6430 |     int error = 0; | 
| 6431 |     tokudb_trx_data* trx = (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton); | 
| 6432 |     if (!trx) { | 
| 6433 |         error = create_tokudb_trx_data_instance(&trx); | 
| 6434 |         if (error) { goto cleanup; } | 
| 6435 |         thd_set_ha_data(thd, tokudb_hton, trx); | 
| 6436 |     } | 
| 6437 |  | 
| 6438 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 6439 |         TOKUDB_DEBUG_TXN, | 
| 6440 |         "trx %p %p %p %p %u %u" , | 
| 6441 |         trx->all, | 
| 6442 |         trx->stmt, | 
| 6443 |         trx->sp_level, | 
| 6444 |         trx->sub_sp_level, | 
| 6445 |         trx->tokudb_lock_count, | 
| 6446 |         trx->create_lock_count); | 
| 6447 |  | 
| 6448 |     if (trx->all == NULL) { | 
| 6449 |         trx->sp_level = NULL; | 
| 6450 |     } | 
| 6451 |     if (lock_type != F_UNLCK) { | 
        use_write_locks = (lock_type == F_WRLCK);
| 6456 |         if (!trx->stmt) { | 
| 6457 |             transaction = NULL;    // Safety | 
| 6458 |             error = create_txn(thd, trx); | 
| 6459 |             if (error) { | 
| 6460 |                 goto cleanup; | 
| 6461 |             } | 
| 6462 |             trx->create_lock_count = trx->tokudb_lock_count; | 
| 6463 |         } | 
| 6464 |         transaction = trx->sub_sp_level; | 
| 6465 |         trx->tokudb_lock_count++; | 
| 6466 |     } else { | 
| 6467 |         share->update_row_count(thd, added_rows, deleted_rows, updated_rows); | 
| 6468 |         added_rows = 0; | 
| 6469 |         deleted_rows = 0; | 
| 6470 |         updated_rows = 0; | 
| 6471 |         share->rows_from_locked_table = 0; | 
| 6472 |         if (trx->tokudb_lock_count > 0) { | 
| 6473 |             if (--trx->tokudb_lock_count <= trx->create_lock_count) { | 
| 6474 |                 trx->create_lock_count = 0; | 
| 6475 |                 if (trx->stmt) { | 
| 6476 |                     /* | 
| 6477 |                       F_UNLCK is done without a transaction commit / rollback. | 
| 6478 |                       This happens if the thread didn't update any rows | 
| 6479 |                       We must in this case commit the work to keep the row locks | 
| 6480 |                     */ | 
| 6481 |                     DBUG_PRINT("trans" , ("commiting non-updating transaction" )); | 
| 6482 |                     reset_stmt_progress(&trx->stmt_progress); | 
| 6483 |                     commit_txn(trx->stmt, 0); | 
| 6484 |                     trx->stmt = NULL; | 
| 6485 |                     trx->sub_sp_level = NULL; | 
| 6486 |                 } | 
| 6487 |             } | 
| 6488 |             transaction = NULL; | 
| 6489 |         } | 
| 6490 |     } | 
| 6491 | cleanup: | 
| 6492 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS(TOKUDB_DEBUG_LOCK, "error=%d" , error); | 
| 6493 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 6494 | } | 
| 6495 |  | 
| 6496 | /* | 
| 6497 |   When using LOCK TABLE's external_lock is only called when the actual | 
| 6498 |   TABLE LOCK is done. | 
| 6499 |   Under LOCK TABLES, each used tables will force a call to start_stmt. | 
| 6500 | */ | 
| 6501 | int ha_tokudb::start_stmt(THD* thd, thr_lock_type lock_type) { | 
| 6502 |     TOKUDB_HANDLER_DBUG_ENTER( | 
| 6503 |         "cmd %d lock %d %s" , | 
| 6504 |         thd_sql_command(thd), | 
| 6505 |         lock_type, | 
| 6506 |         share->full_table_name()); | 
| 6507 |  | 
| 6508 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS(TOKUDB_DEBUG_LOCK, "q %s" , thd->query()); | 
| 6509 |  | 
| 6510 |     int error = 0; | 
| 6511 |     tokudb_trx_data* trx = (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton); | 
| 6512 |     if (!trx) { | 
| 6513 |         error = create_tokudb_trx_data_instance(&trx); | 
| 6514 |         if (error) { goto cleanup; } | 
| 6515 |         thd_set_ha_data(thd, tokudb_hton, trx); | 
| 6516 |     } | 
| 6517 |  | 
| 6518 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 6519 |         TOKUDB_DEBUG_TXN, | 
| 6520 |         "trx %p %p %p %p %u %u" , | 
| 6521 |         trx->all, | 
| 6522 |         trx->stmt, | 
| 6523 |         trx->sp_level, | 
| 6524 |         trx->sub_sp_level, | 
| 6525 |         trx->tokudb_lock_count, | 
| 6526 |         trx->create_lock_count); | 
| 6527 |  | 
| 6528 |     /* | 
| 6529 |        note that trx->stmt may have been already initialized as start_stmt() | 
| 6530 |        is called for *each table* not for each storage engine, | 
| 6531 |        and there could be many bdb tables referenced in the query | 
| 6532 |      */ | 
| 6533 |     if (!trx->stmt) { | 
| 6534 |         error = create_txn(thd, trx); | 
| 6535 |         if (error) { | 
| 6536 |             goto cleanup; | 
| 6537 |         } | 
| 6538 |         trx->create_lock_count = trx->tokudb_lock_count; | 
| 6539 |     } else { | 
| 6540 |         TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 6541 |             TOKUDB_DEBUG_TXN, | 
| 6542 |             "trx->stmt %p already existed" , | 
| 6543 |             trx->stmt); | 
| 6544 |     } | 
| 6545 |     if (added_rows > deleted_rows) { | 
| 6546 |         share->rows_from_locked_table = added_rows - deleted_rows; | 
| 6547 |     } | 
| 6548 |     transaction = trx->sub_sp_level; | 
| 6549 |     trans_register_ha(thd, false, tokudb_hton); | 
| 6550 | cleanup: | 
| 6551 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 6552 | } | 
| 6553 |  | 
| 6554 |  | 
uint32_t ha_tokudb::get_cursor_isolation_flags(enum thr_lock_type lock_type, THD* thd) {
    uint sql_command = thd_sql_command(thd);
    bool in_lock_tables = thd_in_lock_tables(thd);

    //
    // following InnoDB's lead: have the checksum command use a snapshot read
    //
    if (sql_command == SQLCOM_CHECKSUM) {
        return 0;
    } else if ((lock_type == TL_READ && in_lock_tables) ||
               (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables) ||
               sql_command != SQLCOM_SELECT ||
               (sql_command == SQLCOM_SELECT &&
                lock_type >= TL_WRITE_ALLOW_WRITE)) { // select for update
        ulong tx_isolation = thd_tx_isolation(thd);
        // pattern matched from InnoDB
        if ((tx_isolation == ISO_READ_COMMITTED ||
             tx_isolation == ISO_READ_UNCOMMITTED) &&
            (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT) &&
            (sql_command == SQLCOM_INSERT_SELECT ||
             sql_command == SQLCOM_REPLACE_SELECT ||
             sql_command == SQLCOM_UPDATE ||
             sql_command == SQLCOM_CREATE_TABLE)) {
            return 0;
        } else {
            return DB_SERIALIZABLE;
        }
    } else {
        return 0;
    }
}
| 6588 |  | 
| 6589 | /* | 
| 6590 |   The idea with handler::store_lock() is the following: | 
| 6591 |  | 
| 6592 |   The statement decided which locks we should need for the table | 
| 6593 |   for updates/deletes/inserts we get WRITE locks, for SELECT... we get | 
| 6594 |   read locks. | 
| 6595 |  | 
| 6596 |   Before adding the lock into the table lock handler (see thr_lock.c) | 
| 6597 |   mysqld calls store lock with the requested locks.  Store lock can now | 
| 6598 |   modify a write lock to a read lock (or some other lock), ignore the | 
| 6599 |   lock (if we don't want to use MySQL table locks at all) or add locks | 
| 6600 |   for many tables (like we do when we are using a MERGE handler). | 
| 6601 |  | 
| 6602 |   TokuDB changes all WRITE locks to TL_WRITE_ALLOW_WRITE (which | 
| 6603 |   signals that we are doing WRITES, but we are still allowing other | 
| 6604 |   reader's and writer's. | 
| 6605 |  | 
| 6606 |   When releasing locks, store_lock() are also called. In this case one | 
| 6607 |   usually doesn't have to do anything. | 
| 6608 |  | 
| 6609 |   In some exceptional cases MySQL may send a request for a TL_IGNORE; | 
| 6610 |   This means that we are requesting the same lock as last time and this | 
| 6611 |   should also be ignored. (This may happen when someone does a flush | 
| 6612 |   table when we have opened a part of the tables, in which case mysqld | 
| 6613 |   closes and reopens the tables and tries to get the same locks at last | 
| 6614 |   time).  In the future we will probably try to remove this. | 
| 6615 | */ | 
| 6616 |  | 
| 6617 | THR_LOCK_DATA* *ha_tokudb::store_lock( | 
| 6618 |     THD* thd, | 
| 6619 |     THR_LOCK_DATA** to, | 
| 6620 |     enum thr_lock_type lock_type) { | 
| 6621 |  | 
| 6622 |     TOKUDB_HANDLER_DBUG_ENTER( | 
| 6623 |         "lock_type=%d cmd=%d" , | 
| 6624 |         lock_type, | 
| 6625 |         thd_sql_command(thd)); | 
| 6626 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 6627 |         TOKUDB_DEBUG_LOCK, | 
| 6628 |         "lock_type=%d cmd=%d" , | 
| 6629 |         lock_type, | 
| 6630 |         thd_sql_command(thd)); | 
| 6631 |  | 
| 6632 |     if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) { | 
| 6633 |         enum_sql_command sql_command = (enum_sql_command) thd_sql_command(thd); | 
| 6634 |         if (!thd->in_lock_tables) { | 
| 6635 |             if (sql_command == SQLCOM_CREATE_INDEX && | 
| 6636 |                 tokudb::sysvars::create_index_online(thd)) { | 
| 6637 |                 // hot indexing | 
| 6638 |                 rwlock_t_lock_read(share->_num_DBs_lock); | 
| 6639 |                 if (share->num_DBs == | 
| 6640 |                     (table->s->keys + tokudb_test(hidden_primary_key))) { | 
| 6641 |                     lock_type = TL_WRITE_ALLOW_WRITE; | 
| 6642 |                 } | 
| 6643 |                 share->_num_DBs_lock.unlock(); | 
| 6644 |             } else if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && | 
| 6645 |                         lock_type <= TL_WRITE) && | 
| 6646 |                         sql_command != SQLCOM_TRUNCATE && | 
| 6647 |                         !thd_tablespace_op(thd)) { | 
| 6648 |                 // allow concurrent writes | 
| 6649 |                 lock_type = TL_WRITE_ALLOW_WRITE; | 
| 6650 |             } else if (sql_command == SQLCOM_OPTIMIZE && | 
| 6651 |                        lock_type == TL_READ_NO_INSERT) { | 
| 6652 |                 // hot optimize table | 
| 6653 |                 lock_type = TL_READ; | 
| 6654 |             } | 
| 6655 |         } | 
| 6656 |         lock.type = lock_type; | 
| 6657 |     } | 
| 6658 |     *to++ = &lock; | 
| 6659 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 6660 |         TOKUDB_DEBUG_LOCK, | 
| 6661 |         "lock_type=%d" , | 
| 6662 |         lock_type); | 
| 6663 |     TOKUDB_HANDLER_DBUG_RETURN_PTR(to); | 
| 6664 | } | 
| 6665 |  | 
| 6666 | static toku_compression_method get_compression_method(DB* file) { | 
| 6667 |     enum toku_compression_method method; | 
| 6668 |     int r = file->get_compression_method(file, &method); | 
| 6669 |     assert_always(r == 0); | 
| 6670 |     return method; | 
| 6671 | } | 
| 6672 |  | 
| 6673 | #if TOKU_INCLUDE_ROW_TYPE_COMPRESSION | 
| 6674 | enum row_type ha_tokudb::get_row_type() const { | 
| 6675 |     toku_compression_method compression_method = get_compression_method(share->file); | 
| 6676 |     return toku_compression_method_to_row_type(compression_method); | 
| 6677 | } | 
| 6678 | #endif | 
| 6679 |  | 
| 6680 | static int create_sub_table( | 
| 6681 |     const char* table_name, | 
| 6682 |     DBT* row_descriptor, | 
| 6683 |     DB_TXN* txn, | 
| 6684 |     uint32_t block_size, | 
| 6685 |     uint32_t read_block_size, | 
| 6686 |     toku_compression_method compression_method, | 
| 6687 |     bool is_hot_index, | 
| 6688 |     uint32_t fanout) { | 
| 6689 |  | 
| 6690 |     TOKUDB_DBUG_ENTER("" ); | 
| 6691 |     int error; | 
| 6692 |     DB *file = NULL; | 
| 6693 |     uint32_t create_flags; | 
| 6694 |  | 
| 6695 |  | 
| 6696 |     error = db_create(&file, db_env, 0); | 
| 6697 |     if (error) { | 
| 6698 |         DBUG_PRINT("error" , ("Got error: %d when creating table" , error)); | 
| 6699 |         my_errno = error; | 
| 6700 |         goto exit; | 
| 6701 |     } | 
| 6702 |  | 
| 6703 |  | 
| 6704 |     if (block_size != 0) { | 
| 6705 |         error = file->set_pagesize(file, block_size); | 
| 6706 |         if (error != 0) { | 
| 6707 |             DBUG_PRINT( | 
| 6708 |                 "error" , | 
| 6709 |                 ("Got error: %d when setting block size %u for table '%s'" , | 
| 6710 |                     error, | 
| 6711 |                     block_size, | 
| 6712 |                     table_name)); | 
| 6713 |             goto exit; | 
| 6714 |         } | 
| 6715 |     } | 
| 6716 |     if (read_block_size != 0) { | 
| 6717 |         error = file->set_readpagesize(file, read_block_size); | 
| 6718 |         if (error != 0) { | 
| 6719 |             DBUG_PRINT( | 
| 6720 |                 "error" , | 
| 6721 |                 ("Got error: %d when setting read block size %u for table '%s'" , | 
| 6722 |                     error, | 
| 6723 |                     read_block_size, | 
| 6724 |                     table_name)); | 
| 6725 |             goto exit; | 
| 6726 |         } | 
| 6727 |     } | 
| 6728 |     if (fanout != 0) { | 
| 6729 |         error = file->set_fanout(file, fanout); | 
| 6730 |         if (error != 0) { | 
| 6731 |             DBUG_PRINT( | 
| 6732 |                 "error" , | 
| 6733 |                 ("Got error: %d when setting fanout %u for table '%s'" , | 
| 6734 |                     error, | 
| 6735 |                     fanout, | 
| 6736 |                     table_name)); | 
| 6737 |             goto exit; | 
| 6738 |         } | 
| 6739 |     } | 
| 6740 |     error = file->set_compression_method(file, compression_method); | 
| 6741 |     if (error != 0) { | 
| 6742 |         DBUG_PRINT( | 
| 6743 |             "error" , | 
| 6744 |             ("Got error: %d when setting compression type %u for table '%s'" , | 
| 6745 |                 error, | 
| 6746 |                 compression_method, | 
| 6747 |                 table_name)); | 
| 6748 |         goto exit; | 
| 6749 |     } | 
| 6750 |  | 
| 6751 |     create_flags = | 
| 6752 |         DB_THREAD | DB_CREATE | DB_EXCL | (is_hot_index ? DB_IS_HOT_INDEX : 0); | 
| 6753 |     error = | 
| 6754 |         file->open( | 
| 6755 |             file, | 
| 6756 |             txn, | 
| 6757 |             table_name, | 
| 6758 |             NULL, | 
| 6759 |             DB_BTREE, | 
| 6760 |             create_flags, | 
| 6761 |             my_umask); | 
| 6762 |     if (error) { | 
| 6763 |         DBUG_PRINT( | 
| 6764 |             "error" , | 
| 6765 |             ("Got error: %d when opening table '%s'" , error, table_name)); | 
| 6766 |         goto exit; | 
| 6767 |     }  | 
| 6768 |  | 
| 6769 |     error = | 
| 6770 |         file->change_descriptor( | 
| 6771 |             file, | 
| 6772 |             txn, | 
| 6773 |             row_descriptor, | 
| 6774 |             (is_hot_index ? DB_IS_HOT_INDEX | | 
| 6775 |                 DB_UPDATE_CMP_DESCRIPTOR : | 
| 6776 |                 DB_UPDATE_CMP_DESCRIPTOR)); | 
| 6777 |     if (error) { | 
| 6778 |         DBUG_PRINT( | 
| 6779 |             "error" , | 
| 6780 |             ("Got error: %d when setting row descriptor for table '%s'" , | 
| 6781 |                 error, | 
| 6782 |                 table_name)); | 
| 6783 |         goto exit; | 
| 6784 |     } | 
| 6785 |  | 
| 6786 |     error = 0; | 
| 6787 | exit: | 
| 6788 |     if (file) { | 
| 6789 |         int r = file->close(file, 0); | 
| 6790 |         assert_always(r==0); | 
| 6791 |     } | 
| 6792 |     TOKUDB_DBUG_RETURN(error); | 
| 6793 | } | 
| 6794 |  | 
| 6795 | void ha_tokudb::update_create_info(HA_CREATE_INFO* create_info) { | 
| 6796 |     if (share->has_auto_inc) { | 
| 6797 |         info(HA_STATUS_AUTO); | 
| 6798 |         if (!(create_info->used_fields & HA_CREATE_USED_AUTO) || | 
| 6799 |             create_info->auto_increment_value < stats.auto_increment_value) { | 
| 6800 |             create_info->auto_increment_value = stats.auto_increment_value; | 
| 6801 |         } | 
| 6802 |     } | 
| 6803 | #if TOKU_INCLUDE_ROW_TYPE_COMPRESSION | 
| 6804 |     if (!(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { | 
| 6805 |         // show create table asks us to update this create_info, this makes it | 
| 6806 |         // so we'll always show what compression type we're using | 
| 6807 |         create_info->row_type = get_row_type(); | 
| 6808 |         if (create_info->row_type == ROW_TYPE_TOKU_ZLIB && | 
| 6809 |             tokudb::sysvars::hide_default_row_format(ha_thd()) != 0) { | 
| 6810 |             create_info->row_type = ROW_TYPE_DEFAULT; | 
| 6811 |         } | 
| 6812 |     } | 
| 6813 | #endif | 
| 6814 | } | 
| 6815 |  | 
| 6816 | // | 
| 6817 | // removes key name from status.tokudb. | 
| 6818 | // needed for when we are dropping indexes, so that  | 
| 6819 | // during drop table, we do not attempt to remove already dropped | 
| 6820 | // indexes because we did not keep status.tokudb in sync with list of indexes. | 
| 6821 | // | 
| 6822 | int ha_tokudb::remove_key_name_from_status(DB* status_block, const char* key_name, DB_TXN* txn) { | 
| 6823 |     int error; | 
| 6824 |     uchar status_key_info[FN_REFLEN + sizeof(HA_METADATA_KEY)]; | 
| 6825 |     HA_METADATA_KEY md_key = hatoku_key_name; | 
| 6826 |     memcpy(status_key_info, &md_key, sizeof(HA_METADATA_KEY)); | 
| 6827 |     // | 
| 6828 |     // put index name in status.tokudb | 
| 6829 |     //  | 
| 6830 |     memcpy( | 
| 6831 |         status_key_info + sizeof(HA_METADATA_KEY),  | 
| 6832 |         key_name,  | 
| 6833 |         strlen(key_name) + 1 | 
| 6834 |         ); | 
| 6835 |     error = remove_metadata( | 
| 6836 |         status_block, | 
| 6837 |         status_key_info, | 
| 6838 |         sizeof(HA_METADATA_KEY) + strlen(key_name) + 1, | 
| 6839 |         txn | 
| 6840 |         ); | 
| 6841 |     return error; | 
| 6842 | } | 
| 6843 |  | 
| 6844 | // | 
| 6845 | // writes the key name in status.tokudb, so that we may later delete or rename | 
| 6846 | // the dictionary associated with key_name | 
| 6847 | // | 
| 6848 | int ha_tokudb::write_key_name_to_status(DB* status_block, const char* key_name, | 
| 6849 |  DB_TXN* txn) { | 
| 6850 |     int error; | 
| 6851 |     uchar status_key_info[FN_REFLEN + sizeof(HA_METADATA_KEY)]; | 
| 6852 |     HA_METADATA_KEY md_key = hatoku_key_name; | 
| 6853 |     memcpy(status_key_info, &md_key, sizeof(HA_METADATA_KEY)); | 
| 6854 |     // | 
| 6855 |     // put index name in status.tokudb | 
| 6856 |     //  | 
| 6857 |     memcpy( | 
| 6858 |         status_key_info + sizeof(HA_METADATA_KEY),  | 
| 6859 |         key_name,  | 
| 6860 |         strlen(key_name) + 1 | 
| 6861 |         ); | 
| 6862 |     error = write_metadata( | 
| 6863 |         status_block, | 
| 6864 |         status_key_info, | 
| 6865 |         sizeof(HA_METADATA_KEY) + strlen(key_name) + 1, | 
| 6866 |         NULL, | 
| 6867 |         0, | 
| 6868 |         txn | 
| 6869 |         ); | 
| 6870 |     return error; | 
| 6871 | } | 
| 6872 |  | 
| 6873 | // | 
| 6874 | // some tracing moved out of ha_tokudb::create, because ::create was | 
| 6875 | // getting cluttered | 
| 6876 | // | 
| 6877 | void ha_tokudb::trace_create_table_info(const char *name, TABLE * form) { | 
| 6878 |     uint i; | 
| 6879 |     // | 
| 6880 |     // tracing information about what type of table we are creating | 
| 6881 |     // | 
| 6882 |     if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_OPEN))) { | 
| 6883 |         for (i = 0; i < form->s->fields; i++) { | 
| 6884 |             Field *field = form->s->field[i]; | 
| 6885 |             TOKUDB_HANDLER_TRACE( | 
| 6886 |                 "field:%d:%s:type=%d:flags=%x" , | 
| 6887 |                 i, | 
| 6888 |                 field->field_name.str, | 
| 6889 |                 field->type(), | 
| 6890 |                 field->flags); | 
| 6891 |         } | 
| 6892 |         for (i = 0; i < form->s->keys; i++) { | 
| 6893 |             KEY *key = &form->s->key_info[i]; | 
| 6894 |             TOKUDB_HANDLER_TRACE( | 
| 6895 |                 "key:%d:%s:%d" , | 
| 6896 |                 i, | 
| 6897 |                 key->name.str, | 
| 6898 |                 key->user_defined_key_parts); | 
| 6899 |             uint p; | 
| 6900 |             for (p = 0; p < key->user_defined_key_parts; p++) { | 
| 6901 |                 KEY_PART_INFO* key_part = &key->key_part[p]; | 
| 6902 |                 Field* field = key_part->field; | 
| 6903 |                 TOKUDB_HANDLER_TRACE( | 
| 6904 |                     "key:%d:%d:length=%d:%s:type=%d:flags=%x" , | 
| 6905 |                     i, | 
| 6906 |                     p, | 
| 6907 |                     key_part->length, | 
| 6908 |                     field->field_name.str, | 
| 6909 |                     field->type(), | 
| 6910 |                     field->flags); | 
| 6911 |             } | 
| 6912 |         } | 
| 6913 |     } | 
| 6914 | } | 
| 6915 |  | 
| 6916 | static uint32_t get_max_desc_size(KEY_AND_COL_INFO* kc_info, TABLE* form) { | 
| 6917 |     uint32_t max_row_desc_buff_size; | 
| 6918 |     // upper bound of key comparison descriptor | 
| 6919 |     max_row_desc_buff_size = 2*(form->s->fields * 6)+10; | 
| 6920 |     // upper bound for sec. key part | 
| 6921 |     max_row_desc_buff_size += get_max_secondary_key_pack_desc_size(kc_info); | 
| 6922 |     // upper bound for clustering val part | 
| 6923 |     max_row_desc_buff_size += get_max_clustering_val_pack_desc_size(form->s); | 
| 6924 |     return max_row_desc_buff_size; | 
| 6925 | } | 
| 6926 |  | 
| 6927 | static uint32_t create_secondary_key_descriptor( | 
| 6928 |     uchar* buf, | 
| 6929 |     KEY* key_info, | 
| 6930 |     KEY* prim_key, | 
| 6931 |     uint hpk, | 
| 6932 |     TABLE* form, | 
| 6933 |     uint primary_key, | 
| 6934 |     uint32_t keynr, | 
| 6935 |     KEY_AND_COL_INFO* kc_info) { | 
| 6936 |  | 
| 6937 |     uchar* ptr = NULL; | 
| 6938 |  | 
| 6939 |     ptr = buf; | 
| 6940 |     ptr += create_toku_key_descriptor( | 
| 6941 |         ptr, | 
| 6942 |         false, | 
| 6943 |         key_info, | 
| 6944 |         hpk, | 
| 6945 |         prim_key | 
| 6946 |         ); | 
| 6947 |  | 
| 6948 |     ptr += create_toku_secondary_key_pack_descriptor( | 
| 6949 |         ptr, | 
| 6950 |         hpk, | 
| 6951 |         primary_key, | 
| 6952 |         form->s, | 
| 6953 |         form, | 
| 6954 |         kc_info, | 
| 6955 |         key_info, | 
| 6956 |         prim_key | 
| 6957 |         ); | 
| 6958 |  | 
| 6959 |     ptr += create_toku_clustering_val_pack_descriptor( | 
| 6960 |         ptr, | 
| 6961 |         primary_key, | 
| 6962 |         form->s, | 
| 6963 |         kc_info, | 
| 6964 |         keynr, | 
| 6965 |         key_is_clustering(key_info) | 
| 6966 |         ); | 
| 6967 |     return ptr - buf; | 
| 6968 | } | 
| 6969 |  | 
| 6970 |  | 
| 6971 | // | 
| 6972 | // creates dictionary for secondary index, with key description key_info, all using txn | 
| 6973 | // | 
| 6974 | int ha_tokudb::create_secondary_dictionary( | 
| 6975 |     const char* name, | 
| 6976 |     TABLE* form, | 
| 6977 |     KEY* key_info, | 
| 6978 |     DB_TXN* txn, | 
| 6979 |     KEY_AND_COL_INFO* kc_info, | 
| 6980 |     uint32_t keynr, | 
| 6981 |     bool is_hot_index, | 
| 6982 |     toku_compression_method compression_method) { | 
| 6983 |  | 
| 6984 |     int error; | 
| 6985 |     DBT row_descriptor; | 
| 6986 |     uchar* row_desc_buff = NULL; | 
| 6987 |     char* newname = NULL; | 
| 6988 |     size_t newname_len = 0; | 
| 6989 |     KEY* prim_key = NULL; | 
| 6990 |     char dict_name[MAX_DICT_NAME_LEN]; | 
| 6991 |     uint32_t max_row_desc_buff_size; | 
| 6992 |     uint hpk= (form->s->primary_key >= MAX_KEY) ? | 
| 6993 |         TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH : 0; | 
| 6994 |     uint32_t block_size; | 
| 6995 |     uint32_t read_block_size; | 
| 6996 |     uint32_t fanout; | 
| 6997 |     THD* thd = ha_thd(); | 
| 6998 |  | 
| 6999 |     memset(&row_descriptor, 0, sizeof(row_descriptor)); | 
| 7000 |      | 
| 7001 |     max_row_desc_buff_size = get_max_desc_size(kc_info,form); | 
| 7002 |  | 
| 7003 |     row_desc_buff = (uchar*)tokudb::memory::malloc( | 
| 7004 |         max_row_desc_buff_size, | 
| 7005 |         MYF(MY_WME)); | 
| 7006 |     if (row_desc_buff == NULL) { | 
| 7007 |         error = ENOMEM; | 
| 7008 |         goto cleanup; | 
| 7009 |     } | 
| 7010 |  | 
| 7011 |     newname_len = get_max_dict_name_path_length(name); | 
| 7012 |     newname = (char*)tokudb::memory::malloc(newname_len, MYF(MY_WME)); | 
| 7013 |     if (newname == NULL) { | 
| 7014 |         error = ENOMEM; | 
| 7015 |         goto cleanup; | 
| 7016 |     } | 
| 7017 |  | 
| 7018 |     sprintf(dict_name, "key-%s" , key_info->name.str); | 
| 7019 |     make_name(newname, newname_len, name, dict_name); | 
| 7020 |  | 
| 7021 |     prim_key = (hpk) ? NULL : &form->s->key_info[primary_key]; | 
| 7022 |  | 
| 7023 |     // | 
| 7024 |     // setup the row descriptor | 
| 7025 |     // | 
| 7026 |     row_descriptor.data = row_desc_buff; | 
| 7027 |     // | 
| 7028 |     // save data necessary for key comparisons | 
| 7029 |     // | 
| 7030 |     row_descriptor.size = create_secondary_key_descriptor( | 
| 7031 |         row_desc_buff, | 
| 7032 |         key_info, | 
| 7033 |         prim_key, | 
| 7034 |         hpk, | 
| 7035 |         form, | 
| 7036 |         primary_key, | 
| 7037 |         keynr, | 
| 7038 |         kc_info); | 
| 7039 |     assert_always(row_descriptor.size <= max_row_desc_buff_size); | 
| 7040 |  | 
| 7041 |     block_size = tokudb::sysvars::block_size(thd); | 
| 7042 |     read_block_size = tokudb::sysvars::read_block_size(thd); | 
| 7043 |     fanout = tokudb::sysvars::fanout(thd); | 
| 7044 |  | 
| 7045 |     error = create_sub_table( | 
| 7046 |         newname, | 
| 7047 |         &row_descriptor, | 
| 7048 |         txn, | 
| 7049 |         block_size, | 
| 7050 |         read_block_size, | 
| 7051 |         compression_method, | 
| 7052 |         is_hot_index, | 
| 7053 |         fanout); | 
| 7054 | cleanup:     | 
| 7055 |     tokudb::memory::free(newname); | 
| 7056 |     tokudb::memory::free(row_desc_buff); | 
| 7057 |     return error; | 
| 7058 | } | 
| 7059 |  | 
| 7060 |  | 
| 7061 | static uint32_t create_main_key_descriptor( | 
| 7062 |     uchar* buf, | 
| 7063 |     KEY* prim_key, | 
| 7064 |     uint hpk, | 
| 7065 |     uint primary_key, | 
| 7066 |     TABLE* form, | 
| 7067 |     KEY_AND_COL_INFO* kc_info) { | 
| 7068 |  | 
| 7069 |     uchar* ptr = buf; | 
| 7070 |     ptr += create_toku_key_descriptor( | 
| 7071 |         ptr,  | 
| 7072 |         hpk, | 
| 7073 |         prim_key, | 
| 7074 |         false, | 
| 7075 |         NULL); | 
| 7076 |      | 
| 7077 |     ptr += create_toku_main_key_pack_descriptor(ptr); | 
| 7078 |  | 
| 7079 |     ptr += create_toku_clustering_val_pack_descriptor( | 
| 7080 |         ptr, | 
| 7081 |         primary_key, | 
| 7082 |         form->s, | 
| 7083 |         kc_info, | 
| 7084 |         primary_key, | 
| 7085 |         false); | 
| 7086 |     return ptr - buf; | 
| 7087 | } | 
| 7088 |  | 
| 7089 | // | 
| 7090 | // create and close the main dictionarr with name of "name" using table form, all within | 
| 7091 | // transaction txn. | 
| 7092 | // | 
| 7093 | int ha_tokudb::create_main_dictionary( | 
| 7094 |     const char* name, | 
| 7095 |     TABLE* form, | 
| 7096 |     DB_TXN* txn, | 
| 7097 |     KEY_AND_COL_INFO* kc_info, | 
| 7098 |     toku_compression_method compression_method) { | 
| 7099 |  | 
| 7100 |     int error; | 
| 7101 |     DBT row_descriptor; | 
| 7102 |     uchar* row_desc_buff = NULL; | 
| 7103 |     char* newname = NULL; | 
| 7104 |     size_t newname_len = 0; | 
| 7105 |     KEY* prim_key = NULL; | 
| 7106 |     uint32_t max_row_desc_buff_size; | 
| 7107 |     uint hpk = (form->s->primary_key >= MAX_KEY) ? TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH : 0; | 
| 7108 |     uint32_t block_size; | 
| 7109 |     uint32_t read_block_size; | 
| 7110 |     uint32_t fanout; | 
| 7111 |     THD* thd = ha_thd(); | 
| 7112 |  | 
| 7113 |     memset(&row_descriptor, 0, sizeof(row_descriptor)); | 
| 7114 |     max_row_desc_buff_size = get_max_desc_size(kc_info, form); | 
| 7115 |  | 
| 7116 |     row_desc_buff = (uchar*)tokudb::memory::malloc( | 
| 7117 |         max_row_desc_buff_size, | 
| 7118 |         MYF(MY_WME)); | 
| 7119 |     if (row_desc_buff == NULL) { | 
| 7120 |         error = ENOMEM; | 
| 7121 |         goto cleanup; | 
| 7122 |     } | 
| 7123 |  | 
| 7124 |     newname_len = get_max_dict_name_path_length(name); | 
| 7125 |     newname = (char*)tokudb::memory::malloc(newname_len, MYF(MY_WME)); | 
| 7126 |     if (newname == NULL) { | 
| 7127 |         error = ENOMEM; | 
| 7128 |         goto cleanup; | 
| 7129 |     } | 
| 7130 |  | 
| 7131 |     make_name(newname, newname_len, name, "main" ); | 
| 7132 |  | 
| 7133 |     prim_key = (hpk) ? NULL : &form->s->key_info[primary_key]; | 
| 7134 |  | 
| 7135 |     // | 
| 7136 |     // setup the row descriptor | 
| 7137 |     // | 
| 7138 |     row_descriptor.data = row_desc_buff; | 
| 7139 |     // | 
| 7140 |     // save data necessary for key comparisons | 
| 7141 |     // | 
| 7142 |     row_descriptor.size = create_main_key_descriptor( | 
| 7143 |         row_desc_buff, | 
| 7144 |         prim_key, | 
| 7145 |         hpk, | 
| 7146 |         primary_key, | 
| 7147 |         form, | 
| 7148 |         kc_info); | 
| 7149 |     assert_always(row_descriptor.size <= max_row_desc_buff_size); | 
| 7150 |  | 
| 7151 |     block_size = tokudb::sysvars::block_size(thd); | 
| 7152 |     read_block_size = tokudb::sysvars::read_block_size(thd); | 
| 7153 |     fanout = tokudb::sysvars::fanout(thd); | 
| 7154 |  | 
| 7155 |     /* Create the main table that will hold the real rows */ | 
| 7156 |     error = create_sub_table( | 
| 7157 |         newname, | 
| 7158 |         &row_descriptor, | 
| 7159 |         txn, | 
| 7160 |         block_size, | 
| 7161 |         read_block_size, | 
| 7162 |         compression_method, | 
| 7163 |         false, | 
| 7164 |         fanout); | 
| 7165 | cleanup:     | 
| 7166 |     tokudb::memory::free(newname); | 
| 7167 |     tokudb::memory::free(row_desc_buff); | 
| 7168 |     return error; | 
| 7169 | } | 
| 7170 |  | 
| 7171 | // | 
| 7172 | // Creates a new table | 
| 7173 | // Parameters: | 
| 7174 | //      [in]    name - table name | 
| 7175 | //      [in]    form - info on table, columns and indexes | 
//      [in]    create_info - more info on table (auto_increment value, row format)
| 7177 | // Returns: | 
| 7178 | //      0 on success | 
| 7179 | //      error otherwise | 
| 7180 | // | 
| 7181 | int ha_tokudb::create( | 
| 7182 |     const char* name, | 
| 7183 |     TABLE* form, | 
| 7184 |     HA_CREATE_INFO* create_info) { | 
| 7185 |  | 
| 7186 |     TOKUDB_HANDLER_DBUG_ENTER("%s" , name); | 
| 7187 |  | 
| 7188 |     int error; | 
| 7189 |     DB *status_block = NULL; | 
| 7190 |     uint version; | 
| 7191 |     uint capabilities; | 
| 7192 |     DB_TXN* txn = NULL; | 
| 7193 |     bool do_commit = false; | 
| 7194 |     char* newname = NULL; | 
| 7195 |     size_t newname_len = 0; | 
| 7196 |     KEY_AND_COL_INFO kc_info; | 
| 7197 |     tokudb_trx_data *trx = NULL; | 
| 7198 |     THD* thd = ha_thd(); | 
| 7199 |  | 
| 7200 |     memset(&kc_info, 0, sizeof(kc_info)); | 
| 7201 |  | 
| 7202 | #if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100999 | 
    // TokuDB does not support discover_table_names() and writes no files
    // in the database directory, so automatic filename-based
    // discover_table_names() doesn't work either. We must therefore force
    // the .frm file to disk.
| 7207 |     form->s->write_frm_image(); | 
| 7208 | #endif | 
| 7209 |  | 
| 7210 | #if TOKU_INCLUDE_OPTION_STRUCTS | 
| 7211 |     const tokudb::sysvars::row_format_t row_format = | 
| 7212 |         (tokudb::sysvars::row_format_t)form->s->option_struct->row_format; | 
| 7213 | #else | 
| 7214 |     // TDB-76 : CREATE TABLE ... LIKE ... does not use source row_format on | 
| 7215 |     //          target table | 
| 7216 |     // Original code would only use create_info->row_type if | 
| 7217 |     // create_info->used_fields & HA_CREATE_USED_ROW_FORMAT was true. This | 
| 7218 |     // would cause us to skip transferring the row_format for a table created | 
    // via CREATE TABLE tn LIKE tn. We also take on more InnoDB-like behavior
| 7220 |     // and throw a warning if we get a row_format that we can't translate into | 
| 7221 |     // a known TokuDB row_format. | 
| 7222 |     tokudb::sysvars::row_format_t row_format = | 
| 7223 |         tokudb::sysvars::row_format(thd); | 
| 7224 |  | 
| 7225 |     if ((create_info->used_fields & HA_CREATE_USED_ROW_FORMAT) || | 
| 7226 |         create_info->row_type != ROW_TYPE_DEFAULT) { | 
| 7227 |         row_format = row_type_to_row_format(create_info->row_type); | 
| 7228 |         if (row_format == tokudb::sysvars::SRV_ROW_FORMAT_DEFAULT && | 
| 7229 |             create_info->row_type != ROW_TYPE_DEFAULT) { | 
| 7230 |             push_warning(thd, | 
| 7231 |                          Sql_condition::WARN_LEVEL_WARN, | 
| 7232 |                          ER_ILLEGAL_HA_CREATE_OPTION, | 
| 7233 |                          "TokuDB: invalid ROW_FORMAT specifier." ); | 
| 7234 |         } | 
| 7235 |     } | 
| 7236 | #endif | 
| 7237 |     const toku_compression_method compression_method = | 
| 7238 |         row_format_to_toku_compression_method(row_format); | 
| 7239 |  | 
| 7240 |     bool create_from_engine = (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE); | 
| 7241 |     if (create_from_engine) { | 
| 7242 |         // table already exists, nothing to do | 
| 7243 |         error = 0; | 
| 7244 |         goto cleanup; | 
| 7245 |     } | 
| 7246 |      | 
| 7247 |     // validate the fields in the table. If the table has fields | 
| 7248 |     // we do not support that came from an old version of MySQL, | 
| 7249 |     // gracefully return an error | 
| 7250 |     for (uint32_t i = 0; i < form->s->fields; i++) { | 
| 7251 |         Field* field = table_share->field[i]; | 
| 7252 |         if (!field_valid_for_tokudb_table(field)) { | 
| 7253 |             sql_print_error("Table %s has an invalid field %s, that was created "  | 
| 7254 |                 "with an old version of MySQL. This field is no longer supported. "  | 
| 7255 |                 "This is probably due to an alter table engine=TokuDB. To load this "  | 
| 7256 |                 "table, do a dump and load" , | 
| 7257 |                 name, | 
| 7258 |                 field->field_name.str | 
| 7259 |                 ); | 
| 7260 |             error = HA_ERR_UNSUPPORTED; | 
| 7261 |             goto cleanup; | 
| 7262 |         } | 
| 7263 |     } | 
| 7264 |  | 
| 7265 |     newname_len = get_max_dict_name_path_length(name); | 
| 7266 |     newname = (char*)tokudb::memory::malloc(newname_len, MYF(MY_WME)); | 
| 7267 |     if (newname == NULL) { | 
| 7268 |         error = ENOMEM; | 
| 7269 |         goto cleanup; | 
| 7270 |     } | 
| 7271 |  | 
| 7272 |     trx = (tokudb_trx_data *) thd_get_ha_data(ha_thd(), tokudb_hton); | 
| 7273 |     if (trx && trx->sub_sp_level && | 
| 7274 |         thd_sql_command(thd) == SQLCOM_CREATE_TABLE) { | 
| 7275 |         txn = trx->sub_sp_level; | 
| 7276 |     } else { | 
| 7277 |         do_commit = true; | 
| 7278 |         error = txn_begin(db_env, 0, &txn, 0, thd); | 
| 7279 |         if (error) { | 
| 7280 |             goto cleanup; | 
| 7281 |         } | 
| 7282 |     } | 
| 7283 |      | 
| 7284 |     primary_key = form->s->primary_key; | 
| 7285 |     hidden_primary_key = (primary_key  >= MAX_KEY) ? TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH : 0; | 
| 7286 |     if (hidden_primary_key) { | 
| 7287 |         primary_key = form->s->keys; | 
| 7288 |     } | 
| 7289 |  | 
| 7290 |     /* do some tracing */ | 
| 7291 |     trace_create_table_info(name,form); | 
| 7292 |  | 
| 7293 |     /* Create status.tokudb and save relevant metadata */ | 
| 7294 |     make_name(newname, newname_len, name, "status" ); | 
| 7295 |  | 
| 7296 |     error = tokudb::metadata::create(db_env, &status_block, newname, txn); | 
| 7297 |     if (error) { goto cleanup; } | 
| 7298 |  | 
| 7299 |     version = HA_TOKU_VERSION;     | 
| 7300 |     error = write_to_status( | 
| 7301 |         status_block, | 
| 7302 |         hatoku_new_version, | 
| 7303 |         &version, | 
| 7304 |         sizeof(version), | 
| 7305 |         txn); | 
| 7306 |     if (error) { | 
| 7307 |         goto cleanup; | 
| 7308 |     } | 
| 7309 |  | 
| 7310 |     capabilities = HA_TOKU_CAP; | 
| 7311 |     error = write_to_status( | 
| 7312 |         status_block, | 
| 7313 |         hatoku_capabilities, | 
| 7314 |         &capabilities, | 
| 7315 |         sizeof(capabilities), | 
| 7316 |         txn); | 
| 7317 |     if (error) { | 
| 7318 |         goto cleanup; | 
| 7319 |     } | 
| 7320 |  | 
| 7321 |     error = write_auto_inc_create( | 
| 7322 |         status_block, | 
| 7323 |         create_info->auto_increment_value, | 
| 7324 |         txn); | 
| 7325 |     if (error) { | 
| 7326 |         goto cleanup; | 
| 7327 |     } | 
| 7328 |  | 
| 7329 | #if WITH_PARTITION_STORAGE_ENGINE | 
| 7330 |     if (TOKU_PARTITION_WRITE_FRM_DATA || form->part_info == NULL) { | 
| 7331 |         error = write_frm_data(status_block, txn, form->s->path.str); | 
| 7332 |         if (error) { | 
| 7333 |             goto cleanup; | 
| 7334 |         } | 
| 7335 |     } | 
| 7336 | #else | 
| 7337 |     error = write_frm_data(status_block, txn, form->s->path.str); | 
| 7338 |     if (error) { | 
| 7339 |         goto cleanup; | 
| 7340 |     } | 
| 7341 | #endif | 
| 7342 |  | 
| 7343 |     error = allocate_key_and_col_info(form->s, &kc_info); | 
| 7344 |     if (error) { | 
| 7345 |         goto cleanup; | 
| 7346 |     } | 
| 7347 |  | 
| 7348 |     error = initialize_key_and_col_info( | 
| 7349 |         form->s, | 
| 7350 |         form, | 
| 7351 |         &kc_info, | 
| 7352 |         hidden_primary_key, | 
| 7353 |         primary_key); | 
| 7354 |     if (error) { | 
| 7355 |         goto cleanup; | 
| 7356 |     } | 
| 7357 |  | 
| 7358 |     error = create_main_dictionary( | 
| 7359 |         name, | 
| 7360 |         form, | 
| 7361 |         txn, | 
| 7362 |         &kc_info, | 
| 7363 |         compression_method); | 
| 7364 |     if (error) { | 
| 7365 |         goto cleanup; | 
| 7366 |     } | 
| 7367 |  | 
| 7368 |  | 
| 7369 |     for (uint i = 0; i < form->s->keys; i++) { | 
| 7370 |         if (i != primary_key) { | 
| 7371 |             error = create_secondary_dictionary( | 
| 7372 |                 name, | 
| 7373 |                 form, | 
| 7374 |                 &form->key_info[i], | 
| 7375 |                 txn, | 
| 7376 |                 &kc_info, | 
| 7377 |                 i, | 
| 7378 |                 false, | 
| 7379 |                 compression_method); | 
| 7380 |             if (error) { | 
| 7381 |                 goto cleanup; | 
| 7382 |             } | 
| 7383 |  | 
| 7384 |             error = write_key_name_to_status( | 
| 7385 |                 status_block, | 
| 7386 |                 form->s->key_info[i].name.str, | 
| 7387 |                 txn); | 
| 7388 |             if (error) { | 
| 7389 |                 goto cleanup; | 
| 7390 |             } | 
| 7391 |         } | 
| 7392 |     } | 
| 7393 |  | 
| 7394 |     error = 0; | 
| 7395 | cleanup: | 
| 7396 |     if (status_block != NULL) { | 
| 7397 |         int r = tokudb::metadata::close(&status_block); | 
| 7398 |         assert_always(r==0); | 
| 7399 |     } | 
| 7400 |     free_key_and_col_info(&kc_info); | 
| 7401 |     if (do_commit && txn) { | 
| 7402 |         if (error) { | 
| 7403 |             abort_txn(txn); | 
| 7404 |         } else { | 
| 7405 |             commit_txn(txn,0); | 
| 7406 |         } | 
| 7407 |     } | 
| 7408 |     tokudb::memory::free(newname); | 
| 7409 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 7410 | } | 
| 7411 |  | 
| 7412 | int ha_tokudb::discard_or_import_tablespace(my_bool discard) { | 
| 7413 |     /* | 
| 7414 |     if (discard) { | 
| 7415 |         my_errno=HA_ERR_WRONG_COMMAND; | 
| 7416 |         return my_errno; | 
| 7417 |     } | 
| 7418 |     return add_table_to_metadata(share->table_name); | 
| 7419 |     */ | 
| 7420 |     my_errno=HA_ERR_WRONG_COMMAND; | 
| 7421 |     return my_errno; | 
| 7422 | } | 
| 7423 |  | 
| 7424 |  | 
| 7425 | // | 
| 7426 | // deletes from_name or renames from_name to to_name, all using transaction txn. | 
| 7427 | // is_delete specifies which we are doing | 
| 7428 | // is_key specifies if it is a secondary index (and hence a "key-" needs to be prepended) or | 
| 7429 | // if it is not a secondary index | 
| 7430 | // | 
| 7431 | int ha_tokudb::delete_or_rename_dictionary( | 
| 7432 |     const char* from_name, | 
| 7433 |     const char* to_name, | 
| 7434 |     const char* secondary_name, | 
| 7435 |     bool is_key, | 
| 7436 |     DB_TXN* txn, | 
| 7437 |     bool is_delete) { | 
| 7438 |  | 
| 7439 |     int error; | 
| 7440 |     char dict_name[MAX_DICT_NAME_LEN]; | 
| 7441 |     char* new_from_name = NULL; | 
| 7442 |     size_t new_from_name_len = 0; | 
| 7443 |     char* new_to_name = NULL; | 
| 7444 |     size_t new_to_name_len = 0; | 
| 7445 |     assert_always(txn); | 
| 7446 |      | 
| 7447 |     new_from_name_len = get_max_dict_name_path_length(from_name); | 
| 7448 |     new_from_name = (char*)tokudb::memory::malloc( | 
| 7449 |         new_from_name_len, | 
| 7450 |         MYF(MY_WME)); | 
| 7451 |     if (new_from_name == NULL) { | 
| 7452 |         error = ENOMEM; | 
| 7453 |         goto cleanup; | 
| 7454 |     } | 
| 7455 |     if (!is_delete) { | 
| 7456 |         assert_always(to_name); | 
| 7457 |         new_to_name_len = get_max_dict_name_path_length(to_name); | 
| 7458 |         new_to_name = (char*)tokudb::memory::malloc( | 
| 7459 |             new_to_name_len, | 
| 7460 |             MYF(MY_WME)); | 
| 7461 |         if (new_to_name == NULL) { | 
| 7462 |             error = ENOMEM; | 
| 7463 |             goto cleanup; | 
| 7464 |         } | 
| 7465 |     } | 
| 7466 |      | 
| 7467 |     if (is_key) { | 
| 7468 |         sprintf(dict_name, "key-%s" , secondary_name); | 
| 7469 |         make_name(new_from_name, new_from_name_len, from_name, dict_name); | 
| 7470 |     } else { | 
| 7471 |         make_name(new_from_name, new_from_name_len, from_name, secondary_name); | 
| 7472 |     } | 
| 7473 |     if (!is_delete) { | 
| 7474 |         if (is_key) { | 
| 7475 |             sprintf(dict_name, "key-%s" , secondary_name); | 
| 7476 |             make_name(new_to_name, new_to_name_len, to_name, dict_name); | 
| 7477 |         } else { | 
| 7478 |             make_name(new_to_name, new_to_name_len, to_name, secondary_name); | 
| 7479 |         } | 
| 7480 |     } | 
| 7481 |  | 
| 7482 |     if (is_delete) {     | 
| 7483 |         error = db_env->dbremove(db_env, txn, new_from_name, NULL, 0); | 
| 7484 |     } else { | 
| 7485 |         error = db_env->dbrename( | 
| 7486 |             db_env, | 
| 7487 |             txn, | 
| 7488 |             new_from_name, | 
| 7489 |             NULL, | 
| 7490 |             new_to_name, | 
| 7491 |             0); | 
| 7492 |     } | 
| 7493 |     if (error) { | 
| 7494 |         goto cleanup; | 
| 7495 |     } | 
| 7496 |  | 
| 7497 | cleanup: | 
| 7498 |     tokudb::memory::free(new_from_name); | 
| 7499 |     tokudb::memory::free(new_to_name); | 
| 7500 |     return error; | 
| 7501 | } | 
| 7502 |  | 
| 7503 |  | 
| 7504 | // | 
| 7505 | // deletes or renames a table. if is_delete is true, then we delete, and to_name can be NULL | 
| 7506 | // if is_delete is false, then to_name must be non-NULL, as we are renaming the table. | 
| 7507 | // | 
| 7508 | int ha_tokudb::delete_or_rename_table (const char* from_name, const char* to_name, bool is_delete) { | 
| 7509 |     THD *thd = ha_thd(); | 
| 7510 |     int error; | 
| 7511 |     DB* status_db = NULL; | 
| 7512 |     DBC* status_cursor = NULL; | 
| 7513 |     DB_TXN* txn = NULL; | 
| 7514 |     DBT curr_key; | 
| 7515 |     DBT curr_val; | 
| 7516 |     memset(&curr_key, 0, sizeof(curr_key)); | 
| 7517 |     memset(&curr_val, 0, sizeof(curr_val)); | 
| 7518 |  | 
| 7519 |     DB_TXN *parent_txn = NULL; | 
| 7520 |     tokudb_trx_data *trx = NULL; | 
| 7521 |     trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton); | 
| 7522 |     if (thd_sql_command(ha_thd()) == SQLCOM_CREATE_TABLE && trx && trx->sub_sp_level) { | 
| 7523 |         parent_txn = trx->sub_sp_level; | 
| 7524 |     } | 
| 7525 |  | 
| 7526 |     error = txn_begin(db_env, parent_txn, &txn, 0, thd); | 
| 7527 |     if (error) { goto cleanup; } | 
| 7528 |  | 
| 7529 |     // | 
| 7530 |     // open status db, | 
| 7531 |     // create cursor, | 
| 7532 |     // for each name read out of there, create a db and delete or rename it | 
| 7533 |     // | 
| 7534 |     error = open_status_dictionary(&status_db, from_name, txn); | 
| 7535 |     if (error) { goto cleanup; } | 
| 7536 |  | 
| 7537 |     error = status_db->cursor(status_db, txn, &status_cursor, 0); | 
| 7538 |     if (error) { goto cleanup; } | 
| 7539 |     status_cursor->c_set_check_interrupt_callback(status_cursor, tokudb_killed_thd_callback, thd); | 
| 7540 |  | 
| 7541 |     while (error != DB_NOTFOUND) { | 
| 7542 |         error = status_cursor->c_get(status_cursor, &curr_key, &curr_val, DB_NEXT); | 
| 7543 |         if (error && error != DB_NOTFOUND) { | 
| 7544 |             error = map_to_handler_error(error); | 
| 7545 |             goto cleanup; | 
| 7546 |         } | 
| 7547 |         if (error == DB_NOTFOUND) { | 
| 7548 |             break; | 
| 7549 |         } | 
| 7550 |         HA_METADATA_KEY mk = *(HA_METADATA_KEY *)curr_key.data; | 
| 7551 |         if (mk != hatoku_key_name) { | 
| 7552 |             continue; | 
| 7553 |         } | 
| 7554 |         error = delete_or_rename_dictionary(from_name, to_name, (char *)((char *)curr_key.data + sizeof(HA_METADATA_KEY)), true, txn, is_delete); | 
| 7555 |         if (error) { goto cleanup; } | 
| 7556 |     } | 
| 7557 |  | 
| 7558 |     // | 
| 7559 |     // delete or rename main.tokudb | 
| 7560 |     // | 
| 7561 |     error = delete_or_rename_dictionary(from_name, to_name, "main" , false, txn, is_delete); | 
| 7562 |     if (error) { goto cleanup; } | 
| 7563 |  | 
    error = status_cursor->c_close(status_cursor);
    assert_always(error == 0);
    status_cursor = NULL;
| 7568 |  | 
| 7569 |     error = status_db->close(status_db, 0); | 
| 7570 |     assert_always(error == 0); | 
| 7571 |     status_db = NULL; | 
| 7572 |      | 
| 7573 |     // | 
| 7574 |     // delete or rename status.tokudb | 
| 7575 |     // | 
| 7576 |     error = delete_or_rename_dictionary(from_name, to_name, "status" , false, txn, is_delete); | 
| 7577 |     if (error) { goto cleanup; } | 
| 7578 |  | 
| 7579 |     my_errno = error; | 
| 7580 | cleanup: | 
| 7581 |     if (status_cursor) { | 
| 7582 |         int r = status_cursor->c_close(status_cursor); | 
| 7583 |         assert_always(r==0); | 
| 7584 |     } | 
| 7585 |     if (status_db) { | 
| 7586 |         int r = status_db->close(status_db, 0); | 
| 7587 |         assert_always(r==0); | 
| 7588 |     } | 
| 7589 |     if (txn) { | 
| 7590 |         if (error) { | 
| 7591 |             abort_txn(txn); | 
| 7592 |         } | 
| 7593 |         else { | 
| 7594 |             commit_txn(txn, 0); | 
| 7595 |         } | 
| 7596 |     } | 
| 7597 |     return error; | 
| 7598 | } | 
| 7599 |  | 
| 7600 |  | 
| 7601 | // | 
| 7602 | // Drops table | 
| 7603 | // Parameters: | 
| 7604 | //      [in]    name - name of table to be deleted | 
| 7605 | // Returns: | 
| 7606 | //      0 on success | 
| 7607 | //      error otherwise | 
| 7608 | // | 
| 7609 | int ha_tokudb::delete_table(const char *name) { | 
| 7610 |     TOKUDB_HANDLER_DBUG_ENTER("%s" , name); | 
| 7611 |     TOKUDB_SHARE* share = TOKUDB_SHARE::get_share(name, NULL, NULL, false); | 
| 7612 |     if (share) { | 
| 7613 |         share->unlock(); | 
| 7614 |         share->release(); | 
| 7615 |         // this should be enough to handle locking as the higher level MDL | 
| 7616 |         // on this table should prevent any new analyze tasks. | 
| 7617 |         share->cancel_background_jobs(); | 
| 7618 |         TOKUDB_SHARE::drop_share(share); | 
| 7619 |     } | 
| 7620 |  | 
| 7621 |     int error; | 
| 7622 |     error = delete_or_rename_table(name, NULL, true); | 
| 7623 |     if (TOKUDB_LIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_HIDE_DDL_LOCK_ERRORS) == 0) && | 
| 7624 |         error == DB_LOCK_NOTGRANTED) { | 
| 7625 |         sql_print_error( | 
| 7626 |             "Could not delete table %s because another transaction has "  | 
| 7627 |             "accessed the table. To drop the table, make sure no "  | 
| 7628 |             "transactions touch the table." , | 
| 7629 |             name); | 
| 7630 |     } | 
| 7631 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 7632 | } | 
| 7633 |  | 
| 7634 | static bool tokudb_check_db_dir_exist_from_table_name(const char *table_name) { | 
| 7635 |     DBUG_ASSERT(table_name); | 
| 7636 |     bool mysql_dir_exists; | 
| 7637 |     char db_name[FN_REFLEN]; | 
| 7638 |     const char *db_name_begin = strchr(table_name, FN_LIBCHAR); | 
| 7639 |     const char *db_name_end = strrchr(table_name, FN_LIBCHAR); | 
| 7640 |     DBUG_ASSERT(db_name_begin); | 
| 7641 |     DBUG_ASSERT(db_name_end); | 
| 7642 |     DBUG_ASSERT(db_name_begin != db_name_end); | 
| 7643 |  | 
| 7644 |     ++db_name_begin; | 
| 7645 |     size_t db_name_size = db_name_end - db_name_begin; | 
| 7646 |  | 
| 7647 |     DBUG_ASSERT(db_name_size < FN_REFLEN); | 
| 7648 |  | 
| 7649 |     memcpy(db_name, db_name_begin, db_name_size); | 
| 7650 |     db_name[db_name_size] = '\0'; | 
| 7651 |  | 
    // At this point, db_name contains the MySQL formatted database name.
    // This is exactly the same format that would come into us through a
    // CREATE TABLE. Some characters (like ':' for example) might be expanded
    // into hex (':' would appear as "@003a").
| 7656 |     // We need to check that the MySQL destination database directory exists. | 
| 7657 |     mysql_dir_exists = (my_access(db_name, F_OK) == 0); | 
| 7658 |  | 
| 7659 |     return mysql_dir_exists; | 
| 7660 | } | 
| 7661 |  | 
| 7662 | // | 
| 7663 | // renames table from "from" to "to" | 
| 7664 | // Parameters: | 
| 7665 | //      [in]    name - old name of table | 
| 7666 | //      [in]    to - new name of table | 
| 7667 | // Returns: | 
| 7668 | //      0 on success | 
| 7669 | //      error otherwise | 
| 7670 | // | 
| 7671 | int ha_tokudb::rename_table(const char *from, const char *to) { | 
| 7672 |     TOKUDB_HANDLER_DBUG_ENTER("%s %s" , from, to); | 
| 7673 |     TOKUDB_SHARE* share = TOKUDB_SHARE::get_share(from, NULL, NULL, false); | 
| 7674 |     if (share) { | 
| 7675 |         share->unlock(); | 
| 7676 |         share->release(); | 
| 7677 |         // this should be enough to handle locking as the higher level MDL | 
| 7678 |         // on this table should prevent any new analyze tasks. | 
| 7679 |         share->cancel_background_jobs(); | 
| 7680 |         TOKUDB_SHARE::drop_share(share); | 
| 7681 |     } | 
| 7682 |     int error; | 
| 7683 |     bool to_db_dir_exist = tokudb_check_db_dir_exist_from_table_name(to); | 
| 7684 |     if (!to_db_dir_exist) { | 
| 7685 |         sql_print_error( | 
| 7686 |             "Could not rename table from %s to %s because "  | 
| 7687 |             "destination db does not exist" , | 
| 7688 |             from, | 
| 7689 |             to); | 
| 7690 | #ifndef __WIN__ | 
| 7691 |         /* Small hack. tokudb_check_db_dir_exist_from_table_name calls | 
| 7692 |          * my_access, which sets my_errno on Windows, but doesn't on | 
| 7693 |          * unix. Set it for unix too. | 
| 7694 |          */ | 
| 7695 |         my_errno= errno; | 
| 7696 | #endif | 
| 7697 |         error= my_errno; | 
| 7698 |     } | 
| 7699 |     else { | 
| 7700 |         error = delete_or_rename_table(from, to, false); | 
| 7701 |         if (TOKUDB_LIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_HIDE_DDL_LOCK_ERRORS) == 0) && | 
| 7702 |             error == DB_LOCK_NOTGRANTED) { | 
| 7703 |             sql_print_error( | 
| 7704 |                 "Could not rename table from %s to %s because another transaction "  | 
| 7705 |                 "has accessed the table. To rename the table, make sure no "  | 
| 7706 |                 "transactions touch the table." , | 
| 7707 |                 from, | 
| 7708 |                 to); | 
| 7709 |         } | 
| 7710 |     } | 
| 7711 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 7712 | } | 
| 7713 |  | 
| 7714 |  | 
| 7715 | /* | 
| 7716 |   Returns estimate on number of seeks it will take to read through the table | 
| 7717 |   This is to be comparable to the number returned by records_in_range so | 
| 7718 |   that we can decide if we should scan the table or use keys. | 
| 7719 | */ | 
| 7720 | /// QQQ why divide by 3 | 
| 7721 | double ha_tokudb::scan_time() { | 
| 7722 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 7723 |     double ret_val = (double)stats.records / 3; | 
| 7724 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 7725 |         TOKUDB_DEBUG_RETURN, | 
| 7726 |         "return %"  PRIu64 " %f" , | 
| 7727 |         (uint64_t)stats.records, | 
| 7728 |         ret_val); | 
| 7729 |     DBUG_RETURN(ret_val); | 
| 7730 | } | 
| 7731 |  | 
| 7732 | double ha_tokudb::keyread_time(uint index, uint ranges, ha_rows rows) | 
| 7733 | { | 
| 7734 |     TOKUDB_HANDLER_DBUG_ENTER("%u %u %"  PRIu64, index, ranges, (uint64_t) rows); | 
| 7735 |     double ret_val; | 
| 7736 |     if (index == primary_key || key_is_clustering(&table->key_info[index])) { | 
| 7737 |         ret_val = read_time(index, ranges, rows); | 
| 7738 |         DBUG_RETURN(ret_val); | 
| 7739 |     } | 
| 7740 |     /* | 
| 7741 |       It is assumed that we will read trough the whole key range and that all | 
| 7742 |       key blocks are half full (normally things are much better). It is also | 
| 7743 |       assumed that each time we read the next key from the index, the handler | 
| 7744 |       performs a random seek, thus the cost is proportional to the number of | 
| 7745 |       blocks read. This model does not take into account clustered indexes - | 
| 7746 |       engines that support that (e.g. InnoDB) may want to overwrite this method. | 
| 7747 |     */ | 
| 7748 |     double keys_per_block= (stats.block_size/2.0/ | 
| 7749 |                             (table->key_info[index].key_length + | 
| 7750 |                              ref_length) + 1); | 
| 7751 |     ret_val = (rows + keys_per_block - 1)/ keys_per_block; | 
| 7752 |     TOKUDB_HANDLER_DBUG_RETURN_DOUBLE(ret_val); | 
| 7753 | } | 
| 7754 |  | 
| 7755 | // | 
| 7756 | // Calculate the time it takes to read a set of ranges through an index | 
| 7757 | // This enables us to optimize reads for clustered indexes. | 
| 7758 | // Implementation pulled from InnoDB | 
| 7759 | // Parameters: | 
| 7760 | //          index - index to use | 
| 7761 | //          ranges - number of ranges | 
| 7762 | //          rows - estimated number of rows in the range | 
| 7763 | // Returns: | 
| 7764 | //      estimated time measured in disk seeks | 
| 7765 | // | 
| 7766 | double ha_tokudb::read_time( | 
| 7767 |     uint    index, | 
| 7768 |     uint    ranges, | 
| 7769 |     ha_rows rows | 
| 7770 |     ) | 
| 7771 | { | 
| 7772 |     TOKUDB_HANDLER_DBUG_ENTER("%u %u %"  PRIu64, index, ranges, (uint64_t) rows); | 
| 7773 |     double total_scan; | 
| 7774 |     double ret_val;  | 
| 7775 |     bool is_primary = (index == primary_key); | 
| 7776 |     bool is_clustering; | 
| 7777 |  | 
| 7778 |     // | 
| 7779 |     // in case for hidden primary key, this is called | 
| 7780 |     // | 
| 7781 |     if (index >= table_share->keys) { | 
| 7782 |         ret_val = handler::read_time(index, ranges, rows); | 
| 7783 |         goto cleanup; | 
| 7784 |     } | 
| 7785 |      | 
| 7786 |     is_clustering = key_is_clustering(&table->key_info[index]); | 
| 7787 |  | 
| 7788 |  | 
| 7789 |     // | 
| 7790 |     // if it is not the primary key, and it is not a clustering key, then return handler::read_time | 
| 7791 |     // | 
| 7792 |     if (!(is_primary || is_clustering)) { | 
| 7793 |         ret_val = handler::read_time(index, ranges, rows); | 
| 7794 |         goto cleanup; | 
| 7795 |     } | 
| 7796 |  | 
| 7797 |     // | 
| 7798 |     // for primary key and for clustered keys, return a fraction of scan_time() | 
| 7799 |     // | 
| 7800 |     total_scan = scan_time(); | 
| 7801 |  | 
| 7802 |     if (stats.records <= rows) { | 
| 7803 |         ret_val = is_clustering ? total_scan + 0.00001 : total_scan; | 
| 7804 |         goto cleanup; | 
| 7805 |     } | 
| 7806 |  | 
| 7807 |     // | 
| 7808 |     // one disk seek per range plus the proportional scan time of the rows | 
| 7809 |     // | 
| 7810 |     ret_val = (ranges + (double) rows / (double) stats.records * total_scan); | 
| 7811 |     ret_val = is_clustering ? ret_val + 0.00001 : ret_val; | 
| 7812 |      | 
| 7813 | cleanup: | 
| 7814 |     TOKUDB_HANDLER_DBUG_RETURN_DOUBLE(ret_val); | 
| 7815 | } | 
| 7816 |  | 
| 7817 | double ha_tokudb::index_only_read_time(uint keynr, double records) { | 
| 7818 |     TOKUDB_HANDLER_DBUG_ENTER("%u %f" , keynr, records); | 
| 7819 |     double ret_val = keyread_time(keynr, 1, (ha_rows)records); | 
| 7820 |     TOKUDB_HANDLER_DBUG_RETURN_DOUBLE(ret_val); | 
| 7821 | } | 
| 7822 |  | 
| 7823 | // | 
// Estimates the number of index records in a range. In case of errors,
//   returns HA_TOKUDB_RANGE_COUNT instead of HA_POS_ERROR. This was the
//   behavior when we got the handlerton from MySQL.
// Parameters:
//              keynr - index to use
| 7829 | //      [in]    start_key - low end of the range | 
| 7830 | //      [in]    end_key - high end of the range | 
| 7831 | // Returns: | 
| 7832 | //      0 - There are no matching keys in the given range | 
| 7833 | //      number > 0 - There are approximately number matching rows in the range | 
| 7834 | //      HA_POS_ERROR - Something is wrong with the index tree | 
| 7835 | // | 
| 7836 | ha_rows ha_tokudb::records_in_range(uint keynr, key_range* start_key, key_range* end_key) { | 
    TOKUDB_HANDLER_DBUG_ENTER("%u %p %p" , keynr, start_key, end_key);
| 7838 |     DBT *pleft_key, *pright_key; | 
| 7839 |     DBT left_key, right_key; | 
| 7840 |     ha_rows ret_val = HA_TOKUDB_RANGE_COUNT; | 
| 7841 |     DB *kfile = share->key_file[keynr]; | 
| 7842 |     uint64_t rows = 0; | 
| 7843 |     int error; | 
| 7844 |  | 
    // Estimate the number of rows in the range. The keys passed in may be
    // prefixes of keys in the DB, so the per-endpoint 'equal' counts from
    // keys_range64 cannot be trusted (equal may be 0 while greater actually
    // includes the equal keys). We therefore bracket the range with explicit
    // infinity bytes and rely on 'middle', the estimated number of keys
    // between the two endpoints.
| 7850 |     if (!start_key && !end_key) { | 
| 7851 |         error = estimate_num_rows(share->file, &rows, transaction); | 
| 7852 |         if (error) { | 
| 7853 |             ret_val = HA_TOKUDB_RANGE_COUNT; | 
| 7854 |             goto cleanup; | 
| 7855 |         } | 
| 7856 |         ret_val = (rows <= 1) ? 1 : rows; | 
| 7857 |         goto cleanup; | 
| 7858 |     } | 
| 7859 |     if (start_key) { | 
| 7860 |         uchar inf_byte = (start_key->flag == HA_READ_KEY_EXACT) ? COL_NEG_INF : COL_POS_INF; | 
| 7861 |         pack_key(&left_key, keynr, key_buff, start_key->key, start_key->length, inf_byte); | 
| 7862 |         pleft_key = &left_key; | 
| 7863 |     } else { | 
| 7864 |         pleft_key = NULL; | 
| 7865 |     } | 
| 7866 |     if (end_key) { | 
| 7867 |         uchar inf_byte = (end_key->flag == HA_READ_BEFORE_KEY) ? COL_NEG_INF : COL_POS_INF; | 
| 7868 |         pack_key(&right_key, keynr, key_buff2, end_key->key, end_key->length, inf_byte); | 
| 7869 |         pright_key = &right_key; | 
| 7870 |     } else { | 
| 7871 |         pright_key = NULL; | 
| 7872 |     } | 
| 7873 |     // keys_range64 can not handle a degenerate range (left_key > right_key), so we filter here | 
| 7874 |     if (pleft_key && pright_key && tokudb_cmp_dbt_key(kfile, pleft_key, pright_key) > 0) { | 
| 7875 |         rows = 0; | 
| 7876 |     } else { | 
| 7877 |         uint64_t less, equal1, middle, equal2, greater; | 
| 7878 |         bool is_exact; | 
| 7879 |         error = kfile->keys_range64(kfile, transaction, pleft_key, pright_key,  | 
| 7880 |                                     &less, &equal1, &middle, &equal2, &greater, &is_exact); | 
| 7881 |         if (error) { | 
| 7882 |             ret_val = HA_TOKUDB_RANGE_COUNT; | 
| 7883 |             goto cleanup; | 
| 7884 |         } | 
| 7885 |         rows = middle; | 
| 7886 |     } | 
| 7887 |  | 
| 7888 |     // MySQL thinks a return value of 0 means there are exactly 0 rows | 
| 7889 |     // Therefore, always return non-zero so this assumption is not made | 
| 7890 |     ret_val = (ha_rows) (rows <= 1 ? 1 : rows); | 
| 7891 |  | 
| 7892 | cleanup: | 
| 7893 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 7894 |         TOKUDB_DEBUG_RETURN, | 
| 7895 |         "return %"  PRIu64 " %"  PRIu64, | 
| 7896 |         (uint64_t)ret_val, | 
| 7897 |         rows); | 
| 7898 |     DBUG_RETURN(ret_val); | 
| 7899 | } | 
| 7900 |  | 
| 7901 |  | 
| 7902 | // | 
| 7903 | // Initializes the auto-increment data in the local "share" object to the | 
| 7904 | // greater of two values: what's stored in the metadata or the last inserted | 
| 7905 | // auto-increment field (if auto-increment field is the first field of a key). | 
| 7906 | // | 
| 7907 | void ha_tokudb::init_auto_increment() { | 
| 7908 |     int error; | 
| 7909 |     DB_TXN* txn = NULL; | 
| 7910 |  | 
| 7911 |     error = txn_begin(db_env, 0, &txn, 0, ha_thd()); | 
| 7912 |     if (error) { | 
| 7913 |         share->last_auto_increment = 0;     | 
| 7914 |     } else { | 
| 7915 |         HA_METADATA_KEY key_val; | 
| 7916 |         DBT key;  | 
| 7917 |         memset(&key, 0, sizeof(key)); | 
| 7918 |         key.data = &key_val; | 
| 7919 |         key.size = sizeof(key_val); | 
| 7920 |         DBT value;  | 
| 7921 |         memset(&value, 0, sizeof(value)); | 
| 7922 |         value.flags = DB_DBT_USERMEM; | 
| 7923 |  | 
| 7924 |         // Retrieve the initial auto increment value, as specified by create table | 
| 7925 |         // so if a user does "create table t1 (a int auto_increment, primary key (a)) auto_increment=100", | 
| 7926 |         // then the value 100 should be stored here | 
| 7927 |         key_val = hatoku_ai_create_value; | 
| 7928 |         value.ulen = sizeof(share->auto_inc_create_value); | 
| 7929 |         value.data = &share->auto_inc_create_value; | 
| 7930 |         error = share->status_block->get(share->status_block, txn, &key, &value, 0); | 
| 7931 |          | 
| 7932 |         if (error || value.size != sizeof(share->auto_inc_create_value)) { | 
| 7933 |             share->auto_inc_create_value = 0; | 
| 7934 |         } | 
| 7935 |  | 
        // Retrieve hatoku_max_ai, the maximum value used by the auto
        // increment column so far. That max could have been auto generated
        // (e.g. insert (NULL)) or manually inserted by the user
        // (e.g. insert (345)).
| 7939 |         key_val = hatoku_max_ai; | 
| 7940 |         value.ulen = sizeof(share->last_auto_increment); | 
| 7941 |         value.data = &share->last_auto_increment; | 
| 7942 |         error = share->status_block->get(share->status_block, txn, &key, &value, 0); | 
| 7943 |          | 
| 7944 |         if (error || value.size != sizeof(share->last_auto_increment)) { | 
| 7945 |             if (share->auto_inc_create_value) | 
| 7946 |                 share->last_auto_increment = share->auto_inc_create_value - 1; | 
| 7947 |             else | 
| 7948 |                 share->last_auto_increment = 0; | 
| 7949 |         } | 
| 7950 |  | 
| 7951 |         commit_txn(txn, 0); | 
| 7952 |     } | 
| 7953 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 7954 |         TOKUDB_DEBUG_AUTO_INCREMENT, | 
| 7955 |         "init auto increment:%lld" , | 
| 7956 |         share->last_auto_increment); | 
| 7957 | } | 
| 7958 |  | 
| 7959 | void ha_tokudb::get_auto_increment( | 
| 7960 |     ulonglong offset, | 
| 7961 |     ulonglong increment, | 
| 7962 |     ulonglong nb_desired_values, | 
| 7963 |     ulonglong* first_value, | 
| 7964 |     ulonglong* nb_reserved_values) { | 
| 7965 |  | 
| 7966 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 7967 |     ulonglong nr; | 
| 7968 |     bool over; | 
| 7969 |  | 
| 7970 |     if (table->s->next_number_key_offset) | 
| 7971 |     { | 
| 7972 |       handler::get_auto_increment(offset, increment, nb_desired_values, first_value, nb_reserved_values); | 
| 7973 |       DBUG_VOID_RETURN; | 
| 7974 |     } | 
| 7975 |  | 
| 7976 |     share->lock(); | 
| 7977 |  | 
| 7978 |     if (share->auto_inc_create_value > share->last_auto_increment) { | 
| 7979 |         nr = share->auto_inc_create_value; | 
| 7980 |         over = false; | 
| 7981 |         share->last_auto_increment = share->auto_inc_create_value; | 
| 7982 |     } else { | 
| 7983 |         nr = share->last_auto_increment + increment; | 
| 7984 |         over = nr < share->last_auto_increment; | 
| 7985 |         if (over) | 
| 7986 |             nr = ULONGLONG_MAX; | 
| 7987 |     } | 
| 7988 |     if (!over) { | 
| 7989 |         share->last_auto_increment = nr + (nb_desired_values - 1)*increment; | 
| 7990 |         if (delay_updating_ai_metadata) { | 
| 7991 |             ai_metadata_update_required = true; | 
| 7992 |         } else { | 
| 7993 |             update_max_auto_inc( | 
| 7994 |                 share->status_block, | 
| 7995 |                 share->last_auto_increment); | 
| 7996 |         } | 
| 7997 |     } | 
| 7998 |     TOKUDB_HANDLER_TRACE_FOR_FLAGS( | 
| 7999 |         TOKUDB_DEBUG_AUTO_INCREMENT, | 
| 8000 |         "get_auto_increment(%lld,%lld,%lld): got:%lld:%lld" , | 
| 8001 |         offset, | 
| 8002 |         increment, | 
| 8003 |         nb_desired_values, | 
| 8004 |         nr, | 
| 8005 |         nb_desired_values); | 
| 8006 |     *first_value = nr; | 
| 8007 |     *nb_reserved_values = nb_desired_values; | 
| 8008 |     share->unlock(); | 
| 8009 |     TOKUDB_HANDLER_DBUG_VOID_RETURN; | 
| 8010 | } | 
| 8011 |  | 
| 8012 | bool ha_tokudb::is_optimize_blocking() { | 
| 8013 |     return false; | 
| 8014 | } | 
| 8015 |  | 
| 8016 | bool ha_tokudb::is_auto_inc_singleton(){ | 
| 8017 |     return false; | 
| 8018 | } | 
| 8019 |  | 
| 8020 |  | 
// Internal function called by ha_tokudb::add_index and ha_tokudb::alter_table_phase2
//
| 8025 | // Adds indexes to the table. Takes the array of KEY passed in key_info, and creates | 
| 8026 | // DB's that will go at the end of share->key_file. THE IMPLICIT ASSUMPTION HERE is | 
| 8027 | // that the table will be modified and that these added keys will be appended to the end | 
| 8028 | // of the array table->key_info | 
| 8029 | // Parameters: | 
| 8030 | //      [in]    table_arg - table that is being modified, seems to be identical to this->table | 
| 8031 | //      [in]    key_info - array of KEY's to be added | 
| 8032 | //              num_of_keys - number of keys to be added, number of elements in key_info | 
| 8033 | //  Returns: | 
| 8034 | //      0 on success, error otherwise | 
| 8035 | // | 
| 8036 | int ha_tokudb::tokudb_add_index( | 
| 8037 |     TABLE* table_arg, | 
| 8038 |     KEY* key_info, | 
| 8039 |     uint num_of_keys, | 
| 8040 |     DB_TXN* txn, | 
| 8041 |     bool* inc_num_DBs, | 
| 8042 |     bool* modified_DBs) { | 
| 8043 |  | 
| 8044 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 8045 |     assert_always(txn); | 
| 8046 |  | 
| 8047 |     int error; | 
| 8048 |     uint curr_index = 0; | 
| 8049 |     DBC* tmp_cursor = NULL; | 
| 8050 |     int cursor_ret_val = 0; | 
| 8051 |     DBT curr_pk_key, curr_pk_val; | 
| 8052 |     THD* thd = ha_thd(); | 
| 8053 |     DB_LOADER* loader = NULL; | 
| 8054 |     DB_INDEXER* indexer = NULL; | 
| 8055 |     bool loader_save_space = tokudb::sysvars::load_save_space(thd); | 
| 8056 |     bool use_hot_index = (lock.type == TL_WRITE_ALLOW_WRITE); | 
| 8057 |     uint32_t loader_flags = loader_save_space ? LOADER_COMPRESS_INTERMEDIATES : 0; | 
| 8058 |     uint32_t indexer_flags = 0; | 
| 8059 |     uint32_t mult_db_flags[MAX_KEY + 1] = {0}; | 
| 8060 |     uint32_t mult_put_flags[MAX_KEY + 1]; | 
| 8061 |     uint32_t mult_dbt_flags[MAX_KEY + 1]; | 
| 8062 |     bool creating_hot_index = false; | 
| 8063 |     struct loader_context lc; | 
| 8064 |     memset(&lc, 0, sizeof lc); | 
| 8065 |     lc.thd = thd; | 
| 8066 |     lc.ha = this; | 
| 8067 |     loader_error = 0; | 
| 8068 |     bool rw_lock_taken = false; | 
| 8069 |     *inc_num_DBs = false; | 
| 8070 |     *modified_DBs = false; | 
| 8071 |     invalidate_bulk_fetch(); | 
| 8072 |     unpack_entire_row = true; // for bulk fetching rows | 
| 8073 |     for (uint32_t i = 0; i < MAX_KEY+1; i++) { | 
| 8074 |         mult_put_flags[i] = 0; | 
| 8075 |         mult_dbt_flags[i] = DB_DBT_REALLOC; | 
| 8076 |     } | 
| 8077 |     // | 
| 8078 |     // number of DB files we have open currently, before add_index is executed | 
| 8079 |     // | 
| 8080 |     uint curr_num_DBs = table_arg->s->keys + tokudb_test(hidden_primary_key); | 
| 8081 |  | 
| 8082 |     // | 
| 8083 |     // get the row type to use for the indexes we're adding | 
| 8084 |     // | 
| 8085 |     toku_compression_method compression_method = | 
| 8086 |         get_compression_method(share->file); | 
| 8087 |  | 
| 8088 |     // | 
| 8089 |     // status message to be shown in "show process list" | 
| 8090 |     // | 
| 8091 |     const char *orig_proc_info = tokudb_thd_get_proc_info(thd); | 
    // a buffer of MAX_ALIAS_NAME + 200 bytes should be a good upper bound.
| 8093 |     char status_msg[MAX_ALIAS_NAME + 200]; | 
| 8094 |     // variable that stores number of elements inserted thus far | 
| 8095 |     ulonglong num_processed = 0; | 
| 8096 |     thd_proc_info(thd, "Adding indexes" ); | 
| 8097 |      | 
| 8098 |     // | 
| 8099 |     // in unpack_row, MySQL passes a buffer that is this long, | 
| 8100 |     // so this length should be good enough for us as well | 
| 8101 |     // | 
| 8102 |     memset((void *) &curr_pk_key, 0, sizeof(curr_pk_key)); | 
| 8103 |     memset((void *) &curr_pk_val, 0, sizeof(curr_pk_val)); | 
| 8104 |  | 
| 8105 |     // | 
| 8106 |     // The files for secondary tables are derived from the name of keys | 
| 8107 |     // If we try to add a key with the same name as an already existing key, | 
| 8108 |     // We can crash. So here we check if any of the keys added has the same | 
| 8109 |     // name of an existing key, and if so, we fail gracefully | 
| 8110 |     // | 
| 8111 |     for (uint i = 0; i < num_of_keys; i++) { | 
| 8112 |         for (uint j = 0; j < table_arg->s->keys; j++) { | 
| 8113 |             if (strcmp(key_info[i].name.str, | 
| 8114 |                        table_arg->s->key_info[j].name.str) == 0) { | 
| 8115 |                 error = HA_ERR_WRONG_COMMAND; | 
| 8116 |                 goto cleanup; | 
| 8117 |             } | 
| 8118 |         } | 
| 8119 |     } | 
| 8120 |  | 
| 8121 |     rwlock_t_lock_write(share->_num_DBs_lock); | 
| 8122 |     rw_lock_taken = true; | 
| 8123 |     // | 
| 8124 |     // open all the DB files and set the appropriate variables in share | 
| 8125 |     // they go to the end of share->key_file | 
| 8126 |     // | 
| 8127 |     creating_hot_index = | 
| 8128 |         use_hot_index && num_of_keys == 1 && | 
| 8129 |         (key_info[0].flags & HA_NOSAME) == 0; | 
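    // Rough restatement of the conditions above (a sketch, not an
    // exhaustive rule): an online operation holding only a
    // TL_WRITE_ALLOW_WRITE lock that adds exactly one non-unique key uses
    // the hot indexer; everything else falls through to the bulk loader
    // path below.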
| 8130 |     if (use_hot_index && (share->num_DBs > curr_num_DBs)) { | 
| 8131 |         // | 
| 8132 |         // already have hot index in progress, get out | 
| 8133 |         // | 
| 8134 |         error = HA_ERR_INTERNAL_ERROR; | 
| 8135 |         goto cleanup; | 
| 8136 |     } | 
| 8137 |     curr_index = curr_num_DBs; | 
| 8138 |     *modified_DBs = true; | 
| 8139 |     for (uint i = 0; i < num_of_keys; i++, curr_index++) { | 
| 8140 |         if (key_is_clustering(&key_info[i])) { | 
| 8141 |             set_key_filter( | 
| 8142 |                 &share->kc_info.key_filters[curr_index], | 
| 8143 |                 &key_info[i], | 
| 8144 |                 table_arg, | 
| 8145 |                 false); | 
| 8146 |             if (!hidden_primary_key) { | 
| 8147 |                 set_key_filter( | 
| 8148 |                     &share->kc_info.key_filters[curr_index], | 
| 8149 |                     &table_arg->key_info[primary_key], | 
| 8150 |                     table_arg, | 
| 8151 |                     false); | 
| 8152 |             } | 
| 8153 |  | 
| 8154 |             error = initialize_col_pack_info( | 
| 8155 |                 &share->kc_info, | 
| 8156 |                 table_arg->s, | 
| 8157 |                 curr_index); | 
| 8158 |             if (error) { | 
| 8159 |                 goto cleanup; | 
| 8160 |             } | 
| 8161 |         } | 
| 8162 |  | 
| 8163 |  | 
| 8164 |         error = create_secondary_dictionary( | 
| 8165 |             share->full_table_name(), | 
| 8166 |             table_arg, | 
| 8167 |             &key_info[i], | 
| 8168 |             txn, | 
| 8169 |             &share->kc_info, | 
| 8170 |             curr_index, | 
| 8171 |             creating_hot_index, | 
| 8172 |             compression_method); | 
| 8173 |         if (error) { | 
| 8174 |             goto cleanup; | 
| 8175 |         } | 
| 8176 |  | 
| 8177 |         error = open_secondary_dictionary( | 
| 8178 |             &share->key_file[curr_index], | 
| 8179 |             &key_info[i], | 
| 8180 |             share->full_table_name(), | 
| 8181 |             false, | 
| 8182 |             txn); | 
| 8183 |         if (error) { | 
| 8184 |             goto cleanup; | 
| 8185 |         } | 
| 8186 |     } | 
| 8187 |      | 
| 8188 |     if (creating_hot_index) { | 
| 8189 |         share->num_DBs++; | 
| 8190 |         *inc_num_DBs = true; | 
| 8191 |         error = db_env->create_indexer( | 
| 8192 |             db_env, | 
| 8193 |             txn, | 
| 8194 |             &indexer, | 
| 8195 |             share->file, | 
| 8196 |             num_of_keys, | 
| 8197 |             &share->key_file[curr_num_DBs], | 
| 8198 |             mult_db_flags, | 
| 8199 |             indexer_flags); | 
| 8200 |         if (error) { | 
| 8201 |             goto cleanup; | 
| 8202 |         } | 
| 8203 |  | 
| 8204 |         error = indexer->set_poll_function( | 
| 8205 |             indexer, ha_tokudb::tokudb_add_index_poll, &lc); | 
| 8206 |         if (error) { | 
| 8207 |             goto cleanup; | 
| 8208 |         } | 
| 8209 |  | 
| 8210 |         error = indexer->set_error_callback( | 
| 8211 |             indexer, ha_tokudb::loader_add_index_err, &lc); | 
| 8212 |         if (error) { | 
| 8213 |             goto cleanup; | 
| 8214 |         } | 
| 8215 |  | 
| 8216 |         share->_num_DBs_lock.unlock(); | 
| 8217 |         rw_lock_taken = false; | 
| 8218 |          | 
| 8219 | #ifdef HA_TOKUDB_HAS_THD_PROGRESS | 
| 8220 |         // initialize a one phase progress report. | 
| 8221 |         // incremental reports are done in the indexer's callback function. | 
| 8222 |         thd_progress_init(thd, 1); | 
| 8223 | #endif | 
| 8224 |  | 
| 8225 |         error = indexer->build(indexer); | 
| 8226 |  | 
| 8227 |         if (error) { | 
| 8228 |             goto cleanup; | 
| 8229 |         } | 
| 8230 |  | 
| 8231 |         rwlock_t_lock_write(share->_num_DBs_lock); | 
| 8232 |         error = indexer->close(indexer); | 
| 8233 |         share->_num_DBs_lock.unlock(); | 
| 8234 |         if (error) { | 
| 8235 |             goto cleanup; | 
| 8236 |         } | 
| 8237 |         indexer = NULL; | 
| 8238 |     } else { | 
| 8239 |         DBUG_ASSERT(table->mdl_ticket->get_type() >= MDL_SHARED_NO_WRITE); | 
| 8240 |         share->_num_DBs_lock.unlock(); | 
| 8241 |         rw_lock_taken = false; | 
| 8242 |         prelocked_right_range_size = 0; | 
| 8243 |         prelocked_left_range_size = 0; | 
        struct smart_dbt_bf_info bf_info;
        bf_info.ha = this;
        bf_info.direction = 1;
        bf_info.thd = ha_thd();
        // you need the val if you have a clustering index and key_read is not 0
        bf_info.need_val = true;
        bf_info.key_to_compare = NULL;
| 8251 |  | 
| 8252 |         error = db_env->create_loader( | 
| 8253 |             db_env, | 
| 8254 |             txn, | 
| 8255 |             &loader, | 
| 8256 |             NULL, // no src_db needed | 
| 8257 |             num_of_keys, | 
| 8258 |             &share->key_file[curr_num_DBs], | 
| 8259 |             mult_put_flags, | 
| 8260 |             mult_dbt_flags, | 
| 8261 |             loader_flags); | 
| 8262 |         if (error) { | 
| 8263 |             goto cleanup; | 
| 8264 |         } | 
| 8265 |  | 
| 8266 |         error = | 
| 8267 |             loader->set_poll_function(loader, ha_tokudb::bulk_insert_poll, &lc); | 
| 8268 |         if (error) { | 
| 8269 |             goto cleanup; | 
| 8270 |         } | 
| 8271 |  | 
| 8272 |         error = loader->set_error_callback( | 
| 8273 |             loader, ha_tokudb::loader_add_index_err, &lc); | 
| 8274 |         if (error) { | 
| 8275 |             goto cleanup; | 
| 8276 |         } | 
| 8277 |         // | 
| 8278 |         // scan primary table, create each secondary key, add to each DB | 
| 8279 |         //     | 
| 8280 |         error = share->file->cursor( | 
| 8281 |             share->file, | 
| 8282 |             txn, | 
| 8283 |             &tmp_cursor, | 
| 8284 |             DB_SERIALIZABLE); | 
| 8285 |         if (error) { | 
| 8286 |             tmp_cursor = NULL;             // Safety | 
| 8287 |             goto cleanup; | 
| 8288 |         } | 
| 8289 |  | 
| 8290 |         // | 
| 8291 |         // grab some locks to make this go faster | 
| 8292 |         // first a global read lock on the main DB, because | 
| 8293 |         // we intend to scan the entire thing | 
| 8294 |         // | 
| 8295 |         error = tmp_cursor->c_set_bounds( | 
| 8296 |             tmp_cursor, | 
| 8297 |             share->file->dbt_neg_infty(), | 
| 8298 |             share->file->dbt_pos_infty(), | 
| 8299 |             true, | 
| 8300 |             0); | 
| 8301 |         if (error) { | 
| 8302 |             goto cleanup; | 
| 8303 |         } | 
| 8304 |  | 
        // set the bulk fetch iteration to its max so that adding an
        // index fills the bulk fetch buffer every time. we do not
        // want the buffer to have to ramp up exponentially from a
        // small initial size.
| 8308 |         rows_fetched_using_bulk_fetch = 0; | 
| 8309 |         bulk_fetch_iteration = HA_TOKU_BULK_FETCH_ITERATION_MAX; | 
| 8310 |         cursor_ret_val = tmp_cursor->c_getf_next( | 
| 8311 |             tmp_cursor, | 
| 8312 |             DB_PRELOCKED, | 
| 8313 |             smart_dbt_bf_callback, | 
| 8314 |             &bf_info); | 
| 8315 |  | 
| 8316 | #ifdef HA_TOKUDB_HAS_THD_PROGRESS | 
| 8317 |         // initialize a two phase progress report. | 
| 8318 |         // first phase: putting rows into the loader | 
| 8319 |         thd_progress_init(thd, 2); | 
| 8320 | #endif | 
| 8321 |  | 
| 8322 |         while (cursor_ret_val != DB_NOTFOUND || | 
| 8323 |                ((bytes_used_in_range_query_buff - | 
| 8324 |                  curr_range_query_buff_offset) > 0)) { | 
| 8325 |             if ((bytes_used_in_range_query_buff - | 
| 8326 |                  curr_range_query_buff_offset) == 0) { | 
| 8327 |                 invalidate_bulk_fetch(); // reset the buffers | 
| 8328 |                 cursor_ret_val = tmp_cursor->c_getf_next( | 
| 8329 |                     tmp_cursor, | 
| 8330 |                     DB_PRELOCKED, | 
| 8331 |                     smart_dbt_bf_callback, | 
| 8332 |                     &bf_info); | 
| 8333 |                 if (cursor_ret_val != DB_NOTFOUND && cursor_ret_val != 0) { | 
| 8334 |                     error = cursor_ret_val; | 
| 8335 |                     goto cleanup; | 
| 8336 |                 } | 
| 8337 |             } | 
            // do this check in case the c_getf_next did not put anything
            // into the buffer because there was no more data
| 8340 |             if ((bytes_used_in_range_query_buff - | 
| 8341 |                  curr_range_query_buff_offset) == 0) { | 
| 8342 |                 break; | 
| 8343 |             } | 
| 8344 |             // at this point, we know the range query buffer has at least one | 
| 8345 |             // key/val pair | 
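            // Each buffer entry is laid out as unpacked below
            // (both sizes are native uint32_t):
            //   [key_size][key bytes...][val_size][val bytes...]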
| 8346 |             uchar* curr_pos = range_query_buff+curr_range_query_buff_offset; | 
| 8347 |              | 
| 8348 |             uint32_t key_size = *(uint32_t *)curr_pos;     | 
| 8349 |             curr_pos += sizeof(key_size);     | 
| 8350 |             uchar* curr_key_buff = curr_pos;     | 
| 8351 |             curr_pos += key_size;         | 
| 8352 |             curr_pk_key.data = curr_key_buff;     | 
| 8353 |             curr_pk_key.size = key_size; | 
| 8354 |              | 
| 8355 |             uint32_t val_size = *(uint32_t *)curr_pos;     | 
| 8356 |             curr_pos += sizeof(val_size);     | 
| 8357 |             uchar* curr_val_buff = curr_pos;     | 
| 8358 |             curr_pos += val_size;         | 
| 8359 |             curr_pk_val.data = curr_val_buff;     | 
| 8360 |             curr_pk_val.size = val_size; | 
| 8361 |              | 
| 8362 |             curr_range_query_buff_offset = curr_pos - range_query_buff; | 
| 8363 |  | 
| 8364 |             error = loader->put(loader, &curr_pk_key, &curr_pk_val); | 
| 8365 |             if (error) { | 
| 8366 |                 goto cleanup; | 
| 8367 |             } | 
| 8368 |  | 
| 8369 |             num_processed++;  | 
| 8370 |  | 
| 8371 |             if ((num_processed % 1000) == 0) { | 
| 8372 |                 sprintf( | 
| 8373 |                     status_msg, | 
| 8374 |                     "Adding indexes: Fetched %llu of about %llu rows, loading "  | 
| 8375 |                     "of data still remains." , | 
| 8376 |                     num_processed, | 
| 8377 |                     (long long unsigned)share->row_count()); | 
| 8378 |                 thd_proc_info(thd, status_msg); | 
| 8379 |  | 
| 8380 | #ifdef HA_TOKUDB_HAS_THD_PROGRESS | 
| 8381 |                 thd_progress_report( | 
| 8382 |                     thd, | 
| 8383 |                     num_processed, | 
| 8384 |                     (long long unsigned)share->row_count()); | 
| 8385 | #endif | 
| 8386 |  | 
| 8387 |                 if (thd_kill_level(thd)) { | 
| 8388 |                     error = ER_ABORTING_CONNECTION; | 
| 8389 |                     goto cleanup; | 
| 8390 |                 } | 
| 8391 |             } | 
| 8392 |         } | 
| 8393 |         error = tmp_cursor->c_close(tmp_cursor); | 
| 8394 |         assert_always(error==0); | 
| 8395 |         tmp_cursor = NULL; | 
| 8396 |  | 
| 8397 | #ifdef HA_TOKUDB_HAS_THD_PROGRESS | 
| 8398 |         // next progress report phase: closing the loader.  | 
| 8399 |         // incremental reports are done in the loader's callback function. | 
| 8400 |         thd_progress_next_stage(thd); | 
| 8401 | #endif | 
| 8402 |  | 
| 8403 |         error = loader->close(loader); | 
| 8404 |         loader = NULL; | 
| 8405 |  | 
| 8406 |         if (error) goto cleanup; | 
| 8407 |     } | 
| 8408 |     curr_index = curr_num_DBs; | 
| 8409 |     for (uint i = 0; i < num_of_keys; i++, curr_index++) { | 
| 8410 |         if (key_info[i].flags & HA_NOSAME) { | 
| 8411 |             bool is_unique; | 
| 8412 |             error = is_index_unique( | 
| 8413 |                 &is_unique, | 
| 8414 |                 txn, | 
| 8415 |                 share->key_file[curr_index], | 
| 8416 |                 &key_info[i], | 
| 8417 |                 creating_hot_index ? 0 : DB_PRELOCKED_WRITE); | 
| 8418 |             if (error) | 
| 8419 |                 goto cleanup; | 
| 8420 |             if (!is_unique) { | 
| 8421 |                 error = HA_ERR_FOUND_DUPP_KEY; | 
| 8422 |                 last_dup_key = i; | 
| 8423 |                 goto cleanup; | 
| 8424 |             } | 
| 8425 |         } | 
| 8426 |     } | 
| 8427 |  | 
| 8428 |     share->lock(); | 
| 8429 |     // | 
| 8430 |     // We have an accurate row count, might as well update share->rows | 
| 8431 |     // | 
| 8432 |     if(!creating_hot_index) { | 
| 8433 |         share->set_row_count(num_processed, true); | 
| 8434 |     } | 
| 8435 |     // | 
| 8436 |     // now write stuff to status.tokudb | 
| 8437 |     // | 
| 8438 |     for (uint i = 0; i < num_of_keys; i++) { | 
| 8439 |         write_key_name_to_status(share->status_block, key_info[i].name.str, txn); | 
| 8440 |     } | 
| 8441 |     share->unlock(); | 
| 8442 |      | 
| 8443 |     error = 0; | 
| 8444 | cleanup: | 
| 8445 | #ifdef HA_TOKUDB_HAS_THD_PROGRESS | 
| 8446 |     thd_progress_end(thd); | 
| 8447 | #endif | 
| 8448 |     if (rw_lock_taken) { | 
| 8449 |         share->_num_DBs_lock.unlock(); | 
| 8450 |         rw_lock_taken = false; | 
| 8451 |     } | 
| 8452 |     if (tmp_cursor) {             | 
| 8453 |         int r = tmp_cursor->c_close(tmp_cursor); | 
| 8454 |         assert_always(r==0); | 
| 8455 |         tmp_cursor = NULL; | 
| 8456 |     } | 
| 8457 |     if (loader != NULL) { | 
| 8458 |         sprintf(status_msg, "aborting creation of indexes." ); | 
| 8459 |         thd_proc_info(thd, status_msg); | 
| 8460 |         loader->abort(loader); | 
| 8461 |     } | 
| 8462 |     if (indexer != NULL) { | 
| 8463 |         sprintf(status_msg, "aborting creation of indexes." ); | 
| 8464 |         thd_proc_info(thd, status_msg); | 
| 8465 |         rwlock_t_lock_write(share->_num_DBs_lock); | 
| 8466 |         indexer->abort(indexer); | 
| 8467 |         share->_num_DBs_lock.unlock(); | 
| 8468 |     } | 
| 8469 |     if (TOKUDB_LIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_HIDE_DDL_LOCK_ERRORS) == 0) && | 
| 8470 |         error == DB_LOCK_NOTGRANTED) { | 
| 8471 |         sql_print_error( | 
| 8472 |             "Could not add indexes to table %s because another transaction has "  | 
| 8473 |             "accessed the table. To add indexes, make sure no transactions "  | 
| 8474 |             "touch the table." , | 
| 8475 |             share->full_table_name()); | 
| 8476 |     } | 
| 8477 |     thd_proc_info(thd, orig_proc_info); | 
| 8478 |     TOKUDB_HANDLER_DBUG_RETURN(error ? error : loader_error); | 
| 8479 | } | 

int ha_tokudb::tokudb_add_index_poll(void* extra, float progress) {
    LOADER_CONTEXT context = (LOADER_CONTEXT)extra;
| 8482 |     if (thd_killed(context->thd)) { | 
| 8483 |         sprintf(context->write_status_msg, | 
| 8484 |                 "The process has been killed, aborting add index." ); | 
| 8485 |         return ER_ABORTING_CONNECTION; | 
| 8486 |     } | 
| 8487 |     float percentage = progress * 100; | 
| 8488 |     sprintf(context->write_status_msg, | 
| 8489 |             "Adding of indexes to %s about %.1f%% done" , | 
| 8490 |             context->ha->share->full_table_name(), | 
| 8491 |             percentage); | 
| 8492 |     thd_proc_info(context->thd, context->write_status_msg); | 
| 8493 | #ifdef HA_TOKUDB_HAS_THD_PROGRESS | 
| 8494 |     thd_progress_report(context->thd, (unsigned long long)percentage, 100); | 
| 8495 | #endif | 
| 8496 |     return 0; | 
| 8497 | } | 
| 8498 |  | 
| 8499 | // | 
| 8500 | // Internal function called by ha_tokudb::add_index and ha_tokudb::alter_table_phase2 | 
// Closes the added indexes in the error path of add_index and alter_table_phase2
| 8502 | // | 
| 8503 | void ha_tokudb::restore_add_index( | 
| 8504 |     TABLE* table_arg, | 
| 8505 |     uint num_of_keys, | 
| 8506 |     bool incremented_numDBs, | 
| 8507 |     bool modified_DBs) { | 
| 8508 |  | 
| 8509 |     uint curr_num_DBs = table_arg->s->keys + tokudb_test(hidden_primary_key); | 
| 8510 |     uint curr_index = 0; | 
| 8511 |  | 
| 8512 |     // | 
    // need to restore num_DBs, and we have to do it before we close the
    // dictionaries so that there is no window in which num_DBs disagrees
    // with the set of open dictionaries
| 8515 |     // | 
| 8516 |     if (incremented_numDBs) { | 
| 8517 |         rwlock_t_lock_write(share->_num_DBs_lock); | 
| 8518 |         share->num_DBs--; | 
| 8519 |     } | 
| 8520 |     if (modified_DBs) { | 
| 8521 |         curr_index = curr_num_DBs; | 
| 8522 |         for (uint i = 0; i < num_of_keys; i++, curr_index++) { | 
| 8523 |             reset_key_and_col_info(&share->kc_info, curr_index); | 
| 8524 |         } | 
| 8525 |         curr_index = curr_num_DBs; | 
| 8526 |         for (uint i = 0; i < num_of_keys; i++, curr_index++) { | 
| 8527 |             if (share->key_file[curr_index]) { | 
| 8528 |                 int r = share->key_file[curr_index]->close( | 
| 8529 |                     share->key_file[curr_index], | 
| 8530 |                     0); | 
| 8531 |                 assert_always(r==0); | 
| 8532 |                 share->key_file[curr_index] = NULL; | 
| 8533 |             } | 
| 8534 |         } | 
| 8535 |     } | 
| 8536 |     if (incremented_numDBs) { | 
| 8537 |         share->_num_DBs_lock.unlock(); | 
| 8538 |     } | 
| 8539 | } | 
| 8540 |  | 
| 8541 | // | 
| 8542 | // Internal function called by ha_tokudb::prepare_drop_index and ha_tokudb::alter_table_phase2 | 
| 8543 | // With a transaction, drops dictionaries associated with indexes in key_num | 
| 8544 | // | 
| 8545 | int ha_tokudb::drop_indexes( | 
| 8546 |     TABLE* table_arg, | 
| 8547 |     uint* key_num, | 
| 8548 |     uint num_of_keys, | 
| 8549 |     KEY* key_info, | 
| 8550 |     DB_TXN* txn) { | 
| 8551 |  | 
| 8552 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 8553 |     assert_always(txn); | 
| 8554 |  | 
| 8555 |     int error = 0; | 
| 8556 |     for (uint i = 0; i < num_of_keys; i++) { | 
| 8557 |         uint curr_index = key_num[i]; | 
| 8558 |         error = share->key_file[curr_index]->pre_acquire_fileops_lock( | 
| 8559 |             share->key_file[curr_index], | 
| 8560 |             txn); | 
| 8561 |         if (error != 0) { | 
| 8562 |             goto cleanup; | 
| 8563 |         } | 
| 8564 |     } | 
| 8565 |     for (uint i = 0; i < num_of_keys; i++) { | 
| 8566 |         uint curr_index = key_num[i]; | 
| 8567 |         int r = share->key_file[curr_index]->close(share->key_file[curr_index],0); | 
| 8568 |         assert_always(r==0); | 
| 8569 |         share->key_file[curr_index] = NULL; | 
| 8570 |  | 
| 8571 |         error = remove_key_name_from_status( | 
| 8572 |             share->status_block, | 
| 8573 |             key_info[curr_index].name.str, | 
| 8574 |             txn); | 
| 8575 |         if (error) { | 
| 8576 |             goto cleanup; | 
| 8577 |         } | 
| 8578 |          | 
| 8579 |         error = delete_or_rename_dictionary( | 
| 8580 |             share->full_table_name(), | 
| 8581 |             NULL, | 
| 8582 |             key_info[curr_index].name.str, | 
| 8583 |             true, | 
| 8584 |             txn, | 
| 8585 |             true); | 
| 8586 |         if (error) { | 
| 8587 |             goto cleanup; | 
| 8588 |         } | 
| 8589 |     } | 
| 8590 |  | 
| 8591 | cleanup: | 
| 8592 |     if (TOKUDB_LIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_HIDE_DDL_LOCK_ERRORS) == 0) && | 
| 8593 |         error == DB_LOCK_NOTGRANTED) { | 
| 8594 |         sql_print_error( | 
| 8595 |             "Could not drop indexes from table %s because another transaction "  | 
| 8596 |             "has accessed the table. To drop indexes, make sure no "  | 
| 8597 |             "transactions touch the table." , | 
| 8598 |             share->full_table_name()); | 
| 8599 |     } | 
| 8600 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 8601 | } | 
| 8602 |  | 
| 8603 | // | 
| 8604 | // Internal function called by ha_tokudb::prepare_drop_index and | 
| 8605 | // ha_tokudb::alter_table_phase2 | 
// Restores (reopens) the dropped indexes in the error path of
// prepare_drop_index and alter_table_phase2
| 8608 | // | 
| 8609 | void ha_tokudb::restore_drop_indexes( | 
| 8610 |     TABLE* table_arg, | 
| 8611 |     uint* key_num, | 
| 8612 |     uint num_of_keys) { | 
| 8613 |  | 
| 8614 |     // | 
| 8615 |     // reopen closed dictionaries | 
| 8616 |     // | 
| 8617 |     for (uint i = 0; i < num_of_keys; i++) { | 
| 8618 |         int r; | 
| 8619 |         uint curr_index = key_num[i]; | 
| 8620 |         if (share->key_file[curr_index] == NULL) { | 
| 8621 |             r = open_secondary_dictionary( | 
| 8622 |                 &share->key_file[curr_index], | 
| 8623 |                 &table_share->key_info[curr_index], | 
| 8624 |                 share->full_table_name(), | 
| 8625 |                 false, | 
| 8626 |                 NULL); | 
| 8627 |             assert_always(!r); | 
| 8628 |         } | 
| 8629 |     }             | 
| 8630 | } | 
| 8631 |  | 
| 8632 | int ha_tokudb::map_to_handler_error(int error) { | 
| 8633 |     switch (error) { | 
| 8634 |     case DB_LOCK_DEADLOCK: | 
| 8635 |         error = HA_ERR_LOCK_DEADLOCK; | 
| 8636 |         break; | 
| 8637 |     case DB_LOCK_NOTGRANTED: | 
| 8638 |         error = HA_ERR_LOCK_WAIT_TIMEOUT; | 
| 8639 |         break; | 
| 8640 | #if defined(HA_ERR_DISK_FULL) | 
| 8641 |     case ENOSPC: | 
| 8642 |         error = HA_ERR_DISK_FULL; | 
| 8643 |         break; | 
| 8644 | #endif | 
| 8645 |     case DB_KEYEXIST: | 
| 8646 |         error = HA_ERR_FOUND_DUPP_KEY; | 
| 8647 |         break; | 
| 8648 | #if defined(HA_ALTER_ERROR) | 
| 8649 |     case HA_ALTER_ERROR: | 
| 8650 |         error = HA_ERR_UNSUPPORTED; | 
| 8651 |         break; | 
| 8652 | #endif | 
| 8653 |     case TOKUDB_INTERRUPTED: | 
| 8654 |         error = ER_QUERY_INTERRUPTED; | 
| 8655 |         break; | 
| 8656 |     case TOKUDB_OUT_OF_LOCKS: | 
| 8657 |         error = HA_ERR_LOCK_TABLE_FULL; | 
| 8658 |         break; | 
| 8659 |     } | 
| 8660 |     return error; | 
| 8661 | } | 
| 8662 |  | 
| 8663 | void ha_tokudb::print_error(int error, myf errflag) { | 
| 8664 |     error = map_to_handler_error(error); | 
| 8665 |     handler::print_error(error, errflag); | 
| 8666 | } | 
| 8667 |  | 
| 8668 | // | 
// truncates the dictionary associated with index keynr using transaction
// txn; does so by deleting and then recreating the dictionary in the
// context of the transaction
| 8672 | // | 
| 8673 | int ha_tokudb::truncate_dictionary(uint keynr, DB_TXN* txn) { | 
| 8674 |     int error; | 
| 8675 |     bool is_pk = (keynr == primary_key); | 
| 8676 |  | 
| 8677 |     toku_compression_method compression_method = | 
| 8678 |         get_compression_method(share->key_file[keynr]); | 
| 8679 |     error = share->key_file[keynr]->close(share->key_file[keynr], 0); | 
| 8680 |     assert_always(error == 0); | 
| 8681 |  | 
| 8682 |     share->key_file[keynr] = NULL; | 
| 8683 |     if (is_pk) { | 
| 8684 |         share->file = NULL; | 
| 8685 |     } | 
| 8686 |  | 
| 8687 |     if (is_pk) { | 
| 8688 |         error = delete_or_rename_dictionary( | 
| 8689 |             share->full_table_name(), | 
| 8690 |             NULL, | 
| 8691 |             "main" , | 
| 8692 |             false, //is_key | 
| 8693 |             txn, | 
| 8694 |             true); // is a delete | 
| 8695 |         if (error) { | 
| 8696 |             goto cleanup; | 
| 8697 |         } | 
| 8698 |     } else { | 
| 8699 |         error = delete_or_rename_dictionary( | 
| 8700 |             share->full_table_name(), | 
| 8701 |             NULL, | 
| 8702 |             table_share->key_info[keynr].name.str, | 
| 8703 |             true, //is_key | 
| 8704 |             txn, | 
| 8705 |             true); // is a delete | 
| 8706 |         if (error) { | 
| 8707 |             goto cleanup; | 
| 8708 |         } | 
| 8709 |     } | 
| 8710 |  | 
| 8711 |     if (is_pk) { | 
| 8712 |         error = create_main_dictionary( | 
| 8713 |             share->full_table_name(), | 
| 8714 |             table, | 
| 8715 |             txn, | 
| 8716 |             &share->kc_info, | 
| 8717 |             compression_method); | 
| 8718 |     } else { | 
| 8719 |         error = create_secondary_dictionary( | 
| 8720 |             share->full_table_name(), | 
| 8721 |             table, | 
| 8722 |             &table_share->key_info[keynr], | 
| 8723 |             txn, | 
| 8724 |             &share->kc_info, | 
| 8725 |             keynr, | 
| 8726 |             false, | 
| 8727 |             compression_method); | 
| 8728 |     } | 
| 8729 |     if (error) { | 
| 8730 |         goto cleanup; | 
| 8731 |     } | 
| 8732 |  | 
| 8733 | cleanup: | 
| 8734 |     return error; | 
| 8735 | } | 
| 8736 |  | 
| 8737 | // for 5.5 | 
| 8738 | int ha_tokudb::truncate() { | 
| 8739 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 8740 |     int error = delete_all_rows_internal(); | 
| 8741 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 8742 | } | 
| 8743 |  | 
| 8744 | // delete all rows from a table | 
| 8745 | // | 
| 8746 | // effects: delete all of the rows in the main dictionary and all of the | 
| 8747 | // indices.  this must be atomic, so we use the statement transaction | 
| 8748 | // for all of the truncate operations. | 
| 8749 | // locks:  if we have an exclusive table write lock, all of the concurrency | 
| 8750 | // issues go away. | 
| 8751 | // returns: 0 if success | 
| 8752 | int ha_tokudb::delete_all_rows() { | 
| 8753 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 8754 |     int error = 0; | 
| 8755 |     if (thd_sql_command(ha_thd()) != SQLCOM_TRUNCATE) { | 
| 8756 |         share->try_table_lock = true; | 
| 8757 |         error = HA_ERR_WRONG_COMMAND; | 
| 8758 |     } | 
| 8759 |     if (error == 0) | 
| 8760 |         error = delete_all_rows_internal(); | 
| 8761 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 8762 | } | 
| 8763 |  | 
| 8764 | int ha_tokudb::delete_all_rows_internal() { | 
| 8765 |     TOKUDB_HANDLER_DBUG_ENTER("" ); | 
| 8766 |     int error = 0; | 
| 8767 |     uint curr_num_DBs = 0; | 
| 8768 |     DB_TXN* txn = NULL; | 
| 8769 |  | 
| 8770 |     // this should be enough to handle locking as the higher level MDL | 
| 8771 |     // on this table should prevent any new analyze tasks. | 
| 8772 |     share->cancel_background_jobs(); | 
| 8773 |  | 
| 8774 |     error = txn_begin(db_env, 0, &txn, 0, ha_thd()); | 
| 8775 |     if (error) { | 
| 8776 |         goto cleanup; | 
| 8777 |     } | 
| 8778 |  | 
| 8779 |     curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); | 
| 8780 |     for (uint i = 0; i < curr_num_DBs; i++) { | 
| 8781 |         error = share->key_file[i]->pre_acquire_fileops_lock( | 
| 8782 |             share->key_file[i], | 
| 8783 |             txn); | 
| 8784 |         if (error) { | 
| 8785 |             goto cleanup; | 
| 8786 |         } | 
| 8787 |         error = share->key_file[i]->pre_acquire_table_lock( | 
| 8788 |             share->key_file[i], | 
| 8789 |             txn); | 
| 8790 |         if (error) { | 
| 8791 |             goto cleanup; | 
| 8792 |         } | 
| 8793 |     } | 
| 8794 |     for (uint i = 0; i < curr_num_DBs; i++) { | 
| 8795 |         error = truncate_dictionary(i, txn); | 
| 8796 |         if (error) { | 
| 8797 |             goto cleanup; | 
| 8798 |         } | 
| 8799 |     } | 
| 8800 |  | 
| 8801 |     DEBUG_SYNC(ha_thd(), "tokudb_after_truncate_all_dictionarys" ); | 
| 8802 |  | 
| 8803 |     // zap the row count | 
| 8804 |     if (error == 0) { | 
| 8805 |         share->set_row_count(0, false); | 
| 8806 |         // update auto increment | 
| 8807 |         share->last_auto_increment = 0; | 
| 8808 |         // calling write_to_status directly because we need to use txn | 
| 8809 |         write_to_status( | 
| 8810 |             share->status_block, | 
| 8811 |             hatoku_max_ai, | 
| 8812 |             &share->last_auto_increment, | 
| 8813 |             sizeof(share->last_auto_increment), | 
| 8814 |             txn); | 
| 8815 |     } | 
| 8816 |  | 
| 8817 |     share->try_table_lock = true; | 
| 8818 | cleanup: | 
| 8819 |     if (txn) { | 
| 8820 |         if (error) { | 
| 8821 |             abort_txn(txn); | 
| 8822 |         } else { | 
| 8823 |             commit_txn(txn,0); | 
| 8824 |         } | 
| 8825 |     } | 
| 8826 |  | 
| 8827 |     if (TOKUDB_LIKELY(TOKUDB_DEBUG_FLAGS( | 
| 8828 |         TOKUDB_DEBUG_HIDE_DDL_LOCK_ERRORS) == 0) && | 
| 8829 |         error == DB_LOCK_NOTGRANTED) { | 
| 8830 |         sql_print_error( | 
| 8831 |             "Could not truncate table %s because another transaction has "  | 
| 8832 |             "accessed the table. To truncate the table, make sure no "  | 
| 8833 |             "transactions touch the table." , | 
| 8834 |             share->full_table_name()); | 
| 8835 |     } | 
| 8836 |     // | 
| 8837 |     // regardless of errors, need to reopen the DB's | 
| 8838 |     //     | 
| 8839 |     for (uint i = 0; i < curr_num_DBs; i++) { | 
| 8840 |         int r = 0; | 
| 8841 |         if (share->key_file[i] == NULL) { | 
| 8842 |             if (i != primary_key) { | 
| 8843 |                 r = open_secondary_dictionary( | 
| 8844 |                         &share->key_file[i], | 
| 8845 |                         &table_share->key_info[i], | 
| 8846 |                         share->full_table_name(), | 
| 8847 |                         false, | 
| 8848 |                         NULL); | 
| 8849 |                 assert_always(!r); | 
| 8850 |             } else { | 
                r = open_main_dictionary(
                        share->full_table_name(),
                        false,
                        NULL);
                assert_always(!r);
| 8856 |             } | 
| 8857 |         } | 
| 8858 |     } | 
| 8859 |     TOKUDB_HANDLER_DBUG_RETURN(error); | 
| 8860 | } | 
| 8861 |  | 
| 8862 | void ha_tokudb::set_loader_error(int err) { | 
| 8863 |     loader_error = err; | 
| 8864 | } | 
| 8865 |  | 
| 8866 | void ha_tokudb::set_dup_value_for_pk(DBT* key) { | 
| 8867 |     assert_always(!hidden_primary_key); | 
| 8868 |     unpack_key(table->record[0],key,primary_key); | 
| 8869 |     last_dup_key = primary_key; | 
| 8870 | } | 
| 8871 |  | 
| 8872 | void ha_tokudb::close_dsmrr() { | 
| 8873 | #ifdef MARIADB_BASE_VERSION | 
| 8874 |     ds_mrr.dsmrr_close(); | 
| 8875 | #elif 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699 | 
| 8876 |     ds_mrr.dsmrr_close(); | 
| 8877 | #endif | 
| 8878 | } | 
| 8879 |  | 
| 8880 | void ha_tokudb::reset_dsmrr() { | 
| 8881 | #ifdef MARIADB_BASE_VERSION | 
| 8882 |     ds_mrr.dsmrr_close(); | 
| 8883 | #elif 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699 | 
| 8884 |     ds_mrr.reset(); | 
| 8885 | #endif | 
| 8886 | } | 
| 8887 |  | 
| 8888 | // we cache the information so we can do filtering ourselves, | 
| 8889 | // but as far as MySQL knows, we are not doing any filtering, | 
| 8890 | // so if we happen to miss filtering a row that does not match | 
| 8891 | // idx_cond_arg, MySQL will catch it. | 
// This allows us to deal with only index_next and index_prev, without
// needing to worry about the other index_XXX functions
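// For example (hypothetical query, describing standard ICP behavior):
// given an index on (a, b) and a pushed condition like
// "a = 5 AND b % 3 = 0", we can evaluate it while walking the index in
// index_next/index_prev; if we ever fail to filter a non-matching row,
// MySQL re-evaluates the condition on the rows we return, so a miss costs
// only performance, never correctness.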
| 8894 | Item* ha_tokudb::idx_cond_push(uint keyno_arg, Item* idx_cond_arg) { | 
| 8895 |     toku_pushed_idx_cond_keyno = keyno_arg; | 
| 8896 |     toku_pushed_idx_cond = idx_cond_arg; | 
| 8897 |     return idx_cond_arg; | 
| 8898 | } | 
| 8899 |  | 
| 8900 | void ha_tokudb::cancel_pushed_idx_cond() { | 
| 8901 |     invalidate_icp(); | 
| 8902 |     handler::cancel_pushed_idx_cond(); | 
| 8903 | } | 
| 8904 |  | 
| 8905 | void ha_tokudb::cleanup_txn(DB_TXN *txn) { | 
| 8906 |     if (transaction == txn && cursor) { | 
| 8907 |         int r = cursor->c_close(cursor); | 
| 8908 |         assert_always(r == 0); | 
| 8909 |         cursor = NULL; | 
| 8910 |     } | 
| 8911 | } | 
| 8912 |  | 
| 8913 | void ha_tokudb::add_to_trx_handler_list() { | 
| 8914 |     tokudb_trx_data* trx = | 
| 8915 |         (tokudb_trx_data*)thd_get_ha_data(ha_thd(), tokudb_hton); | 
| 8916 |     trx->handlers = list_add(trx->handlers, &trx_handler_list); | 
| 8917 | } | 
| 8918 |  | 
| 8919 | void ha_tokudb::remove_from_trx_handler_list() { | 
| 8920 |     tokudb_trx_data* trx = | 
| 8921 |         (tokudb_trx_data*)thd_get_ha_data(ha_thd(), tokudb_hton); | 
| 8922 |     trx->handlers = list_delete(trx->handlers, &trx_handler_list); | 
| 8923 | } | 
| 8924 |  | 
| 8925 | void ha_tokudb::rpl_before_write_rows() { | 
| 8926 |     in_rpl_write_rows = true; | 
| 8927 | } | 
| 8928 |  | 
| 8929 | void ha_tokudb::rpl_after_write_rows() { | 
| 8930 |     in_rpl_write_rows = false; | 
| 8931 | } | 
| 8932 |  | 
| 8933 | void ha_tokudb::rpl_before_delete_rows() { | 
| 8934 |     in_rpl_delete_rows = true; | 
| 8935 | } | 
| 8936 |  | 
| 8937 | void ha_tokudb::rpl_after_delete_rows() { | 
| 8938 |     in_rpl_delete_rows = false; | 
| 8939 | } | 
| 8940 |  | 
| 8941 | void ha_tokudb::rpl_before_update_rows() { | 
| 8942 |     in_rpl_update_rows = true; | 
| 8943 | } | 
| 8944 |  | 
| 8945 | void ha_tokudb::rpl_after_update_rows() { | 
| 8946 |     in_rpl_update_rows = false; | 
| 8947 | } | 
| 8948 |  | 
| 8949 | bool ha_tokudb::rpl_lookup_rows() { | 
| 8950 |     if (!in_rpl_delete_rows && !in_rpl_update_rows) | 
| 8951 |         return true; | 
| 8952 |     else | 
| 8953 |         return tokudb::sysvars::rpl_lookup_rows(ha_thd()); | 
| 8954 | } | 
| 8955 |  | 
| 8956 | // table admin  | 
| 8957 | #include "ha_tokudb_admin.cc" | 
| 8958 |  | 
| 8959 | // update functions | 
| 8960 | #include "tokudb_update_fun.cc" | 
| 8961 |  | 
| 8962 | // fast updates | 
| 8963 | #include "ha_tokudb_update.cc" | 
| 8964 |  | 
| 8965 | // alter table code for various mysql distros | 
| 8966 | #include "ha_tokudb_alter_55.cc" | 
| 8967 | #include "ha_tokudb_alter_56.cc" | 
| 8968 |  | 
| 8969 | // mrr | 
| 8970 | #ifdef MARIADB_BASE_VERSION | 
| 8971 | #include  "ha_tokudb_mrr_maria.cc" | 
| 8972 | #elif 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699 | 
| 8973 | #include  "ha_tokudb_mrr_mysql.cc" | 
| 8974 | #endif | 
| 8975 |  | 
| 8976 | // key comparisons | 
| 8977 | #include "hatoku_cmp.cc" | 
| 8978 |  | 
| 8979 | // handlerton | 
| 8980 | #include "hatoku_hton.cc" | 
| 8981 |  | 
| 8982 | // generate template functions | 
| 8983 | namespace tokudb { | 
| 8984 |     template size_t vlq_encode_ui(uint32_t n, void *p, size_t s); | 
| 8985 |     template size_t vlq_decode_ui(uint32_t *np, void *p, size_t s); | 
| 8986 |     template size_t vlq_encode_ui(uint64_t n, void *p, size_t s); | 
| 8987 |     template size_t vlq_decode_ui(uint64_t *np, void *p, size_t s); | 
| 8988 | }; | 
| 8989 |  |