| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved. |
| 4 | |
| 5 | This program is free software; you can redistribute it and/or modify it under |
| 6 | the terms of the GNU General Public License as published by the Free Software |
| 7 | Foundation; version 2 of the License. |
| 8 | |
| 9 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 10 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 11 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License along with |
| 14 | this program; if not, write to the Free Software Foundation, Inc., |
| 15 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 16 | |
| 17 | *****************************************************************************/ |
| 18 | |
| 19 | /********************************************************************//** |
| 20 | @file include/btr0bulk.h |
| 21 | The B-tree bulk load |
| 22 | |
| 23 | Created 03/11/2014 Shaohua Wang |
| 24 | *************************************************************************/ |
| 25 | |
| 26 | #ifndef btr0bulk_h |
| 27 | #define btr0bulk_h |
| 28 | |
| 29 | #include "dict0dict.h" |
| 30 | #include "page0cur.h" |
| 31 | #include "ut0new.h" |
| 32 | |
| 33 | #include <vector> |
| 34 | |
| 35 | /** Innodb B-tree index fill factor for bulk load. */ |
| 36 | extern uint innobase_fill_factor; |
| 37 | |
| 38 | /* |
| 39 | The proper function call sequence of PageBulk is as below: |
| 40 | -- PageBulk::init |
| 41 | -- PageBulk::insert |
| 42 | -- PageBulk::finish |
| 43 | -- PageBulk::compress(COMPRESSED table only) |
| 44 | -- BtrBulk::pageSplit(COMPRESSED table only) |
| 45 | -- PageBulk::commit |
| 46 | */ |
| 47 | |
| 48 | class PageBulk |
| 49 | { |
| 50 | public: |
| 51 | /** Constructor |
| 52 | @param[in] index B-tree index |
| 53 | @param[in] page_no page number |
| 54 | @param[in] level page level |
| 55 | @param[in] trx_id transaction id |
| 56 | @param[in] observer flush observer */ |
| 57 | PageBulk( |
| 58 | dict_index_t* index, |
| 59 | trx_id_t trx_id, |
| 60 | ulint page_no, |
| 61 | ulint level, |
| 62 | FlushObserver* observer) |
| 63 | : |
| 64 | m_heap(NULL), |
| 65 | m_index(index), |
| 66 | m_mtr(NULL), |
| 67 | m_trx_id(trx_id), |
| 68 | m_block(NULL), |
| 69 | m_page(NULL), |
| 70 | m_page_zip(NULL), |
| 71 | m_cur_rec(NULL), |
| 72 | m_page_no(page_no), |
| 73 | m_level(level), |
| 74 | m_is_comp(dict_table_is_comp(index->table)), |
| 75 | m_heap_top(NULL), |
| 76 | m_rec_no(0), |
| 77 | m_free_space(0), |
| 78 | m_reserved_space(0), |
| 79 | #ifdef UNIV_DEBUG |
| 80 | m_total_data(0), |
| 81 | #endif /* UNIV_DEBUG */ |
| 82 | m_modify_clock(0), |
| 83 | m_flush_observer(observer), |
| 84 | m_err(DB_SUCCESS) |
| 85 | { |
| 86 | ut_ad(!dict_index_is_spatial(m_index)); |
| 87 | } |
| 88 | |
| 89 | /** Deconstructor */ |
| 90 | ~PageBulk() |
| 91 | { |
| 92 | mem_heap_free(m_heap); |
| 93 | } |
| 94 | |
| 95 | /** Initialize members and allocate page if needed and start mtr. |
| 96 | Note: must be called and only once right after constructor. |
| 97 | @return error code */ |
| 98 | dberr_t init(); |
| 99 | |
| 100 | /** Insert a record in the page. |
| 101 | @param[in] rec record |
| 102 | @param[in] offsets record offsets */ |
| 103 | void insert(const rec_t* rec, ulint* offsets); |
| 104 | |
| 105 | /** Mark end of insertion to the page. Scan all records to set page |
| 106 | dirs, and set page header members. */ |
| 107 | void finish(); |
| 108 | |
| 109 | /** Commit mtr for a page |
| 110 | @param[in] success Flag whether all inserts succeed. */ |
| 111 | void commit(bool success); |
| 112 | |
| 113 | /** Compress if it is compressed table |
| 114 | @return true compress successfully or no need to compress |
| 115 | @return false compress failed. */ |
| 116 | bool compress(); |
| 117 | |
| 118 | /** Check whether the record needs to be stored externally. |
| 119 | @return true |
| 120 | @return false */ |
| 121 | bool needExt(const dtuple_t* tuple, ulint rec_size); |
| 122 | |
| 123 | /** Store external record |
| 124 | @param[in] big_rec external recrod |
| 125 | @param[in] offsets record offsets |
| 126 | @return error code */ |
| 127 | dberr_t storeExt(const big_rec_t* big_rec, ulint* offsets); |
| 128 | |
| 129 | /** Get node pointer |
| 130 | @return node pointer */ |
| 131 | dtuple_t* getNodePtr(); |
| 132 | |
| 133 | /** Get split rec in the page. We split a page in half when compresssion |
| 134 | fails, and the split rec should be copied to the new page. |
| 135 | @return split rec */ |
| 136 | rec_t* getSplitRec(); |
| 137 | |
| 138 | /** Copy all records after split rec including itself. |
| 139 | @param[in] rec split rec */ |
| 140 | void copyIn(rec_t* split_rec); |
| 141 | |
| 142 | /** Remove all records after split rec including itself. |
| 143 | @param[in] rec split rec */ |
| 144 | void copyOut(rec_t* split_rec); |
| 145 | |
| 146 | /** Set next page |
| 147 | @param[in] next_page_no next page no */ |
| 148 | void setNext(ulint next_page_no); |
| 149 | |
| 150 | /** Set previous page |
| 151 | @param[in] prev_page_no previous page no */ |
| 152 | void setPrev(ulint prev_page_no); |
| 153 | |
| 154 | /** Release block by commiting mtr */ |
| 155 | inline void release(); |
| 156 | |
| 157 | /** Start mtr and latch block */ |
| 158 | inline dberr_t latch(); |
| 159 | |
| 160 | /** Check if required space is available in the page for the rec |
| 161 | to be inserted. We check fill factor & padding here. |
| 162 | @param[in] length required length |
| 163 | @return true if space is available */ |
| 164 | inline bool isSpaceAvailable(ulint rec_size); |
| 165 | |
| 166 | /** Get page no */ |
| 167 | ulint getPageNo() |
| 168 | { |
| 169 | return(m_page_no); |
| 170 | } |
| 171 | |
| 172 | /** Get page level */ |
| 173 | ulint getLevel() |
| 174 | { |
| 175 | return(m_level); |
| 176 | } |
| 177 | |
| 178 | /** Get record no */ |
| 179 | ulint getRecNo() |
| 180 | { |
| 181 | return(m_rec_no); |
| 182 | } |
| 183 | |
| 184 | /** Get page */ |
| 185 | page_t* getPage() |
| 186 | { |
| 187 | return(m_page); |
| 188 | } |
| 189 | |
| 190 | /** Get page zip */ |
| 191 | page_zip_des_t* getPageZip() |
| 192 | { |
| 193 | return(m_page_zip); |
| 194 | } |
| 195 | |
| 196 | dberr_t getError() |
| 197 | { |
| 198 | return(m_err); |
| 199 | } |
| 200 | |
| 201 | /* Memory heap for internal allocation */ |
| 202 | mem_heap_t* m_heap; |
| 203 | |
| 204 | private: |
| 205 | /** The index B-tree */ |
| 206 | dict_index_t* m_index; |
| 207 | |
| 208 | /** The min-transaction */ |
| 209 | mtr_t* m_mtr; |
| 210 | |
| 211 | /** The transaction id */ |
| 212 | trx_id_t m_trx_id; |
| 213 | |
| 214 | /** The buffer block */ |
| 215 | buf_block_t* m_block; |
| 216 | |
| 217 | /** The page */ |
| 218 | page_t* m_page; |
| 219 | |
| 220 | /** The page zip descriptor */ |
| 221 | page_zip_des_t* m_page_zip; |
| 222 | |
| 223 | /** The current rec, just before the next insert rec */ |
| 224 | rec_t* m_cur_rec; |
| 225 | |
| 226 | /** The page no */ |
| 227 | ulint m_page_no; |
| 228 | |
| 229 | /** The page level in B-tree */ |
| 230 | ulint m_level; |
| 231 | |
| 232 | /** Flag: is page in compact format */ |
| 233 | const bool m_is_comp; |
| 234 | |
| 235 | /** The heap top in page for next insert */ |
| 236 | byte* m_heap_top; |
| 237 | |
| 238 | /** User record no */ |
| 239 | ulint m_rec_no; |
| 240 | |
| 241 | /** The free space left in the page */ |
| 242 | ulint m_free_space; |
| 243 | |
| 244 | /** The reserved space for fill factor */ |
| 245 | ulint m_reserved_space; |
| 246 | |
| 247 | /** The padding space for compressed page */ |
| 248 | ulint m_padding_space; |
| 249 | |
| 250 | #ifdef UNIV_DEBUG |
| 251 | /** Total data in the page */ |
| 252 | ulint m_total_data; |
| 253 | #endif /* UNIV_DEBUG */ |
| 254 | |
| 255 | /** The modify clock value of the buffer block |
| 256 | when the block is re-pinned */ |
| 257 | ib_uint64_t m_modify_clock; |
| 258 | |
| 259 | /** Flush observer */ |
| 260 | FlushObserver* m_flush_observer; |
| 261 | |
| 262 | /** Operation result DB_SUCCESS or error code */ |
| 263 | dberr_t m_err; |
| 264 | }; |
| 265 | |
| 266 | typedef std::vector<PageBulk*, ut_allocator<PageBulk*> > |
| 267 | page_bulk_vector; |
| 268 | |
| 269 | class BtrBulk |
| 270 | { |
| 271 | public: |
| 272 | /** Constructor |
| 273 | @param[in] index B-tree index |
| 274 | @param[in] trx_id transaction id |
| 275 | @param[in] observer flush observer */ |
| 276 | BtrBulk( |
| 277 | dict_index_t* index, |
| 278 | trx_id_t trx_id, |
| 279 | FlushObserver* observer) |
| 280 | : |
| 281 | m_heap(NULL), |
| 282 | m_index(index), |
| 283 | m_trx_id(trx_id), |
| 284 | m_flush_observer(observer) |
| 285 | { |
| 286 | ut_ad(m_flush_observer != NULL); |
| 287 | ut_d(my_atomic_addlint( |
| 288 | &m_index->table->space->redo_skipped_count, 1)); |
| 289 | } |
| 290 | |
| 291 | /** Destructor */ |
| 292 | ~BtrBulk() |
| 293 | { |
| 294 | mem_heap_free(m_heap); |
| 295 | UT_DELETE(m_page_bulks); |
| 296 | ut_d(my_atomic_addlint( |
| 297 | &m_index->table->space->redo_skipped_count, |
| 298 | ulint(-1))); |
| 299 | } |
| 300 | |
| 301 | /** Initialization |
| 302 | Note: must be called right after constructor. */ |
| 303 | void init() |
| 304 | { |
| 305 | ut_ad(m_heap == NULL); |
| 306 | m_heap = mem_heap_create(1000); |
| 307 | |
| 308 | m_page_bulks = UT_NEW_NOKEY(page_bulk_vector()); |
| 309 | } |
| 310 | |
| 311 | /** Insert a tuple |
| 312 | @param[in] tuple tuple to insert. |
| 313 | @return error code */ |
| 314 | dberr_t insert(dtuple_t* tuple) |
| 315 | { |
| 316 | return(insert(tuple, 0)); |
| 317 | } |
| 318 | |
| 319 | /** Btree bulk load finish. We commit the last page in each level |
| 320 | and copy the last page in top level to the root page of the index |
| 321 | if no error occurs. |
| 322 | @param[in] err whether bulk load was successful until now |
| 323 | @return error code */ |
| 324 | dberr_t finish(dberr_t err); |
| 325 | |
| 326 | /** Release all latches */ |
| 327 | void release(); |
| 328 | |
| 329 | /** Re-latch all latches */ |
| 330 | void latch(); |
| 331 | |
| 332 | private: |
| 333 | /** Insert a tuple to a page in a level |
| 334 | @param[in] tuple tuple to insert |
| 335 | @param[in] level B-tree level |
| 336 | @return error code */ |
| 337 | dberr_t insert(dtuple_t* tuple, ulint level); |
| 338 | |
| 339 | /** Split a page |
| 340 | @param[in] page_bulk page to split |
| 341 | @param[in] next_page_bulk next page |
| 342 | @return error code */ |
| 343 | dberr_t pageSplit(PageBulk* page_bulk, |
| 344 | PageBulk* next_page_bulk); |
| 345 | |
| 346 | /** Commit(finish) a page. We set next/prev page no, compress a page of |
| 347 | compressed table and split the page if compression fails, insert a node |
| 348 | pointer to father page if needed, and commit mini-transaction. |
| 349 | @param[in] page_bulk page to commit |
| 350 | @param[in] next_page_bulk next page |
| 351 | @param[in] insert_father flag whether need to insert node ptr |
| 352 | @return error code */ |
| 353 | dberr_t pageCommit(PageBulk* page_bulk, |
| 354 | PageBulk* next_page_bulk, |
| 355 | bool insert_father); |
| 356 | |
| 357 | /** Abort a page when an error occurs |
| 358 | @param[in] page_bulk page bulk object |
| 359 | Note: we should call pageAbort for a PageBulk object, which is not in |
| 360 | m_page_bulks after pageCommit, and we will commit or abort PageBulk |
| 361 | objects in function "finish". */ |
| 362 | void pageAbort(PageBulk* page_bulk) |
| 363 | { |
| 364 | page_bulk->commit(false); |
| 365 | } |
| 366 | |
| 367 | /** Log free check */ |
| 368 | void logFreeCheck(); |
| 369 | |
| 370 | private: |
| 371 | /** Memory heap for allocation */ |
| 372 | mem_heap_t* m_heap; |
| 373 | |
| 374 | /** B-tree index */ |
| 375 | dict_index_t* m_index; |
| 376 | |
| 377 | /** Transaction id */ |
| 378 | trx_id_t m_trx_id; |
| 379 | |
| 380 | /** Root page level */ |
| 381 | ulint m_root_level; |
| 382 | |
| 383 | /** Flush observer */ |
| 384 | FlushObserver* m_flush_observer; |
| 385 | |
| 386 | /** Page cursor vector for all level */ |
| 387 | page_bulk_vector* m_page_bulks; |
| 388 | }; |
| 389 | |
| 390 | #endif |
| 391 | |