1/*****************************************************************************
2
3Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free Software
7Foundation; version 2 of the License.
8
9This program is distributed in the hope that it will be useful, but WITHOUT
10ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13You should have received a copy of the GNU General Public License along with
14this program; if not, write to the Free Software Foundation, Inc.,
1551 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
16
17*****************************************************************************/
18
19/********************************************************************//**
20@file include/btr0bulk.h
21The B-tree bulk load
22
23Created 03/11/2014 Shaohua Wang
24*************************************************************************/
25
26#ifndef btr0bulk_h
27#define btr0bulk_h
28
29#include "dict0dict.h"
30#include "page0cur.h"
31#include "ut0new.h"
32
33#include <vector>
34
/** InnoDB B-tree index fill factor for bulk load.
Declared here only; the variable is defined in another translation unit. */
extern uint	innobase_fill_factor;
37
/*
The proper function call sequence of PageBulk is as below:
-- PageBulk::init
-- PageBulk::insert
-- PageBulk::finish
-- PageBulk::compress (COMPRESSED table only)
-- BtrBulk::pageSplit (COMPRESSED table only, when compression fails)
-- PageBulk::commit
*/
47
48class PageBulk
49{
50public:
51 /** Constructor
52 @param[in] index B-tree index
53 @param[in] page_no page number
54 @param[in] level page level
55 @param[in] trx_id transaction id
56 @param[in] observer flush observer */
57 PageBulk(
58 dict_index_t* index,
59 trx_id_t trx_id,
60 ulint page_no,
61 ulint level,
62 FlushObserver* observer)
63 :
64 m_heap(NULL),
65 m_index(index),
66 m_mtr(NULL),
67 m_trx_id(trx_id),
68 m_block(NULL),
69 m_page(NULL),
70 m_page_zip(NULL),
71 m_cur_rec(NULL),
72 m_page_no(page_no),
73 m_level(level),
74 m_is_comp(dict_table_is_comp(index->table)),
75 m_heap_top(NULL),
76 m_rec_no(0),
77 m_free_space(0),
78 m_reserved_space(0),
79#ifdef UNIV_DEBUG
80 m_total_data(0),
81#endif /* UNIV_DEBUG */
82 m_modify_clock(0),
83 m_flush_observer(observer),
84 m_err(DB_SUCCESS)
85 {
86 ut_ad(!dict_index_is_spatial(m_index));
87 }
88
89 /** Deconstructor */
90 ~PageBulk()
91 {
92 mem_heap_free(m_heap);
93 }
94
95 /** Initialize members and allocate page if needed and start mtr.
96 Note: must be called and only once right after constructor.
97 @return error code */
98 dberr_t init();
99
100 /** Insert a record in the page.
101 @param[in] rec record
102 @param[in] offsets record offsets */
103 void insert(const rec_t* rec, ulint* offsets);
104
105 /** Mark end of insertion to the page. Scan all records to set page
106 dirs, and set page header members. */
107 void finish();
108
109 /** Commit mtr for a page
110 @param[in] success Flag whether all inserts succeed. */
111 void commit(bool success);
112
113 /** Compress if it is compressed table
114 @return true compress successfully or no need to compress
115 @return false compress failed. */
116 bool compress();
117
118 /** Check whether the record needs to be stored externally.
119 @return true
120 @return false */
121 bool needExt(const dtuple_t* tuple, ulint rec_size);
122
123 /** Store external record
124 @param[in] big_rec external recrod
125 @param[in] offsets record offsets
126 @return error code */
127 dberr_t storeExt(const big_rec_t* big_rec, ulint* offsets);
128
129 /** Get node pointer
130 @return node pointer */
131 dtuple_t* getNodePtr();
132
133 /** Get split rec in the page. We split a page in half when compresssion
134 fails, and the split rec should be copied to the new page.
135 @return split rec */
136 rec_t* getSplitRec();
137
138 /** Copy all records after split rec including itself.
139 @param[in] rec split rec */
140 void copyIn(rec_t* split_rec);
141
142 /** Remove all records after split rec including itself.
143 @param[in] rec split rec */
144 void copyOut(rec_t* split_rec);
145
146 /** Set next page
147 @param[in] next_page_no next page no */
148 void setNext(ulint next_page_no);
149
150 /** Set previous page
151 @param[in] prev_page_no previous page no */
152 void setPrev(ulint prev_page_no);
153
154 /** Release block by commiting mtr */
155 inline void release();
156
157 /** Start mtr and latch block */
158 inline dberr_t latch();
159
160 /** Check if required space is available in the page for the rec
161 to be inserted. We check fill factor & padding here.
162 @param[in] length required length
163 @return true if space is available */
164 inline bool isSpaceAvailable(ulint rec_size);
165
166 /** Get page no */
167 ulint getPageNo()
168 {
169 return(m_page_no);
170 }
171
172 /** Get page level */
173 ulint getLevel()
174 {
175 return(m_level);
176 }
177
178 /** Get record no */
179 ulint getRecNo()
180 {
181 return(m_rec_no);
182 }
183
184 /** Get page */
185 page_t* getPage()
186 {
187 return(m_page);
188 }
189
190 /** Get page zip */
191 page_zip_des_t* getPageZip()
192 {
193 return(m_page_zip);
194 }
195
196 dberr_t getError()
197 {
198 return(m_err);
199 }
200
201 /* Memory heap for internal allocation */
202 mem_heap_t* m_heap;
203
204private:
205 /** The index B-tree */
206 dict_index_t* m_index;
207
208 /** The min-transaction */
209 mtr_t* m_mtr;
210
211 /** The transaction id */
212 trx_id_t m_trx_id;
213
214 /** The buffer block */
215 buf_block_t* m_block;
216
217 /** The page */
218 page_t* m_page;
219
220 /** The page zip descriptor */
221 page_zip_des_t* m_page_zip;
222
223 /** The current rec, just before the next insert rec */
224 rec_t* m_cur_rec;
225
226 /** The page no */
227 ulint m_page_no;
228
229 /** The page level in B-tree */
230 ulint m_level;
231
232 /** Flag: is page in compact format */
233 const bool m_is_comp;
234
235 /** The heap top in page for next insert */
236 byte* m_heap_top;
237
238 /** User record no */
239 ulint m_rec_no;
240
241 /** The free space left in the page */
242 ulint m_free_space;
243
244 /** The reserved space for fill factor */
245 ulint m_reserved_space;
246
247 /** The padding space for compressed page */
248 ulint m_padding_space;
249
250#ifdef UNIV_DEBUG
251 /** Total data in the page */
252 ulint m_total_data;
253#endif /* UNIV_DEBUG */
254
255 /** The modify clock value of the buffer block
256 when the block is re-pinned */
257 ib_uint64_t m_modify_clock;
258
259 /** Flush observer */
260 FlushObserver* m_flush_observer;
261
262 /** Operation result DB_SUCCESS or error code */
263 dberr_t m_err;
264};
265
/** Vector of PageBulk pointers, allocated through ut_allocator.
BtrBulk keeps one of these as its per-level page cursor list. */
typedef std::vector<PageBulk*, ut_allocator<PageBulk*> >
	page_bulk_vector;
268
269class BtrBulk
270{
271public:
272 /** Constructor
273 @param[in] index B-tree index
274 @param[in] trx_id transaction id
275 @param[in] observer flush observer */
276 BtrBulk(
277 dict_index_t* index,
278 trx_id_t trx_id,
279 FlushObserver* observer)
280 :
281 m_heap(NULL),
282 m_index(index),
283 m_trx_id(trx_id),
284 m_flush_observer(observer)
285 {
286 ut_ad(m_flush_observer != NULL);
287 ut_d(my_atomic_addlint(
288 &m_index->table->space->redo_skipped_count, 1));
289 }
290
291 /** Destructor */
292 ~BtrBulk()
293 {
294 mem_heap_free(m_heap);
295 UT_DELETE(m_page_bulks);
296 ut_d(my_atomic_addlint(
297 &m_index->table->space->redo_skipped_count,
298 ulint(-1)));
299 }
300
301 /** Initialization
302 Note: must be called right after constructor. */
303 void init()
304 {
305 ut_ad(m_heap == NULL);
306 m_heap = mem_heap_create(1000);
307
308 m_page_bulks = UT_NEW_NOKEY(page_bulk_vector());
309 }
310
311 /** Insert a tuple
312 @param[in] tuple tuple to insert.
313 @return error code */
314 dberr_t insert(dtuple_t* tuple)
315 {
316 return(insert(tuple, 0));
317 }
318
319 /** Btree bulk load finish. We commit the last page in each level
320 and copy the last page in top level to the root page of the index
321 if no error occurs.
322 @param[in] err whether bulk load was successful until now
323 @return error code */
324 dberr_t finish(dberr_t err);
325
326 /** Release all latches */
327 void release();
328
329 /** Re-latch all latches */
330 void latch();
331
332private:
333 /** Insert a tuple to a page in a level
334 @param[in] tuple tuple to insert
335 @param[in] level B-tree level
336 @return error code */
337 dberr_t insert(dtuple_t* tuple, ulint level);
338
339 /** Split a page
340 @param[in] page_bulk page to split
341 @param[in] next_page_bulk next page
342 @return error code */
343 dberr_t pageSplit(PageBulk* page_bulk,
344 PageBulk* next_page_bulk);
345
346 /** Commit(finish) a page. We set next/prev page no, compress a page of
347 compressed table and split the page if compression fails, insert a node
348 pointer to father page if needed, and commit mini-transaction.
349 @param[in] page_bulk page to commit
350 @param[in] next_page_bulk next page
351 @param[in] insert_father flag whether need to insert node ptr
352 @return error code */
353 dberr_t pageCommit(PageBulk* page_bulk,
354 PageBulk* next_page_bulk,
355 bool insert_father);
356
357 /** Abort a page when an error occurs
358 @param[in] page_bulk page bulk object
359 Note: we should call pageAbort for a PageBulk object, which is not in
360 m_page_bulks after pageCommit, and we will commit or abort PageBulk
361 objects in function "finish". */
362 void pageAbort(PageBulk* page_bulk)
363 {
364 page_bulk->commit(false);
365 }
366
367 /** Log free check */
368 void logFreeCheck();
369
370private:
371 /** Memory heap for allocation */
372 mem_heap_t* m_heap;
373
374 /** B-tree index */
375 dict_index_t* m_index;
376
377 /** Transaction id */
378 trx_id_t m_trx_id;
379
380 /** Root page level */
381 ulint m_root_level;
382
383 /** Flush observer */
384 FlushObserver* m_flush_observer;
385
386 /** Page cursor vector for all level */
387 page_bulk_vector* m_page_bulks;
388};
389
390#endif
391