1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2017, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file ut/ut0stage.h |
22 | Supplementary code to performance schema stage instrumentation. |
23 | |
24 | Created Nov 12, 2014 Vasil Dimov |
25 | *******************************************************/ |
26 | |
27 | #ifndef ut0stage_h |
28 | #define ut0stage_h |
29 | |
30 | #include <algorithm> |
31 | #include <math.h> |
32 | |
33 | #include "my_global.h" /* needed for headers from mysql/psi/ */ |
34 | |
35 | #include "mysql/psi/mysql_stage.h" /* mysql_stage_inc_work_completed */ |
36 | #include "mysql/psi/psi.h" /* HAVE_PSI_STAGE_INTERFACE, PSI_stage_progress */ |
37 | |
38 | #include "univ.i" |
39 | |
40 | #include "dict0mem.h" /* dict_index_t */ |
41 | #include "row0log.h" /* row_log_estimate_work() */ |
42 | #include "srv0srv.h" /* ut_stage_alter_t */ |
43 | |
44 | #ifdef HAVE_PSI_STAGE_INTERFACE |
45 | |
46 | typedef void PSI_stage_progress; |
47 | |
48 | /** Class used to report ALTER TABLE progress via performance_schema. |
49 | The only user of this class is the ALTER TABLE code and it calls the methods |
50 | in the following order |
51 | constructor |
52 | begin_phase_read_pk() |
53 | multiple times: |
54 | n_pk_recs_inc() // once per record read |
55 | inc() // once per page read |
56 | end_phase_read_pk() |
57 | if any new indexes are being added, for each one: |
58 | begin_phase_sort() |
59 | multiple times: |
60 | inc() // once per record sorted |
61 | begin_phase_insert() |
62 | multiple times: |
63 | inc() // once per record inserted |
64 | being_phase_log_index() |
65 | multiple times: |
66 | inc() // once per log-block applied |
67 | begin_phase_flush() |
68 | multiple times: |
69 | inc() // once per page flushed |
70 | begin_phase_log_table() |
71 | multiple times: |
72 | inc() // once per log-block applied |
73 | begin_phase_end() |
74 | destructor |
75 | |
76 | This class knows the specifics of each phase and tries to increment the |
77 | progress in an even manner across the entire ALTER TABLE lifetime. */ |
78 | class ut_stage_alter_t { |
79 | public: |
80 | /** Constructor. |
81 | @param[in] pk primary key of the old table */ |
82 | explicit |
83 | ut_stage_alter_t( |
84 | const dict_index_t* pk) |
85 | : |
86 | m_progress(NULL), |
87 | m_pk(pk), |
88 | m_n_pk_recs(0), |
89 | m_n_pk_pages(0), |
90 | m_n_recs_processed(0), |
91 | m_n_flush_pages(0), |
92 | m_cur_phase(NOT_STARTED) |
93 | { |
94 | } |
95 | |
96 | /** Destructor. */ |
97 | ~ut_stage_alter_t(); |
98 | |
99 | /** Flag an ALTER TABLE start (read primary key phase). |
100 | @param[in] n_sort_indexes number of indexes that will be sorted |
101 | during ALTER TABLE, used for estimating the total work to be done */ |
102 | void |
103 | begin_phase_read_pk( |
104 | ulint n_sort_indexes); |
105 | |
106 | /** Increment the number of records in PK (table) with 1. |
107 | This is used to get more accurate estimate about the number of |
108 | records per page which is needed because some phases work on |
109 | per-page basis while some work on per-record basis and we want |
110 | to get the progress as even as possible. */ |
111 | void |
112 | n_pk_recs_inc(); |
113 | |
114 | /** Flag either one record or one page processed, depending on the |
115 | current phase. |
116 | @param[in] inc_val flag this many units processed at once */ |
117 | void |
118 | inc( |
119 | ulint inc_val = 1); |
120 | |
121 | /** Flag the end of reading of the primary key. |
122 | Here we know the exact number of pages and records and calculate |
123 | the number of records per page and refresh the estimate. */ |
124 | void |
125 | end_phase_read_pk(); |
126 | |
127 | /** Flag the beginning of the sort phase. |
128 | @param[in] sort_multi_factor since merge sort processes |
129 | one page more than once we only update the estimate once per this |
130 | many pages processed. */ |
131 | void |
132 | begin_phase_sort( |
133 | double sort_multi_factor); |
134 | |
135 | /** Flag the beginning of the insert phase. */ |
136 | void |
137 | begin_phase_insert(); |
138 | |
139 | /** Flag the beginning of the flush phase. |
140 | @param[in] n_flush_pages this many pages are going to be |
141 | flushed */ |
142 | void |
143 | begin_phase_flush( |
144 | ulint n_flush_pages); |
145 | |
146 | /** Flag the beginning of the log index phase. */ |
147 | void |
148 | begin_phase_log_index(); |
149 | |
150 | /** Flag the beginning of the log table phase. */ |
151 | void |
152 | begin_phase_log_table(); |
153 | |
154 | /** Flag the beginning of the end phase. */ |
155 | void |
156 | begin_phase_end(); |
157 | |
158 | private: |
159 | |
160 | /** Update the estimate of total work to be done. */ |
161 | void |
162 | reestimate(); |
163 | |
164 | /** Change the current phase. |
165 | @param[in] new_stage pointer to the new stage to change to */ |
166 | void |
167 | change_phase( |
168 | const PSI_stage_info* new_stage); |
169 | |
170 | /** Performance schema accounting object. */ |
171 | /* TODO: MySQL 5.7 PSI */ |
172 | PSI_stage_progress* m_progress; |
173 | |
174 | /** Old table PK. Used for calculating the estimate. */ |
175 | const dict_index_t* m_pk; |
176 | |
177 | /** Number of records in the primary key (table), including delete |
178 | marked records. */ |
179 | ulint m_n_pk_recs; |
180 | |
181 | /** Number of leaf pages in the primary key. */ |
182 | ulint m_n_pk_pages; |
183 | |
184 | /** Estimated number of records per page in the primary key. */ |
185 | double m_n_recs_per_page; |
186 | |
187 | /** Number of indexes that are being added. */ |
188 | ulint m_n_sort_indexes; |
189 | |
190 | /** During the sort phase, increment the counter once per this |
191 | many pages processed. This is because sort processes one page more |
192 | than once. */ |
193 | ulint m_sort_multi_factor; |
194 | |
195 | /** Number of records processed during sort & insert phases. We |
196 | need to increment the counter only once page, or once per |
197 | recs-per-page records. */ |
198 | ulint m_n_recs_processed; |
199 | |
200 | /** Number of pages to flush. */ |
201 | ulint m_n_flush_pages; |
202 | |
203 | /** Current phase. */ |
204 | enum { |
205 | NOT_STARTED = 0, |
206 | READ_PK = 1, |
207 | SORT = 2, |
208 | INSERT = 3, |
209 | FLUSH = 4, |
210 | /* JAN: TODO: MySQL 5.7 vrs. MariaDB sql/log.h |
211 | LOG_INDEX = 5, |
212 | LOG_TABLE = 6, */ |
213 | LOG_INNODB_INDEX = 5, |
214 | LOG_INNODB_TABLE = 6, |
215 | END = 7, |
216 | } m_cur_phase; |
217 | }; |
218 | |
219 | /** Destructor. */ |
220 | inline |
221 | ut_stage_alter_t::~ut_stage_alter_t() |
222 | { |
223 | if (m_progress == NULL) { |
224 | return; |
225 | } |
226 | |
227 | /* TODO: MySQL 5.7 PSI: Set completed = estimated before we quit. |
228 | mysql_stage_set_work_completed( |
229 | m_progress, |
230 | mysql_stage_get_work_estimated(m_progress)); |
231 | |
232 | mysql_end_stage(); |
233 | */ |
234 | } |
235 | |
236 | /** Flag an ALTER TABLE start (read primary key phase). |
237 | @param[in] n_sort_indexes number of indexes that will be sorted |
238 | during ALTER TABLE, used for estimating the total work to be done */ |
239 | inline |
240 | void |
241 | ut_stage_alter_t::begin_phase_read_pk( |
242 | ulint n_sort_indexes) |
243 | { |
244 | m_n_sort_indexes = n_sort_indexes; |
245 | |
246 | m_cur_phase = READ_PK; |
247 | |
248 | /* TODO: MySQL 5.7 PSI |
249 | m_progress = mysql_set_stage( |
250 | srv_stage_alter_table_read_pk_internal_sort.m_key); |
251 | |
252 | mysql_stage_set_work_completed(m_progress, 0); |
253 | */ |
254 | reestimate(); |
255 | } |
256 | |
257 | /** Increment the number of records in PK (table) with 1. |
258 | This is used to get more accurate estimate about the number of |
259 | records per page which is needed because some phases work on |
260 | per-page basis while some work on per-record basis and we want |
261 | to get the progress as even as possible. */ |
262 | inline |
263 | void |
264 | ut_stage_alter_t::n_pk_recs_inc() |
265 | { |
266 | m_n_pk_recs++; |
267 | } |
268 | |
269 | /** Flag either one record or one page processed, depending on the |
270 | current phase. */ |
271 | inline |
272 | void |
273 | ut_stage_alter_t::inc(ulint) |
274 | { |
275 | if (m_progress == NULL) { |
276 | return; |
277 | } |
278 | |
279 | ulint multi_factor = 1; |
280 | bool should_proceed = true; |
281 | |
282 | switch (m_cur_phase) { |
283 | case NOT_STARTED: |
284 | ut_error; |
285 | case READ_PK: |
286 | m_n_pk_pages++; |
287 | #if 0 /* TODO: MySQL 5.7 PSI */ |
288 | ut_ad(inc_val == 1); |
289 | /* Overall the read pk phase will read all the pages from the |
290 | PK and will do work, proportional to the number of added |
291 | indexes, thus when this is called once per read page we |
292 | increment with 1 + m_n_sort_indexes */ |
293 | inc_val = 1 + m_n_sort_indexes; |
294 | #endif |
295 | break; |
296 | case SORT: |
297 | multi_factor = m_sort_multi_factor; |
298 | /* fall through */ |
299 | case INSERT: { |
300 | /* Increment the progress every nth record. During |
301 | sort and insert phases, this method is called once per |
302 | record processed. We need fractional point numbers here |
303 | because "records per page" is such a number naturally and |
304 | to avoid rounding skew we want, for example: if there are |
305 | (double) N records per page, then the work_completed |
306 | should be incremented on the inc() calls round(k*N), |
307 | for k=1,2,3... */ |
308 | const double every_nth = m_n_recs_per_page * multi_factor; |
309 | |
310 | const ulint k = static_cast<ulint>( |
311 | round(m_n_recs_processed / every_nth)); |
312 | |
313 | const ulint nth = static_cast<ulint>( |
314 | round(k * every_nth)); |
315 | |
316 | should_proceed = m_n_recs_processed == nth; |
317 | |
318 | m_n_recs_processed++; |
319 | |
320 | break; |
321 | } |
322 | case FLUSH: |
323 | break; |
324 | /* JAN: TODO: MySQL 5.7 |
325 | case LOG_INDEX: |
326 | break; |
327 | case LOG_TABLE: |
328 | break; */ |
329 | case LOG_INNODB_INDEX: |
330 | case LOG_INNODB_TABLE: |
331 | break; |
332 | case END: |
333 | break; |
334 | } |
335 | |
336 | if (should_proceed) { |
337 | /* TODO: MySQL 5.7 PSI |
338 | mysql_stage_inc_work_completed(m_progress, inc_val); |
339 | */ |
340 | reestimate(); |
341 | } |
342 | } |
343 | |
344 | /** Flag the end of reading of the primary key. |
345 | Here we know the exact number of pages and records and calculate |
346 | the number of records per page and refresh the estimate. */ |
347 | inline |
348 | void |
349 | ut_stage_alter_t::end_phase_read_pk() |
350 | { |
351 | reestimate(); |
352 | |
353 | if (m_n_pk_pages == 0) { |
354 | /* The number of pages in the PK could be 0 if the tree is |
355 | empty. In this case we set m_n_recs_per_page to 1 to avoid |
356 | division by zero later. */ |
357 | m_n_recs_per_page = 1.0; |
358 | } else { |
359 | m_n_recs_per_page = std::max( |
360 | static_cast<double>(m_n_pk_recs) / m_n_pk_pages, |
361 | 1.0); |
362 | } |
363 | } |
364 | |
365 | /** Flag the beginning of the sort phase. |
366 | @param[in] sort_multi_factor since merge sort processes |
367 | one page more than once we only update the estimate once per this |
368 | many pages processed. */ |
369 | inline |
370 | void |
371 | ut_stage_alter_t::begin_phase_sort( |
372 | double sort_multi_factor) |
373 | { |
374 | if (sort_multi_factor <= 1.0) { |
375 | m_sort_multi_factor = 1; |
376 | } else { |
377 | m_sort_multi_factor = static_cast<ulint>( |
378 | round(sort_multi_factor)); |
379 | } |
380 | |
381 | change_phase(&srv_stage_alter_table_merge_sort); |
382 | } |
383 | |
384 | /** Flag the beginning of the insert phase. */ |
385 | inline |
386 | void |
387 | ut_stage_alter_t::begin_phase_insert() |
388 | { |
389 | change_phase(&srv_stage_alter_table_insert); |
390 | } |
391 | |
392 | /** Flag the beginning of the flush phase. |
393 | @param[in] n_flush_pages this many pages are going to be |
394 | flushed */ |
395 | inline |
396 | void |
397 | ut_stage_alter_t::begin_phase_flush( |
398 | ulint n_flush_pages) |
399 | { |
400 | m_n_flush_pages = n_flush_pages; |
401 | |
402 | reestimate(); |
403 | |
404 | change_phase(&srv_stage_alter_table_flush); |
405 | } |
406 | |
407 | /** Flag the beginning of the log index phase. */ |
408 | inline |
409 | void |
410 | ut_stage_alter_t::begin_phase_log_index() |
411 | { |
412 | change_phase(&srv_stage_alter_table_log_index); |
413 | } |
414 | |
415 | /** Flag the beginning of the log table phase. */ |
416 | inline |
417 | void |
418 | ut_stage_alter_t::begin_phase_log_table() |
419 | { |
420 | change_phase(&srv_stage_alter_table_log_table); |
421 | } |
422 | |
423 | /** Flag the beginning of the end phase. */ |
424 | inline |
425 | void |
426 | ut_stage_alter_t::begin_phase_end() |
427 | { |
428 | change_phase(&srv_stage_alter_table_end); |
429 | } |
430 | |
431 | /** Update the estimate of total work to be done. */ |
432 | inline |
433 | void |
434 | ut_stage_alter_t::reestimate() |
435 | { |
436 | if (m_progress == NULL) { |
437 | return; |
438 | } |
439 | |
440 | /* During the log table phase we calculate the estimate as |
441 | work done so far + log size remaining. */ |
442 | if (m_cur_phase == LOG_INNODB_TABLE) { |
443 | /* TODO: MySQL 5.7 PSI |
444 | mysql_stage_set_work_estimated( |
445 | m_progress, |
446 | mysql_stage_get_work_completed(m_progress) |
447 | + row_log_estimate_work(m_pk)); |
448 | */ |
449 | return; |
450 | } |
451 | |
452 | /* During the other phases we use a formula, regardless of |
453 | how much work has been done so far. */ |
454 | |
455 | /* For number of pages in the PK - if the PK has not been |
456 | read yet, use stat_n_leaf_pages (approximate), otherwise |
457 | use the exact number we gathered. */ |
458 | const ulint n_pk_pages |
459 | = m_cur_phase != READ_PK |
460 | ? m_n_pk_pages |
461 | : m_pk->stat_n_leaf_pages; |
462 | |
463 | /* If flush phase has not started yet and we do not know how |
464 | many pages are to be flushed, then use a wild guess - the |
465 | number of pages in the PK / 2. */ |
466 | if (m_n_flush_pages == 0) { |
467 | m_n_flush_pages = n_pk_pages / 2; |
468 | } |
469 | |
470 | ulonglong estimate __attribute__((unused)) |
471 | = n_pk_pages |
472 | * (1 /* read PK */ |
473 | + m_n_sort_indexes /* row_merge_buf_sort() inside the |
474 | read PK per created index */ |
475 | + m_n_sort_indexes * 2 /* sort & insert per created index */) |
476 | + m_n_flush_pages |
477 | + row_log_estimate_work(m_pk); |
478 | |
479 | /* Prevent estimate < completed */ |
480 | /* TODO: MySQL 5.7 PSI |
481 | estimate = std::max(estimate, |
482 | mysql_stage_get_work_completed(m_progress)); |
483 | |
484 | mysql_stage_set_work_estimated(m_progress, estimate); |
485 | */ |
486 | } |
487 | |
488 | /** Change the current phase. |
489 | @param[in] new_stage pointer to the new stage to change to */ |
490 | inline |
491 | void |
492 | ut_stage_alter_t::change_phase( |
493 | const PSI_stage_info* new_stage) |
494 | { |
495 | if (m_progress == NULL) { |
496 | return; |
497 | } |
498 | |
499 | if (new_stage == &srv_stage_alter_table_read_pk_internal_sort) { |
500 | m_cur_phase = READ_PK; |
501 | } else if (new_stage == &srv_stage_alter_table_merge_sort) { |
502 | m_cur_phase = SORT; |
503 | } else if (new_stage == &srv_stage_alter_table_insert) { |
504 | m_cur_phase = INSERT; |
505 | } else if (new_stage == &srv_stage_alter_table_flush) { |
506 | m_cur_phase = FLUSH; |
507 | /* JAN: TODO: MySQL 5.7 used LOG_INDEX and LOG_TABLE */ |
508 | } else if (new_stage == &srv_stage_alter_table_log_index) { |
509 | m_cur_phase = LOG_INNODB_INDEX; |
510 | } else if (new_stage == &srv_stage_alter_table_log_table) { |
511 | m_cur_phase = LOG_INNODB_TABLE; |
512 | } else if (new_stage == &srv_stage_alter_table_end) { |
513 | m_cur_phase = END; |
514 | } else { |
515 | ut_error; |
516 | } |
517 | |
518 | /* TODO: MySQL 5.7 PSI |
519 | const ulonglong c = mysql_stage_get_work_completed(m_progress); |
520 | const ulonglong e = mysql_stage_get_work_estimated(m_progress); |
521 | |
522 | m_progress = mysql_set_stage(new_stage->m_key); |
523 | |
524 | mysql_stage_set_work_completed(m_progress, c); |
525 | mysql_stage_set_work_estimated(m_progress, e); |
526 | */ |
527 | } |
528 | #else /* HAVE_PSI_STAGE_INTERFACE */ |
529 | |
530 | class ut_stage_alter_t { |
531 | public: |
532 | explicit ut_stage_alter_t(const dict_index_t*) {} |
533 | |
534 | void begin_phase_read_pk(ulint) {} |
535 | |
536 | void n_pk_recs_inc() {} |
537 | |
538 | void inc() {} |
539 | void inc(ulint) {} |
540 | |
541 | void end_phase_read_pk() {} |
542 | |
543 | void begin_phase_sort(double) {} |
544 | |
545 | void begin_phase_insert() {} |
546 | |
547 | void begin_phase_flush(ulint) {} |
548 | |
549 | void begin_phase_log_index() {} |
550 | |
551 | void begin_phase_log_table() {} |
552 | |
553 | void begin_phase_end() {} |
554 | }; |
555 | |
556 | #endif /* HAVE_PSI_STAGE_INTERFACE */ |
557 | |
558 | #endif /* ut0stage_h */ |
559 | |