1/*
2 Copyright (c) 2000, 2010, Oracle and/or its affiliates.
3 Copyright (c) 2009, 2017, MariaDB Corporation
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
17
18#ifdef USE_PRAGMA_INTERFACE
19#pragma implementation /* gcc class implementation */
20#endif
21
22/**
23 @file
24
25 @brief
26 Functions for easy reading of records, possible through a cache
27*/
28
29#include "mariadb.h"
30#include "records.h"
31#include "sql_priv.h"
32#include "records.h"
33#include "opt_range.h" // SQL_SELECT
34#include "sql_class.h" // THD
35#include "sql_base.h"
36#include "sql_sort.h" // SORT_ADDON_FIELD
37
/*
  Row readers. One of these is stored in READ_RECORD::read_record_func by
  init_read_record() or init_read_record_idx(); rr_sequential and
  rr_from_pointers are the only ones declared without internal linkage.
*/
static int rr_quick(READ_RECORD *info);
int rr_sequential(READ_RECORD *info);
static int rr_from_tempfile(READ_RECORD *info);
static int rr_unpack_from_tempfile(READ_RECORD *info);
static int rr_unpack_from_buffer(READ_RECORD *info);
int rr_from_pointers(READ_RECORD *info);
static int rr_from_cache(READ_RECORD *info);
/* Helpers for rr_from_cache: buffer allocation and row reference ordering */
static int init_rr_cache(THD *thd, READ_RECORD *info);
static int rr_cmp(uchar *a,uchar *b);
/*
  Index scan readers. The *_first/_last variants read the first row and then
  replace read_record_func with rr_index / rr_index_desc respectively.
*/
static int rr_index_first(READ_RECORD *info);
static int rr_index_last(READ_RECORD *info);
static int rr_index(READ_RECORD *info);
static int rr_index_desc(READ_RECORD *info);
51
52
53/**
54 Initialize READ_RECORD structure to perform full index scan in desired
55 direction using read_record.read_record() interface
56
57 This function has been added at late stage and is used only by
58 UPDATE/DELETE. Other statements perform index scans using
59 join_read_first/next functions.
60
61 @param info READ_RECORD structure to initialize.
62 @param thd Thread handle
63 @param table Table to be accessed
64 @param print_error If true, call table->file->print_error() if an error
65 occurs (except for end-of-records error)
66 @param idx index to scan
67 @param reverse Scan in the reverse direction
68*/
69
70bool init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
71 bool print_error, uint idx, bool reverse)
72{
73 int error= 0;
74 DBUG_ENTER("init_read_record_idx");
75
76 empty_record(table);
77 bzero((char*) info,sizeof(*info));
78 info->thd= thd;
79 info->table= table;
80 info->record= table->record[0];
81 info->print_error= print_error;
82 info->unlock_row= rr_unlock_row;
83
84 table->status=0; /* And it's always found */
85 if (!table->file->inited &&
86 unlikely(error= table->file->ha_index_init(idx, 1)))
87 {
88 if (print_error)
89 table->file->print_error(error, MYF(0));
90 }
91
92 /* read_record_func will be changed to rr_index in rr_index_first */
93 info->read_record_func= reverse ? rr_index_last : rr_index_first;
94 DBUG_RETURN(error != 0);
95}
96
97
98/*
99 init_read_record is used to scan by using a number of different methods.
100 Which method to use is set-up in this call so that later calls to
101 the info->read_record will call the appropriate method using a function
102 pointer.
103
104 There are five methods that relate completely to the sort function
105 filesort. The result of a filesort is retrieved using read_record
106 calls. The other two methods are used for normal table access.
107
108 The filesort will produce references to the records sorted, these
109 references can be stored in memory or in a temporary file.
110
  The temporary file is normally used when the references don't fit into
  a properly sized memory buffer. For most small queries the references
  are stored in the memory buffer.
114 SYNOPSIS
115 init_read_record()
116 info OUT read structure
117 thd Thread handle
118 table Table the data [originally] comes from.
119 select SQL_SELECT structure. We may select->quick or
120 select->file as data source
121 use_record_cache Call file->extra_opt(HA_EXTRA_CACHE,...)
122 if we're going to do sequential read and some
123 additional conditions are satisfied.
124 print_error Copy this to info->print_error
125 disable_rr_cache Don't use rr_from_cache (used by sort-union
126 index-merge which produces rowid sequences that
127 are already ordered)
128
129 DESCRIPTION
130 This function sets up reading data via one of the methods:
131
132 The temporary file is also used when performing an update where a key is
133 modified.
134
135 Methods used when ref's are in memory (using rr_from_pointers):
136 rr_unpack_from_buffer:
137 ----------------------
138 This method is used when table->sort.addon_field is allocated.
139 This is allocated for most SELECT queries not involving any BLOB's.
140 In this case the records are fetched from a memory buffer.
141 rr_from_pointers:
142 -----------------
143 Used when the above is not true, UPDATE, DELETE and so forth and
144 SELECT's involving BLOB's. It is also used when the addon_field
145 buffer is not allocated due to that its size was bigger than the
146 session variable max_length_for_sort_data.
147 In this case the record data is fetched from the handler using the
148 saved reference using the rnd_pos handler call.
149
150 Methods used when ref's are in a temporary file (using rr_from_tempfile)
151 rr_unpack_from_tempfile:
152 ------------------------
153 Same as rr_unpack_from_buffer except that references are fetched from
154 temporary file. Should obviously not really happen other than in
155 strange configurations.
156
157 rr_from_tempfile:
158 -----------------
      Same as rr_from_pointers except that references are fetched from
      temporary file instead of from a memory buffer.
161 rr_from_cache:
162 --------------
      This is a special variant of rr_from_tempfile that can be used for
      handlers that are not using the HA_FAST_KEY_READ table flag. Instead
165 of reading the references one by one from the temporary file it reads
166 a set of them, sorts them and reads all of them into a buffer which
167 is then used for a number of subsequent calls to rr_from_cache.
168 It is only used for SELECT queries and a number of other conditions
169 on table size.
170
171 All other accesses use either index access methods (rr_quick) or a full
172 table scan (rr_sequential).
173 rr_quick:
174 ---------
175 rr_quick uses one of the QUICK_SELECT classes in opt_range.cc to
176 perform an index scan. There are loads of functionality hidden
177 in these quick classes. It handles all index scans of various kinds.
178 rr_sequential:
179 --------------
180 This is the most basic access method of a table using rnd_init,
181 rnd_next and rnd_end. No indexes are used.
182*/
183
/*
  Zero *info, fill in the common fields and choose the read_record_func
  that matches the available data source (tempfile of references, quick
  select, in-memory sort result, keyread index or plain table scan).

  Returns 0 on success and 1 when initializing the handler scan
  (ha_rnd_init_with_error() / ha_index_init()) fails.
*/
bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
		      SQL_SELECT *select,
                      SORT_INFO *filesort,
		      int use_record_cache, bool print_error,
                      bool disable_rr_cache)
{
  IO_CACHE *tempfile;
  /* Addon fields exist only when a filesort result is supplied */
  SORT_ADDON_FIELD *addon_field= filesort ? filesort->addon_field : 0;
  DBUG_ENTER("init_read_record");

  bzero((char*) info,sizeof(*info));
  info->thd=thd;
  info->table=table;
  info->forms= &info->table;		/* Only one table */
  info->addon_field= addon_field;

  /*
    Request HA_EXTRA_MMAP for internal and non-transactional temporary
    tables, but only when no addon fields are used.
  */
  if ((table->s->tmp_table == INTERNAL_TMP_TABLE ||
       table->s->tmp_table == NON_TRANSACTIONAL_TMP_TABLE) &&
      !addon_field)
    (void) table->file->extra(HA_EXTRA_MMAP);

  if (addon_field)
  {
    /*
      Sorted entries are unpacked by info->unpack; rec_buf is the staging
      buffer and ref_length holds the length of that buffer, not the length
      of a handler row reference.
    */
    info->rec_buf=    (uchar*) filesort->addon_buf.str;
    info->ref_length= (uint)filesort->addon_buf.length;
    info->unpack=     filesort->unpack;
  }
  else
  {
    /* Rows are read into record[0]; ref_length is the handler's ref size */
    empty_record(table);
    info->record=table->record[0];
    info->ref_length= (uint)table->file->ref_length;
  }
  info->select=select;
  info->print_error=print_error;
  info->unlock_row= rr_unlock_row;
  table->status= 0;			/* Rows are always found */

  /*
    Pick the temporary file to read from, if any: select->file takes
    precedence over filesort->io_cache.
  */
  tempfile= 0;
  if (select && my_b_inited(&select->file))
    tempfile= &select->file;
  else if (filesort && my_b_inited(&filesort->io_cache))
    tempfile= &filesort->io_cache;

  /* A quick select, when present, wins over an initialized tempfile */
  if (tempfile && !(select && select->quick))
  {
    DBUG_PRINT("info",("using rr_from_tempfile"));
    info->read_record_func=
      addon_field ? rr_unpack_from_tempfile : rr_from_tempfile;
    info->io_cache= tempfile;
    /* Rewind the tempfile and switch it to read mode */
    reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0);
    info->ref_pos=table->file->ref;
    if (!table->file->inited)
      if (unlikely(table->file->ha_rnd_init_with_error(0)))
        DBUG_RETURN(1);

    /*
      addon_field is checked because if we use addon fields,
      it doesn't make sense to use cache - we don't read from the table
      and filesort->io_cache is read sequentially

      The remaining conditions: the caller did not disable the cache,
      read_rnd_buff_size is non-zero, the engine lacks HA_FAST_KEY_READ,
      the table is only read, table and tempfile are above the minimum
      size thresholds, there are no blob fields and the reference fits
      in MAX_REFLENGTH bytes.
    */
    if (!disable_rr_cache &&
        !addon_field &&
	thd->variables.read_rnd_buff_size &&
	!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
	(table->db_stat & HA_READ_ONLY ||
	 table->reginfo.lock_type <= TL_READ_NO_INSERT) &&
	(ulonglong) table->s->reclength* (table->file->stats.records+
                                          table->file->stats.deleted) >
	(ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE &&
	info->io_cache->end_of_file/info->ref_length * table->s->reclength >
	(my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE &&
	!table->s->blob_fields &&
        info->ref_length <= MAX_REFLENGTH)
    {
      /* If the cache cannot be set up, the tempfile reader chosen above stays */
      if (! init_rr_cache(thd, info))
      {
	DBUG_PRINT("info",("using rr_from_cache"));
	info->read_record_func= rr_from_cache;
      }
    }
  }
  else if (select && select->quick)
  {
    DBUG_PRINT("info",("using rr_quick"));
    info->read_record_func= rr_quick;
  }
  else if (filesort && filesort->record_pointers)
  {
    /* Sort result is in memory: return_rows entries of ref_length bytes */
    DBUG_PRINT("info",("using record_pointers"));
    if (unlikely(table->file->ha_rnd_init_with_error(0)))
      DBUG_RETURN(1);
    info->cache_pos= filesort->record_pointers;
    info->cache_end= (info->cache_pos+
                      filesort->return_rows * info->ref_length);
    info->read_record_func=
      addon_field ? rr_unpack_from_buffer : rr_from_pointers;
  }
  else if (table->file->keyread_enabled())
  {
    /*
      Keyread is enabled: scan the keyread index from its first entry.
      rr_index_first replaces read_record_func with rr_index after the
      first row.
    */
    int error;
    info->read_record_func= rr_index_first;
    if (!table->file->inited &&
        unlikely((error= table->file->ha_index_init(table->file->keyread, 1))))
    {
      if (print_error)
        table->file->print_error(error, MYF(0));
      DBUG_RETURN(1);
    }
  }
  else
  {
    DBUG_PRINT("info",("using rr_sequential"));
    info->read_record_func= rr_sequential;
    if (unlikely(table->file->ha_rnd_init_with_error(1)))
      DBUG_RETURN(1);
    /* We can use record cache if we don't update dynamic length tables */
    /*
      use_record_cache > 0 requests the cache unconditionally (subject to
      table->no_cache); use_record_cache < 0 requests it only when the engine
      does not set HA_NOT_DELETE_WITH_CACHE.
    */
    if (!table->no_cache &&
	(use_record_cache > 0 ||
	 (int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY ||
	 !(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) ||
	 (use_record_cache < 0 &&
	  !(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE))))
      (void) table->file->extra_opt(HA_EXTRA_CACHE,
                                    thd->variables.read_buff_size);
  }
  /* Condition pushdown to storage engine */
  /* Done only if the condition refers to this table and nothing is pushed yet */
  if ((table->file->ha_table_flags() & HA_CAN_TABLE_CONDITION_PUSHDOWN) &&
      select && select->cond &&
      (select->cond->used_tables() & table->map) &&
      !table->file->pushed_cond)
    table->file->cond_push(select->cond);

  DBUG_RETURN(0);
} /* init_read_record */
319
320
321
322void end_read_record(READ_RECORD *info)
323{ /* free cache if used */
324 if (info->cache)
325 {
326 my_free_lock(info->cache);
327 info->cache=0;
328 }
329 if (info->table)
330 {
331 if (info->table->is_created())
332 (void) info->table->file->extra(HA_EXTRA_NO_CACHE);
333 if (info->read_record_func != rr_quick) // otherwise quick_range does it
334 (void) info->table->file->ha_index_or_rnd_end();
335 info->table=0;
336 }
337}
338
339static int rr_handle_error(READ_RECORD *info, int error)
340{
341 if (info->thd->killed)
342 {
343 info->thd->send_kill_message();
344 return 1;
345 }
346
347 if (error == HA_ERR_END_OF_FILE)
348 error= -1;
349 else
350 {
351 if (info->print_error)
352 info->table->file->print_error(error, MYF(0));
353 if (error < 0) // Fix negative BDB errno
354 error= 1;
355 }
356 return error;
357}
358
359
360/** Read a record from head-database. */
361
362static int rr_quick(READ_RECORD *info)
363{
364 int tmp;
365 while ((tmp= info->select->quick->get_next()))
366 {
367 tmp= rr_handle_error(info, tmp);
368 break;
369 }
370 return tmp;
371}
372
373
374/**
375 Reads first row in an index scan.
376
377 @param info Scan info
378
379 @retval
380 0 Ok
381 @retval
382 -1 End of records
383 @retval
384 1 Error
385*/
386
387static int rr_index_first(READ_RECORD *info)
388{
389 int tmp;
390 // tell handler that we are doing an index scan
391 if ((tmp = info->table->file->prepare_index_scan()))
392 {
393 tmp= rr_handle_error(info, tmp);
394 return tmp;
395 }
396
397 tmp= info->table->file->ha_index_first(info->record);
398 info->read_record_func= rr_index;
399 if (tmp)
400 tmp= rr_handle_error(info, tmp);
401 return tmp;
402}
403
404
405/**
406 Reads last row in an index scan.
407
408 @param info Scan info
409
410 @retval
411 0 Ok
412 @retval
413 -1 End of records
414 @retval
415 1 Error
416*/
417
418static int rr_index_last(READ_RECORD *info)
419{
420 int tmp= info->table->file->ha_index_last(info->record);
421 info->read_record_func= rr_index_desc;
422 if (tmp)
423 tmp= rr_handle_error(info, tmp);
424 return tmp;
425}
426
427
428/**
429 Reads index sequentially after first row.
430
431 Read the next index record (in forward direction) and translate return
432 value.
433
434 @param info Scan info
435
436 @retval
437 0 Ok
438 @retval
439 -1 End of records
440 @retval
441 1 Error
442*/
443
444static int rr_index(READ_RECORD *info)
445{
446 int tmp= info->table->file->ha_index_next(info->record);
447 if (tmp)
448 tmp= rr_handle_error(info, tmp);
449 return tmp;
450}
451
452
453/**
454 Reads index sequentially from the last row to the first.
455
456 Read the prev index record (in backward direction) and translate return
457 value.
458
459 @param info Scan info
460
461 @retval
462 0 Ok
463 @retval
464 -1 End of records
465 @retval
466 1 Error
467*/
468
469static int rr_index_desc(READ_RECORD *info)
470{
471 int tmp= info->table->file->ha_index_prev(info->record);
472 if (tmp)
473 tmp= rr_handle_error(info, tmp);
474 return tmp;
475}
476
477
478int rr_sequential(READ_RECORD *info)
479{
480 int tmp;
481 while ((tmp= info->table->file->ha_rnd_next(info->record)))
482 {
483 tmp= rr_handle_error(info, tmp);
484 break;
485 }
486 return tmp;
487}
488
489
490static int rr_from_tempfile(READ_RECORD *info)
491{
492 int tmp;
493 for (;;)
494 {
495 if (my_b_read(info->io_cache,info->ref_pos,info->ref_length))
496 return -1; /* End of file */
497 if (!(tmp= info->table->file->ha_rnd_pos(info->record,info->ref_pos)))
498 break;
499 /* The following is extremely unlikely to happen */
500 if (tmp == HA_ERR_KEY_NOT_FOUND)
501 continue;
502 tmp= rr_handle_error(info, tmp);
503 break;
504 }
505 return tmp;
506} /* rr_from_tempfile */
507
508
509/**
510 Read a result set record from a temporary file after sorting.
511
512 The function first reads the next sorted record from the temporary file.
513 into a buffer. If a success it calls a callback function that unpacks
514 the fields values use in the result set from this buffer into their
515 positions in the regular record buffer.
516
517 @param info Reference to the context including record descriptors
518
519 @retval
520 0 Record successfully read.
521 @retval
522 -1 There is no record to be read anymore.
523*/
524
525static int rr_unpack_from_tempfile(READ_RECORD *info)
526{
527 if (my_b_read(info->io_cache, info->rec_buf, info->ref_length))
528 return -1;
529 (*info->unpack)(info->addon_field, info->rec_buf,
530 info->rec_buf + info->ref_length);
531
532 return 0;
533}
534
535int rr_from_pointers(READ_RECORD *info)
536{
537 int tmp;
538 uchar *cache_pos;
539
540 for (;;)
541 {
542 if (info->cache_pos == info->cache_end)
543 return -1; /* End of file */
544 cache_pos= info->cache_pos;
545 info->cache_pos+= info->ref_length;
546
547 if (!(tmp= info->table->file->ha_rnd_pos(info->record,cache_pos)))
548 break;
549
550 /* The following is extremely unlikely to happen */
551 if (tmp == HA_ERR_KEY_NOT_FOUND)
552 continue;
553 tmp= rr_handle_error(info, tmp);
554 break;
555 }
556 return tmp;
557}
558
559/**
560 Read a result set record from a buffer after sorting.
561
562 The function first reads the next sorted record from the sort buffer.
563 If a success it calls a callback function that unpacks
564 the fields values use in the result set from this buffer into their
565 positions in the regular record buffer.
566
567 @param info Reference to the context including record descriptors
568
569 @retval
570 0 Record successfully read.
571 @retval
572 -1 There is no record to be read anymore.
573*/
574
575static int rr_unpack_from_buffer(READ_RECORD *info)
576{
577 if (info->cache_pos == info->cache_end)
578 return -1; /* End of buffer */
579 (*info->unpack)(info->addon_field, info->cache_pos,
580 info->cache_end);
581 info->cache_pos+= info->ref_length;
582 return 0;
583}
/* Caching of records from a database */
585
586static int init_rr_cache(THD *thd, READ_RECORD *info)
587{
588 uint rec_cache_size;
589 DBUG_ENTER("init_rr_cache");
590
591 info->struct_length= 3+MAX_REFLENGTH;
592 info->reclength= ALIGN_SIZE(info->table->s->reclength+1);
593 if (info->reclength < info->struct_length)
594 info->reclength= ALIGN_SIZE(info->struct_length);
595
596 info->error_offset= info->table->s->reclength;
597 info->cache_records= (thd->variables.read_rnd_buff_size /
598 (info->reclength+info->struct_length));
599 rec_cache_size= info->cache_records*info->reclength;
600 info->rec_cache_size= info->cache_records*info->ref_length;
601
602 // We have to allocate one more byte to use uint3korr (see comments for it)
603 if (info->cache_records <= 2 ||
604 !(info->cache=(uchar*) my_malloc_lock(rec_cache_size+info->cache_records*
605 info->struct_length+1,
606 MYF(MY_THREAD_SPECIFIC))))
607 DBUG_RETURN(1);
608#ifdef HAVE_valgrind
609 // Avoid warnings in qsort
610 bzero(info->cache,rec_cache_size+info->cache_records* info->struct_length+1);
611#endif
612 DBUG_PRINT("info",("Allocated buffert for %d records",info->cache_records));
613 info->read_positions=info->cache+rec_cache_size;
614 info->cache_pos=info->cache_end=info->cache;
615 DBUG_RETURN(0);
616} /* init_rr_cache */
617
618
/*
  Return the next row from the row cache, refilling the cache from the
  tempfile of row references when it is empty.

  A refill reads a batch of references, sorts them with rr_cmp, fetches the
  rows in that sorted order with ha_rnd_pos(), and stores each row in the
  slot matching its original position in the batch. Rows are therefore
  returned in tempfile order.

  Returns 0 when a row was copied to info->record, -1 when the tempfile is
  exhausted, or the stored ha_rnd_pos() error code for a row that could not
  be read. That code is returned as stored, without going through
  rr_handle_error().
*/
static int rr_from_cache(READ_RECORD *info)
{
  uint i;
  ulong length;
  my_off_t rest_of_file;
  int16 error;
  uchar *position,*ref_position,*record_pos;
  ulong record;

  for (;;)
  {
    /* Serve from the cache while it still has unread slots */
    if (info->cache_pos != info->cache_end)
    {
      /* A non-zero byte at error_offset marks a slot whose read failed */
      if (unlikely(info->cache_pos[info->error_offset]))
      {
        /* The error code was saved in the first two bytes of the slot */
	shortget(error,info->cache_pos);
	if (info->print_error)
	  info->table->file->print_error(error,MYF(0));
      }
      else
      {
	error=0;
	memcpy(info->record,info->cache_pos,
               (size_t) info->table->s->reclength);
      }
      info->cache_pos+=info->reclength;
      return ((int) error);
    }
    /* Cache is empty: read the next batch of references, capped at what is left */
    length=info->rec_cache_size;
    rest_of_file=info->io_cache->end_of_file - my_b_tell(info->io_cache);
    if ((my_off_t) length > rest_of_file)
      length= (ulong) rest_of_file;
    /* The raw references are read into the start of the row area */
    if (!length || my_b_read(info->io_cache,info->cache,length))
    {
      DBUG_PRINT("info",("Found end of file"));
      return -1;			/* End of file */
    }

    /* From here on length is the number of references, not bytes */
    length/=info->ref_length;
    position=info->cache;
    ref_position=info->read_positions;
    /*
      Build the sort entries: reference in a MAX_REFLENGTH-byte field,
      followed by the 3-byte position of the reference in this batch.
    */
    for (i=0 ; i < length ; i++,position+=info->ref_length)
    {
      memcpy(ref_position,position,(size_t) info->ref_length);
      ref_position+=MAX_REFLENGTH;
      int3store(ref_position,(long) i);
      ref_position+=3;
    }
    /* Sort the entries by reference bytes */
    my_qsort(info->read_positions, length, info->struct_length,
             (qsort_cmp) rr_cmp);

    /*
      Fetch the rows in sorted order. All references were copied to
      read_positions above, so the row area may now be overwritten.
    */
    position=info->read_positions;
    for (i=0 ; i < length ; i++)
    {
      memcpy(info->ref_pos,position,(size_t) info->ref_length);
      position+=MAX_REFLENGTH;
      /* Original batch position selects the destination slot */
      record=uint3korr(position);
      position+=3;
      record_pos=info->cache+record*info->reclength;
      if (unlikely((error= (int16) info->table->file->
                    ha_rnd_pos(record_pos,info->ref_pos))))
      {
        /* Remember the failure in the slot; it is reported when the slot is read */
	record_pos[info->error_offset]=1;
	shortstore(record_pos,error);
	DBUG_PRINT("error",("Got error: %d:%d when reading row",
			    my_errno, error));
      }
      else
	record_pos[info->error_offset]=0;
    }
    /* Restart reading at the first slot; the loop top returns it */
    info->cache_end=(info->cache_pos=info->cache)+length*info->reclength;
  }
} /* rr_from_cache */
692
693
/**
  Compare two row references byte by byte, most significant position first.

  Four bytes are compared when MAX_REFLENGTH is 4, eight bytes otherwise.

  @param a  First reference
  @param b  Second reference

  @return Difference between the first pair of bytes that differ
          (a minus b), or 0 when all compared bytes are equal.
*/
static int rr_cmp(uchar *a,uchar *b)
{
#if MAX_REFLENGTH == 4
  const int key_bytes= 4;
#else
  const int key_bytes= 8;
#endif

  for (int i= 0; i < key_bytes; i++)
  {
    if (a[i] != b[i])
      return (int) a[i] - (int) b[i];
  }
  return 0;
}
716