1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of TokuDB
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
    TokuDB is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 TokuDB is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with TokuDB. If not, see <http://www.gnu.org/licenses/>.
21
22======= */
23
24#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
25
26#include "hatoku_cmp.h"
27
28#ifdef WORDS_BIGENDIAN
29#error "WORDS_BIGENDIAN not supported"
30#endif
31
32// returns true if the field is a valid field to be used
33// in a TokuDB table. The non-valid fields are those
34// that have been deprecated since before 5.1, and can
35// only exist through upgrades of old versions of MySQL
36static bool field_valid_for_tokudb_table(Field* field) {
37 bool ret_val = false;
38 enum_field_types mysql_type = field->real_type();
39 switch (mysql_type) {
40 case MYSQL_TYPE_LONG:
41 case MYSQL_TYPE_LONGLONG:
42 case MYSQL_TYPE_TINY:
43 case MYSQL_TYPE_SHORT:
44 case MYSQL_TYPE_INT24:
45 case MYSQL_TYPE_DATE:
46 case MYSQL_TYPE_YEAR:
47 case MYSQL_TYPE_NEWDATE:
48 case MYSQL_TYPE_ENUM:
49 case MYSQL_TYPE_SET:
50 case MYSQL_TYPE_TIME:
51 case MYSQL_TYPE_DATETIME:
52 case MYSQL_TYPE_TIMESTAMP:
53 case MYSQL_TYPE_DOUBLE:
54 case MYSQL_TYPE_FLOAT:
55#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
56 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
57 (100000 <= MYSQL_VERSION_ID)
58 case MYSQL_TYPE_DATETIME2:
59 case MYSQL_TYPE_TIMESTAMP2:
60 case MYSQL_TYPE_TIME2:
61#endif
62 case MYSQL_TYPE_NEWDECIMAL:
63 case MYSQL_TYPE_BIT:
64 case MYSQL_TYPE_STRING:
65 case MYSQL_TYPE_VARCHAR:
66 case MYSQL_TYPE_TINY_BLOB:
67 case MYSQL_TYPE_MEDIUM_BLOB:
68 case MYSQL_TYPE_BLOB:
69 case MYSQL_TYPE_LONG_BLOB:
70 ret_val = true;
71 goto exit;
72 //
73 // I believe these are old types that are no longer
74 // in any 5.1 tables, so tokudb does not need
75 // to worry about them
76 // Putting in this assert in case I am wrong.
77 // Do not support geometry yet.
78 //
79 case MYSQL_TYPE_GEOMETRY:
80 case MYSQL_TYPE_DECIMAL:
81 case MYSQL_TYPE_VAR_STRING:
82 case MYSQL_TYPE_NULL:
83 case MYSQL_TYPE_VARCHAR_COMPRESSED:
84 case MYSQL_TYPE_BLOB_COMPRESSED:
85 ret_val = false;
86 }
87exit:
88 return ret_val;
89}
90
91static void get_var_field_info(
92 uint32_t* field_len, // output: length of field
93 uint32_t* start_offset, // output, length of offset where data starts
94 uint32_t var_field_index, //input, index of var field we want info on
95 const uchar* var_field_offset_ptr, //input, pointer to where offset information for all var fields begins
96 uint32_t num_offset_bytes //input, number of bytes used to store offsets starting at var_field_offset_ptr
97 )
98{
99 uint32_t data_start_offset = 0;
100 uint32_t data_end_offset = 0;
101 switch (num_offset_bytes) {
102 case (1):
103 data_end_offset = (var_field_offset_ptr + var_field_index)[0];
104 break;
105 case (2):
106 data_end_offset = uint2korr(var_field_offset_ptr + 2*var_field_index);
107 break;
108 default:
109 assert_unreachable();
110 }
111
112 if (var_field_index) {
113 switch (num_offset_bytes) {
114 case (1):
115 data_start_offset = (var_field_offset_ptr + var_field_index - 1)[0];
116 break;
117 case (2):
118 data_start_offset = uint2korr(var_field_offset_ptr + 2*(var_field_index-1));
119 break;
120 default:
121 assert_unreachable();
122 }
123 }
124 else {
125 data_start_offset = 0;
126 }
127
128 *start_offset = data_start_offset;
129 assert_always(data_end_offset >= data_start_offset);
130 *field_len = data_end_offset - data_start_offset;
131}
132
133static void get_blob_field_info(
134 uint32_t* start_offset,
135 uint32_t len_of_offsets,
136 const uchar* var_field_data_ptr,
137 uint32_t num_offset_bytes
138 )
139{
140 uint32_t data_end_offset;
141 //
142 // need to set var_field_data_ptr to point to beginning of blobs, which
143 // is at the end of the var stuff (if they exist), if var stuff does not exist
144 // then the bottom variable will be 0, and var_field_data_ptr is already
145 // set correctly
146 //
147 if (len_of_offsets) {
148 switch (num_offset_bytes) {
149 case (1):
150 data_end_offset = (var_field_data_ptr - 1)[0];
151 break;
152 case (2):
153 data_end_offset = uint2korr(var_field_data_ptr - 2);
154 break;
155 default:
156 assert_unreachable();
157 }
158 }
159 else {
160 data_end_offset = 0;
161 }
162 *start_offset = data_end_offset;
163}
164
165
166// this function is pattern matched from
167// InnoDB's get_innobase_type_from_mysql_type
168static TOKU_TYPE mysql_to_toku_type (Field* field) {
169 TOKU_TYPE ret_val = toku_type_unknown;
170 enum_field_types mysql_type = field->real_type();
171 switch (mysql_type) {
172 case MYSQL_TYPE_LONG:
173 case MYSQL_TYPE_LONGLONG:
174 case MYSQL_TYPE_TINY:
175 case MYSQL_TYPE_SHORT:
176 case MYSQL_TYPE_INT24:
177 case MYSQL_TYPE_DATE:
178 case MYSQL_TYPE_YEAR:
179 case MYSQL_TYPE_NEWDATE:
180 case MYSQL_TYPE_ENUM:
181 case MYSQL_TYPE_SET:
182 ret_val = toku_type_int;
183 goto exit;
184 case MYSQL_TYPE_TIME:
185 case MYSQL_TYPE_DATETIME:
186 case MYSQL_TYPE_TIMESTAMP:
187#ifdef MARIADB_BASE_VERSION
188 // case to handle fractional seconds in MariaDB
189 //
190 if (field->key_type() == HA_KEYTYPE_BINARY) {
191 ret_val = toku_type_fixbinary;
192 goto exit;
193 }
194#endif
195 ret_val = toku_type_int;
196 goto exit;
197 case MYSQL_TYPE_DOUBLE:
198 ret_val = toku_type_double;
199 goto exit;
200 case MYSQL_TYPE_FLOAT:
201 ret_val = toku_type_float;
202 goto exit;
203#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
204 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
205 (100000 <= MYSQL_VERSION_ID)
206 case MYSQL_TYPE_DATETIME2:
207 case MYSQL_TYPE_TIMESTAMP2:
208 case MYSQL_TYPE_TIME2:
209#endif
210 case MYSQL_TYPE_NEWDECIMAL:
211 case MYSQL_TYPE_BIT:
212 ret_val = toku_type_fixbinary;
213 goto exit;
214 case MYSQL_TYPE_STRING:
215 if (field->binary()) {
216 ret_val = toku_type_fixbinary;
217 }
218 else {
219 ret_val = toku_type_fixstring;
220 }
221 goto exit;
222 case MYSQL_TYPE_VARCHAR:
223 if (field->binary()) {
224 ret_val = toku_type_varbinary;
225 }
226 else {
227 ret_val = toku_type_varstring;
228 }
229 goto exit;
230 case MYSQL_TYPE_TINY_BLOB:
231 case MYSQL_TYPE_MEDIUM_BLOB:
232 case MYSQL_TYPE_BLOB:
233 case MYSQL_TYPE_LONG_BLOB:
234 ret_val = toku_type_blob;
235 goto exit;
236 //
237 // I believe these are old types that are no longer
238 // in any 5.1 tables, so tokudb does not need
239 // to worry about them
240 // Putting in this assert in case I am wrong.
241 // Do not support geometry yet.
242 //
243 case MYSQL_TYPE_GEOMETRY:
244 case MYSQL_TYPE_DECIMAL:
245 case MYSQL_TYPE_VAR_STRING:
246 case MYSQL_TYPE_NULL:
247 case MYSQL_TYPE_VARCHAR_COMPRESSED:
248 case MYSQL_TYPE_BLOB_COMPRESSED:
249 assert_unreachable();
250 }
251exit:
252 return ret_val;
253}
254
255
256static inline CHARSET_INFO* get_charset_from_num (uint32_t charset_number) {
257 //
258 // patternmatched off of InnoDB, due to MySQL bug 42649
259 //
260 if (charset_number == default_charset_info->number) {
261 return default_charset_info;
262 }
263 else if (charset_number == my_charset_latin1.number) {
264 return &my_charset_latin1;
265 }
266 else {
267 return get_charset(charset_number, MYF(MY_WME));
268 }
269}
270
271
272
273//
274// used to read the length of a variable sized field in a tokudb key (buf).
275//
276static inline uint32_t get_length_from_var_tokudata (uchar* buf, uint32_t length_bytes) {
277 uint32_t length = (uint32_t)(buf[0]);
278 if (length_bytes == 2) {
279 uint32_t rest_of_length = (uint32_t)buf[1];
280 length += rest_of_length<<8;
281 }
282 return length;
283}
284
//
// Returns how many bytes are used to store the length of a
// varstring/varbinary key field in tokudb: 1 when the maximum size fits in
// one byte (max_num_bytes <= 255), otherwise 2.
//
static inline uint32_t get_length_bytes_from_max(uint32_t max_num_bytes) {
    if (max_num_bytes > 255) {
        return 2;
    }
    return 1;
}
292
293
294
295//
296// assuming MySQL in little endian, and we are storing in little endian
297//
298static inline uchar* pack_toku_int (uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
299 switch (num_bytes) {
300 case (1):
301 memcpy(to_tokudb, from_mysql, 1);
302 break;
303 case (2):
304 memcpy(to_tokudb, from_mysql, 2);
305 break;
306 case (3):
307 memcpy(to_tokudb, from_mysql, 3);
308 break;
309 case (4):
310 memcpy(to_tokudb, from_mysql, 4);
311 break;
312 case (8):
313 memcpy(to_tokudb, from_mysql, 8);
314 break;
315 default:
316 assert_unreachable();
317 }
318 return to_tokudb+num_bytes;
319}
320
321//
322// assuming MySQL in little endian, and we are unpacking to little endian
323//
324static inline uchar* unpack_toku_int(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
325 switch (num_bytes) {
326 case (1):
327 memcpy(to_mysql, from_tokudb, 1);
328 break;
329 case (2):
330 memcpy(to_mysql, from_tokudb, 2);
331 break;
332 case (3):
333 memcpy(to_mysql, from_tokudb, 3);
334 break;
335 case (4):
336 memcpy(to_mysql, from_tokudb, 4);
337 break;
338 case (8):
339 memcpy(to_mysql, from_tokudb, 8);
340 break;
341 default:
342 assert_unreachable();
343 }
344 return from_tokudb+num_bytes;
345}
346
347static inline int cmp_toku_int (uchar* a_buf, uchar* b_buf, bool is_unsigned, uint32_t num_bytes) {
348 int ret_val = 0;
349 //
350 // case for unsigned integers
351 //
352 if (is_unsigned) {
353 uint32_t a_num, b_num = 0;
354 uint64_t a_big_num, b_big_num = 0;
355 switch (num_bytes) {
356 case (1):
357 a_num = *a_buf;
358 b_num = *b_buf;
359 ret_val = a_num-b_num;
360 goto exit;
361 case (2):
362 a_num = uint2korr(a_buf);
363 b_num = uint2korr(b_buf);
364 ret_val = a_num-b_num;
365 goto exit;
366 case (3):
367 a_num = tokudb_uint3korr(a_buf);
368 b_num = tokudb_uint3korr(b_buf);
369 ret_val = a_num-b_num;
370 goto exit;
371 case (4):
372 a_num = uint4korr(a_buf);
373 b_num = uint4korr(b_buf);
374 if (a_num < b_num) {
375 ret_val = -1; goto exit;
376 }
377 if (a_num > b_num) {
378 ret_val = 1; goto exit;
379 }
380 ret_val = 0;
381 goto exit;
382 case (8):
383 a_big_num = uint8korr(a_buf);
384 b_big_num = uint8korr(b_buf);
385 if (a_big_num < b_big_num) {
386 ret_val = -1; goto exit;
387 }
388 else if (a_big_num > b_big_num) {
389 ret_val = 1; goto exit;
390 }
391 ret_val = 0;
392 goto exit;
393 default:
394 assert_unreachable();
395 }
396 }
397 //
398 // case for signed integers
399 //
400 else {
401 int32_t a_num, b_num = 0;
402 int64_t a_big_num, b_big_num = 0;
403 switch (num_bytes) {
404 case (1):
405 a_num = *(signed char *)a_buf;
406 b_num = *(signed char *)b_buf;
407 ret_val = a_num-b_num;
408 goto exit;
409 case (2):
410 a_num = sint2korr(a_buf);
411 b_num = sint2korr(b_buf);
412 ret_val = a_num-b_num;
413 goto exit;
414 case (3):
415 a_num = sint3korr(a_buf);
416 b_num = sint3korr(b_buf);
417 ret_val = a_num - b_num;
418 goto exit;
419 case (4):
420 a_num = sint4korr(a_buf);
421 b_num = sint4korr(b_buf);
422 if (a_num < b_num) {
423 ret_val = -1; goto exit;
424 }
425 if (a_num > b_num) {
426 ret_val = 1; goto exit;
427 }
428 ret_val = 0;
429 goto exit;
430 case (8):
431 a_big_num = sint8korr(a_buf);
432 b_big_num = sint8korr(b_buf);
433 if (a_big_num < b_big_num) {
434 ret_val = -1; goto exit;
435 }
436 else if (a_big_num > b_big_num) {
437 ret_val = 1; goto exit;
438 }
439 ret_val = 0;
440 goto exit;
441 default:
442 assert_unreachable();
443 }
444 }
445 //
446 // if this is hit, indicates bug in writing of this function
447 //
448 assert_unreachable();
449exit:
450 return ret_val;
451}
452
453static inline uchar* pack_toku_double (uchar* to_tokudb, uchar* from_mysql) {
454 memcpy(to_tokudb, from_mysql, sizeof(double));
455 return to_tokudb + sizeof(double);
456}
457
458
459static inline uchar* unpack_toku_double(uchar* to_mysql, uchar* from_tokudb) {
460 memcpy(to_mysql, from_tokudb, sizeof(double));
461 return from_tokudb + sizeof(double);
462}
463
464static inline int cmp_toku_double(uchar* a_buf, uchar* b_buf) {
465 int ret_val;
466 double a_num;
467 double b_num;
468 doubleget(a_num, a_buf);
469 doubleget(b_num, b_buf);
470 if (a_num < b_num) {
471 ret_val = -1;
472 goto exit;
473 }
474 else if (a_num > b_num) {
475 ret_val = 1;
476 goto exit;
477 }
478 ret_val = 0;
479exit:
480 return ret_val;
481}
482
483
484static inline uchar* pack_toku_float (uchar* to_tokudb, uchar* from_mysql) {
485 memcpy(to_tokudb, from_mysql, sizeof(float));
486 return to_tokudb + sizeof(float);
487}
488
489
490static inline uchar* unpack_toku_float(uchar* to_mysql, uchar* from_tokudb) {
491 memcpy(to_mysql, from_tokudb, sizeof(float));
492 return from_tokudb + sizeof(float);
493}
494
495static inline int cmp_toku_float(uchar* a_buf, uchar* b_buf) {
496 int ret_val;
497 float a_num;
498 float b_num;
499 //
500 // This is the way Field_float::cmp gets the floats from the buffers
501 //
502 memcpy(&a_num, a_buf, sizeof(float));
503 memcpy(&b_num, b_buf, sizeof(float));
504 if (a_num < b_num) {
505 ret_val = -1;
506 goto exit;
507 }
508 else if (a_num > b_num) {
509 ret_val = 1;
510 goto exit;
511 }
512 ret_val = 0;
513exit:
514 return ret_val;
515}
516
517
518static inline uchar* pack_toku_binary(uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
519 memcpy(to_tokudb, from_mysql, num_bytes);
520 return to_tokudb + num_bytes;
521}
522
523static inline uchar* unpack_toku_binary(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
524 memcpy(to_mysql, from_tokudb, num_bytes);
525 return from_tokudb + num_bytes;
526}
527
528
529static inline int cmp_toku_binary(
530 uchar* a_buf,
531 uint32_t a_num_bytes,
532 uchar* b_buf,
533 uint32_t b_num_bytes
534 )
535{
536 int ret_val = 0;
537 uint32_t num_bytes_to_cmp = (a_num_bytes < b_num_bytes) ? a_num_bytes : b_num_bytes;
538 ret_val = memcmp(a_buf, b_buf, num_bytes_to_cmp);
539 if ((ret_val != 0) || (a_num_bytes == b_num_bytes)) {
540 goto exit;
541 }
542 if (a_num_bytes < b_num_bytes) {
543 ret_val = -1;
544 goto exit;
545 }
546 else {
547 ret_val = 1;
548 goto exit;
549 }
550exit:
551 return ret_val;
552}
553
554//
555// partially copied from below
556//
557static uchar* pack_toku_varbinary_from_desc(
558 uchar* to_tokudb,
559 const uchar* from_desc,
560 uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
561 uint32_t field_length //length of field
562 )
563{
564 uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
565 uint32_t length = field_length;
566 set_if_smaller(length, key_part_length);
567
568 //
569 // copy the length bytes, assuming both are in little endian
570 //
571 to_tokudb[0] = (uchar)length & 255;
572 if (length_bytes_in_tokudb > 1) {
573 to_tokudb[1] = (uchar) (length >> 8);
574 }
575 //
576 // copy the string
577 //
578 memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
579 return to_tokudb + length + length_bytes_in_tokudb;
580}
581
582static inline uchar* pack_toku_varbinary(
583 uchar* to_tokudb,
584 uchar* from_mysql,
585 uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
586 uint32_t max_num_bytes
587 )
588{
589 uint32_t length = 0;
590 uint32_t length_bytes_in_tokudb;
591 switch (length_bytes_in_mysql) {
592 case (0):
593 length = max_num_bytes;
594 break;
595 case (1):
596 length = (uint32_t)(*from_mysql);
597 break;
598 case (2):
599 length = uint2korr(from_mysql);
600 break;
601 case (3):
602 length = tokudb_uint3korr(from_mysql);
603 break;
604 case (4):
605 length = uint4korr(from_mysql);
606 break;
607 }
608
609 //
610 // from this point on, functionality equivalent to pack_toku_varbinary_from_desc
611 //
612 set_if_smaller(length,max_num_bytes);
613
614 length_bytes_in_tokudb = get_length_bytes_from_max(max_num_bytes);
615 //
616 // copy the length bytes, assuming both are in little endian
617 //
618 to_tokudb[0] = (uchar)length & 255;
619 if (length_bytes_in_tokudb > 1) {
620 to_tokudb[1] = (uchar) (length >> 8);
621 }
622 //
623 // copy the string
624 //
625 memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
626 return to_tokudb + length + length_bytes_in_tokudb;
627}
628
629static inline uchar* unpack_toku_varbinary(
630 uchar* to_mysql,
631 uchar* from_tokudb,
632 uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
633 uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
634 )
635{
636 uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
637
638 //
639 // copy the length into the mysql buffer
640 //
641 switch (length_bytes_in_mysql) {
642 case (0):
643 break;
644 case (1):
645 *to_mysql = (uchar) length;
646 break;
647 case (2):
648 int2store(to_mysql, length);
649 break;
650 case (3):
651 int3store(to_mysql, length);
652 break;
653 case (4):
654 int4store(to_mysql, length);
655 break;
656 default:
657 assert_unreachable();
658 }
659 //
660 // copy the binary data
661 //
662 memcpy(to_mysql + length_bytes_in_mysql, from_tokudb + length_bytes_in_tokudb, length);
663 return from_tokudb + length_bytes_in_tokudb+ length;
664}
665
666static inline int cmp_toku_varbinary(
667 uchar* a_buf,
668 uchar* b_buf,
669 uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
670 uint32_t* a_bytes_read,
671 uint32_t* b_bytes_read
672 )
673{
674 int ret_val = 0;
675 uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
676 uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
677 ret_val = cmp_toku_binary(
678 a_buf + length_bytes,
679 a_len,
680 b_buf + length_bytes,
681 b_len
682 );
683 *a_bytes_read = a_len + length_bytes;
684 *b_bytes_read = b_len + length_bytes;
685 return ret_val;
686}
687
688static inline uchar* pack_toku_blob(
689 uchar* to_tokudb,
690 uchar* from_mysql,
691 uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
692 uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
693 uint32_t max_num_bytes,
694#if MYSQL_VERSION_ID >= 50600
695 const CHARSET_INFO* charset
696#else
697 CHARSET_INFO* charset
698#endif
699 )
700{
701 uint32_t length = 0;
702 uint32_t local_char_length = 0;
703 uchar* blob_buf = NULL;
704
705 switch (length_bytes_in_mysql) {
706 case (0):
707 length = max_num_bytes;
708 break;
709 case (1):
710 length = (uint32_t)(*from_mysql);
711 break;
712 case (2):
713 length = uint2korr(from_mysql);
714 break;
715 case (3):
716 length = tokudb_uint3korr(from_mysql);
717 break;
718 case (4):
719 length = uint4korr(from_mysql);
720 break;
721 }
722 set_if_smaller(length,max_num_bytes);
723
724 memcpy(&blob_buf,from_mysql+length_bytes_in_mysql,sizeof(uchar *));
725
726 local_char_length= ((charset->mbmaxlen > 1) ?
727 max_num_bytes/charset->mbmaxlen : max_num_bytes);
728 if (length > local_char_length)
729 {
730 local_char_length= my_charpos(
731 charset,
732 blob_buf,
733 blob_buf+length,
734 local_char_length
735 );
736 set_if_smaller(length, local_char_length);
737 }
738
739
740 //
741 // copy the length bytes, assuming both are in little endian
742 //
743 to_tokudb[0] = (uchar)length & 255;
744 if (length_bytes_in_tokudb > 1) {
745 to_tokudb[1] = (uchar) (length >> 8);
746 }
747 //
748 // copy the string
749 //
750 memcpy(to_tokudb + length_bytes_in_tokudb, blob_buf, length);
751 return to_tokudb + length + length_bytes_in_tokudb;
752}
753
754
755static inline uchar* unpack_toku_blob(
756 uchar* to_mysql,
757 uchar* from_tokudb,
758 uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
759 uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
760 )
761{
762 uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
763 uchar* blob_pos = NULL;
764 //
765 // copy the length into the mysql buffer
766 //
767 switch (length_bytes_in_mysql) {
768 case (0):
769 break;
770 case (1):
771 *to_mysql = (uchar) length;
772 break;
773 case (2):
774 int2store(to_mysql, length);
775 break;
776 case (3):
777 int3store(to_mysql, length);
778 break;
779 case (4):
780 int4store(to_mysql, length);
781 break;
782 default:
783 assert_unreachable();
784 }
785 //
786 // copy the binary data
787 //
788 blob_pos = from_tokudb + length_bytes_in_tokudb;
789 memcpy(to_mysql + length_bytes_in_mysql, &blob_pos, sizeof(uchar *));
790 return from_tokudb + length_bytes_in_tokudb+ length;
791}
792
793
794//
795// partially copied from below
796//
797static uchar* pack_toku_varstring_from_desc(
798 uchar* to_tokudb,
799 const uchar* from_desc,
800 uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
801 uint32_t field_length,
802 uint32_t charset_num//length of field
803 )
804{
805 CHARSET_INFO* charset = NULL;
806 uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
807 uint32_t length = field_length;
808 uint32_t local_char_length = 0;
809 set_if_smaller(length, key_part_length);
810
811 charset = get_charset_from_num(charset_num);
812
813 //
814 // copy the string
815 //
816 local_char_length= ((charset->mbmaxlen > 1) ?
817 key_part_length/charset->mbmaxlen : key_part_length);
818 if (length > local_char_length)
819 {
820 local_char_length= my_charpos(
821 charset,
822 from_desc,
823 from_desc+length,
824 local_char_length
825 );
826 set_if_smaller(length, local_char_length);
827 }
828
829
830 //
831 // copy the length bytes, assuming both are in little endian
832 //
833 to_tokudb[0] = (uchar)length & 255;
834 if (length_bytes_in_tokudb > 1) {
835 to_tokudb[1] = (uchar) (length >> 8);
836 }
837 //
838 // copy the string
839 //
840 memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
841 return to_tokudb + length + length_bytes_in_tokudb;
842}
843
844static inline uchar* pack_toku_varstring(
845 uchar* to_tokudb,
846 uchar* from_mysql,
847 uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
848 uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
849 uint32_t max_num_bytes,
850#if MYSQL_VERSION_ID >= 50600
851 const CHARSET_INFO *charset
852#else
853 CHARSET_INFO* charset
854#endif
855 )
856{
857 uint32_t length = 0;
858 uint32_t local_char_length = 0;
859
860 switch (length_bytes_in_mysql) {
861 case (0):
862 length = max_num_bytes;
863 break;
864 case (1):
865 length = (uint32_t)(*from_mysql);
866 break;
867 case (2):
868 length = uint2korr(from_mysql);
869 break;
870 case (3):
871 length = tokudb_uint3korr(from_mysql);
872 break;
873 case (4):
874 length = uint4korr(from_mysql);
875 break;
876 }
877 set_if_smaller(length,max_num_bytes);
878
879 local_char_length= ((charset->mbmaxlen > 1) ?
880 max_num_bytes/charset->mbmaxlen : max_num_bytes);
881 if (length > local_char_length)
882 {
883 local_char_length= my_charpos(
884 charset,
885 from_mysql+length_bytes_in_mysql,
886 from_mysql+length_bytes_in_mysql+length,
887 local_char_length
888 );
889 set_if_smaller(length, local_char_length);
890 }
891
892
893 //
894 // copy the length bytes, assuming both are in little endian
895 //
896 to_tokudb[0] = (uchar)length & 255;
897 if (length_bytes_in_tokudb > 1) {
898 to_tokudb[1] = (uchar) (length >> 8);
899 }
900 //
901 // copy the string
902 //
903 memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
904 return to_tokudb + length + length_bytes_in_tokudb;
905}
906
907static inline int cmp_toku_string(
908 uchar* a_buf,
909 uint32_t a_num_bytes,
910 uchar* b_buf,
911 uint32_t b_num_bytes,
912 uint32_t charset_number
913 )
914{
915 int ret_val = 0;
916 CHARSET_INFO* charset = NULL;
917
918 charset = get_charset_from_num(charset_number);
919
920 ret_val = charset->coll->strnncollsp(
921 charset,
922 a_buf,
923 a_num_bytes,
924 b_buf,
925 b_num_bytes
926 );
927 return ret_val;
928}
929
930static inline int cmp_toku_varstring(
931 uchar* a_buf,
932 uchar* b_buf,
933 uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
934 uint32_t charset_num,
935 uint32_t* a_bytes_read,
936 uint32_t* b_bytes_read
937 )
938{
939 int ret_val = 0;
940 uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
941 uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
942 ret_val = cmp_toku_string(
943 a_buf + length_bytes,
944 a_len,
945 b_buf + length_bytes,
946 b_len,
947 charset_num
948 );
949 *a_bytes_read = a_len + length_bytes;
950 *b_bytes_read = b_len + length_bytes;
951 return ret_val;
952}
953
954static inline int tokudb_compare_two_hidden_keys(
955 const void* new_key_data,
956 const uint32_t new_key_size,
957 const void* saved_key_data,
958 const uint32_t saved_key_size
959 ) {
960 assert_always(
961 (new_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH) &&
962 (saved_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH));
963 ulonglong a = hpk_char_to_num((uchar *) new_key_data);
964 ulonglong b = hpk_char_to_num((uchar *) saved_key_data);
965 return a < b ? -1 : (a > b ? 1 : 0);
966}
967
968//
969// Returns number of bytes used for a given TOKU_TYPE
970// in a key descriptor. The number of bytes returned
971// here MUST match the number of bytes used for the encoding
972// in create_toku_key_descriptor_for_key
973// Parameters:
974// [in] row_desc - buffer that contains portion of descriptor
975// created in create_toku_key_descriptor_for_key. The first
976// byte points to the TOKU_TYPE.
977//
978static uint32_t skip_field_in_descriptor(uchar* row_desc) {
979 uchar* row_desc_pos = row_desc;
980 TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
981 row_desc_pos++;
982
983 switch (toku_type) {
984 case (toku_type_hpk):
985 case (toku_type_double):
986 case (toku_type_float):
987 break;
988 case (toku_type_int):
989 row_desc_pos += 2;
990 break;
991 case (toku_type_fixbinary):
992 case (toku_type_varbinary):
993 row_desc_pos++;
994 break;
995 case (toku_type_fixstring):
996 case (toku_type_varstring):
997 case (toku_type_blob):
998 row_desc_pos++;
999 row_desc_pos += sizeof(uint32_t);
1000 break;
1001 default:
1002 assert_unreachable();
1003 }
1004 return (uint32_t)(row_desc_pos - row_desc);
1005}
1006
1007//
1008// outputs a descriptor for key into buf. Returns number of bytes used in buf
1009// to store the descriptor. Number of bytes used MUST match number of bytes
1010// we would skip in skip_field_in_descriptor
1011//
1012static int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
1013 uchar* pos = buf;
1014 uint32_t num_bytes_in_field = 0;
1015 uint32_t charset_num = 0;
1016 for (uint i = 0; i < key->user_defined_key_parts; i++) {
1017 Field* field = key->key_part[i].field;
1018 //
1019 // The first byte states if there is a null byte
1020 // 0 means no null byte, non-zer means there
1021 // is one
1022 //
1023 *pos = field->null_bit;
1024 pos++;
1025
1026 //
1027 // The second byte for each field is the type
1028 //
1029 TOKU_TYPE type = mysql_to_toku_type(field);
1030 assert_always((int)type < 256);
1031 *pos = (uchar)(type & 255);
1032 pos++;
1033
1034 //
1035 // based on the type, extra data follows afterwards
1036 //
1037 switch (type) {
1038 //
1039 // two bytes follow for ints, first one states how many
1040 // bytes the int is (1 , 2, 3, 4 or 8)
1041 // next one states if it is signed or not
1042 //
1043 case (toku_type_int):
1044 num_bytes_in_field = field->pack_length();
1045 assert_always (num_bytes_in_field < 256);
1046 *pos = (uchar)(num_bytes_in_field & 255);
1047 pos++;
1048 *pos = (field->flags & UNSIGNED_FLAG) ? 1 : 0;
1049 pos++;
1050 break;
1051 //
1052 // nothing follows floats and doubles
1053 //
1054 case (toku_type_double):
1055 case (toku_type_float):
1056 break;
1057 //
1058 // one byte follow stating the length of the field
1059 //
1060 case (toku_type_fixbinary):
1061 num_bytes_in_field = field->pack_length();
1062 set_if_smaller(num_bytes_in_field, key->key_part[i].length);
1063 assert_always(num_bytes_in_field < 256);
1064 pos[0] = (uchar)(num_bytes_in_field & 255);
1065 pos++;
1066 break;
1067 //
1068 // one byte follows: the number of bytes used to encode the length
1069 //
1070 case (toku_type_varbinary):
1071 *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1072 pos++;
1073 break;
1074 //
1075 // five bytes follow: one for the number of bytes to encode the length,
1076 // four for the charset number
1077 //
1078 case (toku_type_fixstring):
1079 case (toku_type_varstring):
1080 case (toku_type_blob):
1081 *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
1082 pos++;
1083 charset_num = field->charset()->number;
1084 pos[0] = (uchar)(charset_num & 255);
1085 pos[1] = (uchar)((charset_num >> 8) & 255);
1086 pos[2] = (uchar)((charset_num >> 16) & 255);
1087 pos[3] = (uchar)((charset_num >> 24) & 255);
1088 pos += 4;
1089 break;
1090 default:
1091 assert_unreachable();
1092 }
1093 }
1094 return pos - buf;
1095}
1096
1097
1098//
1099// Creates a descriptor for a DB. That contains all information necessary
1100// to do both key comparisons and data comparisons (for dup-sort databases).
1101//
1102// There are two types of descriptors we care about:
1103// 1) Primary key, (in a no-dup database)
1104// 2) secondary keys, which are a secondary key followed by a primary key,
1105// but in a no-dup database.
1106//
1107// I realize this may be confusing, but here is how it works.
1108// All DB's have a key compare.
1109// The format of the descriptor must be able to handle both.
1110//
1111// The first four bytes store an offset into the descriptor to the second piece
1112// used for data comparisons. So, if in the future we want to append something
1113// to the descriptor, we can.
1114//
1115//
1116static int create_toku_key_descriptor(
1117 uchar* buf,
1118 bool is_first_hpk,
1119 KEY* first_key,
1120 bool is_second_hpk,
1121 KEY* second_key
1122 )
1123{
1124 //
1125 // The first four bytes always contain the offset of where the first key
1126 // ends.
1127 //
1128 uchar* pos = buf + 4;
1129 uint32_t num_bytes = 0;
1130 uint32_t offset = 0;
1131
1132
1133 if (is_first_hpk) {
1134 pos[0] = 0; //say there is NO infinity byte
1135 pos[1] = 0; //field cannot be NULL, stating it
1136 pos[2] = toku_type_hpk;
1137 pos += 3;
1138 }
1139 else {
1140 //
1141 // first key is NOT a hidden primary key, so we now pack first_key
1142 //
1143 pos[0] = 1; //say there is an infinity byte
1144 pos++;
1145 num_bytes = create_toku_key_descriptor_for_key(first_key, pos);
1146 pos += num_bytes;
1147 }
1148
1149 //
1150 // if we do not have a second key, we can jump to exit right now
1151 // we do not have a second key if it is not a hidden primary key
1152 // and if second_key is NULL
1153 //
1154 if (is_first_hpk || (!is_second_hpk && (second_key == NULL)) ) {
1155 goto exit;
1156 }
1157
1158 //
1159 // if we have a second key, and it is an hpk, we need to pack it, and
1160 // write in the offset to this position in the first four bytes
1161 //
1162 if (is_second_hpk) {
1163 pos[0] = 0; //field cannot be NULL, stating it
1164 pos[1] = toku_type_hpk;
1165 pos += 2;
1166 }
1167 else {
1168 //
1169 // second key is NOT a hidden primary key, so we now pack second_key
1170 //
1171 num_bytes = create_toku_key_descriptor_for_key(second_key, pos);
1172 pos += num_bytes;
1173 }
1174
1175
1176exit:
1177 offset = pos - buf;
1178 buf[0] = (uchar)(offset & 255);
1179 buf[1] = (uchar)((offset >> 8) & 255);
1180 buf[2] = (uchar)((offset >> 16) & 255);
1181 buf[3] = (uchar)((offset >> 24) & 255);
1182
1183 return pos - buf;
1184}
1185
1186
1187static inline int compare_toku_field(
1188 uchar* a_buf,
1189 uchar* b_buf,
1190 uchar* row_desc,
1191 uint32_t* a_bytes_read,
1192 uint32_t* b_bytes_read,
1193 uint32_t* row_desc_bytes_read,
1194 bool* read_string
1195 )
1196{
1197 int ret_val = 0;
1198 uchar* row_desc_pos = row_desc;
1199 uint32_t num_bytes = 0;
1200 uint32_t length_bytes = 0;
1201 uint32_t charset_num = 0;
1202 bool is_unsigned = false;
1203
1204 TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
1205 row_desc_pos++;
1206
1207 switch (toku_type) {
1208 case (toku_type_hpk):
1209 ret_val = tokudb_compare_two_hidden_keys(
1210 a_buf,
1211 TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH,
1212 b_buf,
1213 TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH
1214 );
1215 *a_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1216 *b_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
1217 break;
1218 case (toku_type_int):
1219 num_bytes = row_desc_pos[0];
1220 is_unsigned = row_desc_pos[1];
1221 ret_val = cmp_toku_int(
1222 a_buf,
1223 b_buf,
1224 is_unsigned,
1225 num_bytes
1226 );
1227 *a_bytes_read = num_bytes;
1228 *b_bytes_read = num_bytes;
1229 row_desc_pos += 2;
1230 break;
1231 case (toku_type_double):
1232 ret_val = cmp_toku_double(a_buf, b_buf);
1233 *a_bytes_read = sizeof(double);
1234 *b_bytes_read = sizeof(double);
1235 break;
1236 case (toku_type_float):
1237 ret_val = cmp_toku_float(a_buf, b_buf);
1238 *a_bytes_read = sizeof(float);
1239 *b_bytes_read = sizeof(float);
1240 break;
1241 case (toku_type_fixbinary):
1242 num_bytes = row_desc_pos[0];
1243 ret_val = cmp_toku_binary(a_buf, num_bytes, b_buf,num_bytes);
1244 *a_bytes_read = num_bytes;
1245 *b_bytes_read = num_bytes;
1246 row_desc_pos++;
1247 break;
1248 case (toku_type_varbinary):
1249 length_bytes = row_desc_pos[0];
1250 ret_val = cmp_toku_varbinary(
1251 a_buf,
1252 b_buf,
1253 length_bytes,
1254 a_bytes_read,
1255 b_bytes_read
1256 );
1257 row_desc_pos++;
1258 break;
1259 case (toku_type_fixstring):
1260 case (toku_type_varstring):
1261 case (toku_type_blob):
1262 length_bytes = row_desc_pos[0];
1263 row_desc_pos++;
1264 //
1265 // not sure we want to read charset_num like this
1266 //
1267 charset_num = *(uint32_t *)row_desc_pos;
1268 row_desc_pos += sizeof(uint32_t);
1269 ret_val = cmp_toku_varstring(
1270 a_buf,
1271 b_buf,
1272 length_bytes,
1273 charset_num,
1274 a_bytes_read,
1275 b_bytes_read
1276 );
1277 *read_string = true;
1278 break;
1279 default:
1280 assert_unreachable();
1281 }
1282
1283 *row_desc_bytes_read = row_desc_pos - row_desc;
1284 return ret_val;
1285}
1286
1287//
1288// packs a field from a MySQL buffer into a tokudb buffer.
1289// Used for inserts/updates
1290//
1291static uchar* pack_toku_key_field(
1292 uchar* to_tokudb,
1293 uchar* from_mysql,
1294 Field* field,
1295 uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1296 )
1297{
1298 uchar* new_pos = NULL;
1299 uint32_t num_bytes = 0;
1300 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1301 switch(toku_type) {
1302 case (toku_type_int):
1303 assert_always(key_part_length == field->pack_length());
1304 new_pos = pack_toku_int(
1305 to_tokudb,
1306 from_mysql,
1307 field->pack_length()
1308 );
1309 goto exit;
1310 case (toku_type_double):
1311 assert_always(field->pack_length() == sizeof(double));
1312 assert_always(key_part_length == sizeof(double));
1313 new_pos = pack_toku_double(to_tokudb, from_mysql);
1314 goto exit;
1315 case (toku_type_float):
1316 assert_always(field->pack_length() == sizeof(float));
1317 assert_always(key_part_length == sizeof(float));
1318 new_pos = pack_toku_float(to_tokudb, from_mysql);
1319 goto exit;
1320 case (toku_type_fixbinary):
1321 num_bytes = field->pack_length();
1322 set_if_smaller(num_bytes, key_part_length);
1323 new_pos = pack_toku_binary(
1324 to_tokudb,
1325 from_mysql,
1326 num_bytes
1327 );
1328 goto exit;
1329 case (toku_type_fixstring):
1330 num_bytes = field->pack_length();
1331 set_if_smaller(num_bytes, key_part_length);
1332 new_pos = pack_toku_varstring(
1333 to_tokudb,
1334 from_mysql,
1335 get_length_bytes_from_max(key_part_length),
1336 0,
1337 num_bytes,
1338 field->charset()
1339 );
1340 goto exit;
1341 case (toku_type_varbinary):
1342 new_pos = pack_toku_varbinary(
1343 to_tokudb,
1344 from_mysql,
1345 ((Field_varstring *)field)->length_bytes,
1346 key_part_length
1347 );
1348 goto exit;
1349 case (toku_type_varstring):
1350 new_pos = pack_toku_varstring(
1351 to_tokudb,
1352 from_mysql,
1353 get_length_bytes_from_max(key_part_length),
1354 ((Field_varstring *)field)->length_bytes,
1355 key_part_length,
1356 field->charset()
1357 );
1358 goto exit;
1359 case (toku_type_blob):
1360 new_pos = pack_toku_blob(
1361 to_tokudb,
1362 from_mysql,
1363 get_length_bytes_from_max(key_part_length),
1364 ((Field_blob *)field)->row_pack_length(), //only calling this because packlength is returned
1365 key_part_length,
1366 field->charset()
1367 );
1368 goto exit;
1369 default:
1370 assert_unreachable();
1371 }
1372 assert_unreachable();
1373exit:
1374 return new_pos;
1375}
1376
1377//
1378// packs a field from a MySQL buffer into a tokudb buffer.
1379// Used for queries. The only difference between this function
1380// and pack_toku_key_field is that all variable sized columns
1381// use 2 bytes to encode the length, regardless of the field
1382// So varchar(4) will still use 2 bytes to encode the field
1383//
1384static uchar* pack_key_toku_key_field(
1385 uchar* to_tokudb,
1386 uchar* from_mysql,
1387 Field* field,
1388 uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
1389 )
1390{
1391 uchar* new_pos = NULL;
1392 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1393 switch(toku_type) {
1394 case (toku_type_int):
1395 case (toku_type_double):
1396 case (toku_type_float):
1397 case (toku_type_fixbinary):
1398 case (toku_type_fixstring):
1399 new_pos = pack_toku_key_field(to_tokudb, from_mysql, field, key_part_length);
1400 goto exit;
1401 case (toku_type_varbinary):
1402 new_pos = pack_toku_varbinary(
1403 to_tokudb,
1404 from_mysql,
1405 2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1406 key_part_length
1407 );
1408 goto exit;
1409 case (toku_type_varstring):
1410 case (toku_type_blob):
1411 new_pos = pack_toku_varstring(
1412 to_tokudb,
1413 from_mysql,
1414 get_length_bytes_from_max(key_part_length),
1415 2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
1416 key_part_length,
1417 field->charset()
1418 );
1419 goto exit;
1420 default:
1421 assert_unreachable();
1422 }
1423
1424 assert_unreachable();
1425exit:
1426 return new_pos;
1427}
1428
1429
1430uchar* unpack_toku_key_field(
1431 uchar* to_mysql,
1432 uchar* from_tokudb,
1433 Field* field,
1434 uint32_t key_part_length) {
1435
1436 uchar* new_pos = NULL;
1437 uint32_t num_bytes = 0;
1438 uint32_t num_bytes_copied;
1439 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1440 switch(toku_type) {
1441 case (toku_type_int):
1442 assert_always(key_part_length == field->pack_length());
1443 new_pos = unpack_toku_int(
1444 to_mysql,
1445 from_tokudb,
1446 field->pack_length()
1447 );
1448 goto exit;
1449 case (toku_type_double):
1450 assert_always(field->pack_length() == sizeof(double));
1451 assert_always(key_part_length == sizeof(double));
1452 new_pos = unpack_toku_double(to_mysql, from_tokudb);
1453 goto exit;
1454 case (toku_type_float):
1455 assert_always(field->pack_length() == sizeof(float));
1456 assert_always(key_part_length == sizeof(float));
1457 new_pos = unpack_toku_float(to_mysql, from_tokudb);
1458 goto exit;
1459 case (toku_type_fixbinary):
1460 num_bytes = field->pack_length();
1461 set_if_smaller(num_bytes, key_part_length);
1462 new_pos = unpack_toku_binary(
1463 to_mysql,
1464 from_tokudb,
1465 num_bytes);
1466 goto exit;
1467 case (toku_type_fixstring):
1468 num_bytes = field->pack_length();
1469 new_pos = unpack_toku_varbinary(
1470 to_mysql,
1471 from_tokudb,
1472 get_length_bytes_from_max(key_part_length),
1473 0);
1474 num_bytes_copied =
1475 new_pos -
1476 (from_tokudb + get_length_bytes_from_max(key_part_length));
1477 assert_always(num_bytes_copied <= num_bytes);
1478 memset(
1479 to_mysql + num_bytes_copied,
1480 field->charset()->pad_char,
1481 num_bytes - num_bytes_copied);
1482 goto exit;
1483 case (toku_type_varbinary):
1484 case (toku_type_varstring):
1485 new_pos = unpack_toku_varbinary(
1486 to_mysql,
1487 from_tokudb,
1488 get_length_bytes_from_max(key_part_length),
1489 ((Field_varstring*)field)->length_bytes);
1490 goto exit;
1491 case (toku_type_blob):
1492 new_pos = unpack_toku_blob(
1493 to_mysql,
1494 from_tokudb,
1495 get_length_bytes_from_max(key_part_length),
1496 //only calling this because packlength is returned
1497 ((Field_blob *)field)->row_pack_length());
1498 goto exit;
1499 default:
1500 assert_unreachable();
1501 }
1502 assert_unreachable();
1503exit:
1504 return new_pos;
1505}
1506
1507
1508static int tokudb_compare_two_keys(
1509 const void* new_key_data,
1510 const uint32_t new_key_size,
1511 const void* saved_key_data,
1512 const uint32_t saved_key_size,
1513 const void* row_desc,
1514 const uint32_t row_desc_size,
1515 bool cmp_prefix,
1516 bool* read_string) {
1517
1518 int ret_val = 0;
1519 int8_t new_key_inf_val = COL_NEG_INF;
1520 int8_t saved_key_inf_val = COL_NEG_INF;
1521
1522 uchar* row_desc_ptr = (uchar *)row_desc;
1523 uchar *new_key_ptr = (uchar *)new_key_data;
1524 uchar *saved_key_ptr = (uchar *)saved_key_data;
1525
1526 uint32_t new_key_bytes_left = new_key_size;
1527 uint32_t saved_key_bytes_left = saved_key_size;
1528
1529 //
1530 // if the keys have an infinity byte, set it
1531 //
1532 if (row_desc_ptr[0]) {
1533 new_key_inf_val = (int8_t)new_key_ptr[0];
1534 saved_key_inf_val = (int8_t)saved_key_ptr[0];
1535 new_key_ptr++;
1536 saved_key_ptr++;
1537 }
1538 row_desc_ptr++;
1539
1540 while ((uint32_t)(new_key_ptr - (uchar*)new_key_data) < new_key_size &&
1541 (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) < saved_key_size &&
1542 (uint32_t)(row_desc_ptr - (uchar*)row_desc) < row_desc_size) {
1543 uint32_t new_key_field_length;
1544 uint32_t saved_key_field_length;
1545 uint32_t row_desc_field_length;
1546 //
1547 // if there is a null byte at this point in the key
1548 //
1549 if (row_desc_ptr[0]) {
1550 //
1551 // compare null bytes. If different, return
1552 //
1553 if (new_key_ptr[0] != saved_key_ptr[0]) {
1554 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1555 goto exit;
1556 }
1557 saved_key_ptr++;
1558 //
1559 // in case we just read the fact that new_key_ptr and saved_key_ptr
1560 // have NULL as their next field
1561 //
1562 if (!*new_key_ptr++) {
1563 //
1564 // skip row_desc_ptr[0] read in if clause
1565 //
1566 row_desc_ptr++;
1567 //
1568 // skip data that describes rest of field
1569 //
1570 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1571 continue;
1572 }
1573 }
1574 row_desc_ptr++;
1575
1576 ret_val = compare_toku_field(
1577 new_key_ptr,
1578 saved_key_ptr,
1579 row_desc_ptr,
1580 &new_key_field_length,
1581 &saved_key_field_length,
1582 &row_desc_field_length,
1583 read_string);
1584 new_key_ptr += new_key_field_length;
1585 saved_key_ptr += saved_key_field_length;
1586 row_desc_ptr += row_desc_field_length;
1587 if (ret_val) {
1588 goto exit;
1589 }
1590
1591 assert_always(
1592 (uint32_t)(new_key_ptr - (uchar*)new_key_data) <= new_key_size);
1593 assert_always(
1594 (uint32_t)(saved_key_ptr - (uchar*)saved_key_data) <= saved_key_size);
1595 assert_always(
1596 (uint32_t)(row_desc_ptr - (uchar*)row_desc) <= row_desc_size);
1597 }
1598 new_key_bytes_left =
1599 new_key_size - ((uint32_t)(new_key_ptr - (uchar*)new_key_data));
1600 saved_key_bytes_left =
1601 saved_key_size - ((uint32_t)(saved_key_ptr - (uchar*)saved_key_data));
1602 if (cmp_prefix) {
1603 ret_val = 0;
1604 } else if (new_key_bytes_left== 0 && saved_key_bytes_left== 0) {
1605 // in this case, read both keys to completion, now read infinity byte
1606 ret_val = new_key_inf_val - saved_key_inf_val;
1607 } else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) {
1608 // at this point, one SHOULD be 0
1609 ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1;
1610 } else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) {
1611 ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1;
1612 } else {
1613 // this should never happen, perhaps we should assert(false)
1614 assert_unreachable();
1615 ret_val = new_key_bytes_left - saved_key_bytes_left;
1616 }
1617exit:
1618 return ret_val;
1619}
1620
1621static int simple_memcmp(const DBT *keya, const DBT *keyb) {
1622 int cmp;
1623 int num_bytes_cmp = keya->size < keyb->size ?
1624 keya->size : keyb->size;
1625 cmp = memcmp(keya->data,keyb->data,num_bytes_cmp);
1626 if (cmp == 0 && (keya->size != keyb->size)) {
1627 cmp = keya->size < keyb->size ? -1 : 1;
1628 }
1629 return cmp;
1630}
1631
1632// comparison function to be used by the fractal trees.
1633static int tokudb_cmp_dbt_key(DB* file, const DBT *keya, const DBT *keyb) {
1634 int cmp;
1635 if (file->cmp_descriptor->dbt.size == 0) {
1636 cmp = simple_memcmp(keya, keyb);
1637 }
1638 else {
1639 bool read_string = false;
1640 cmp = tokudb_compare_two_keys(
1641 keya->data,
1642 keya->size,
1643 keyb->data,
1644 keyb->size,
1645 (uchar *)file->cmp_descriptor->dbt.data + 4,
1646 (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1647 false,
1648 &read_string
1649 );
1650 // comparison above may be case-insensitive, but fractal tree
1651 // needs to distinguish between different data, so we do this
1652 // additional check here
1653 if (read_string && (cmp == 0)) {
1654 cmp = simple_memcmp(keya, keyb);
1655 }
1656 }
1657 return cmp;
1658}
1659
1660//TODO: QQQ Only do one direction for prefix.
1661static int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb) {
1662 // calls to this function are done by the handlerton, and are
1663 // comparing just the keys as MySQL would compare them.
1664 bool read_string = false;
1665 int cmp = tokudb_compare_two_keys(
1666 keya->data,
1667 keya->size,
1668 keyb->data,
1669 keyb->size,
1670 (uchar *)file->cmp_descriptor->dbt.data + 4,
1671 *(uint32_t *)file->cmp_descriptor->dbt.data - 4,
1672 true,
1673 &read_string
1674 );
1675 return cmp;
1676}
1677
1678static int tokudb_compare_two_key_parts(
1679 const void* new_key_data,
1680 const uint32_t new_key_size,
1681 const void* saved_key_data,
1682 const uint32_t saved_key_size,
1683 const void* row_desc,
1684 const uint32_t row_desc_size,
1685 uint max_parts
1686 )
1687{
1688 int ret_val = 0;
1689
1690 uchar* row_desc_ptr = (uchar *)row_desc;
1691 uchar *new_key_ptr = (uchar *)new_key_data;
1692 uchar *saved_key_ptr = (uchar *)saved_key_data;
1693
1694 //
1695 // if the keys have an infinity byte, set it
1696 //
1697 if (row_desc_ptr[0]) {
1698 // new_key_inf_val = (int8_t)new_key_ptr[0];
1699 // saved_key_inf_val = (int8_t)saved_key_ptr[0];
1700 new_key_ptr++;
1701 saved_key_ptr++;
1702 }
1703 row_desc_ptr++;
1704
1705 for (uint i = 0; i < max_parts; i++) {
1706 if (!((uint32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size &&
1707 (uint32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size &&
1708 (uint32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size))
1709 break;
1710 uint32_t new_key_field_length;
1711 uint32_t saved_key_field_length;
1712 uint32_t row_desc_field_length;
1713 //
1714 // if there is a null byte at this point in the key
1715 //
1716 if (row_desc_ptr[0]) {
1717 //
1718 // compare null bytes. If different, return
1719 //
1720 if (new_key_ptr[0] != saved_key_ptr[0]) {
1721 ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
1722 goto exit;
1723 }
1724 saved_key_ptr++;
1725 //
1726 // in case we just read the fact that new_key_ptr and saved_key_ptr
1727 // have NULL as their next field
1728 //
1729 if (!*new_key_ptr++) {
1730 //
1731 // skip row_desc_ptr[0] read in if clause
1732 //
1733 row_desc_ptr++;
1734 //
1735 // skip data that describes rest of field
1736 //
1737 row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
1738 continue;
1739 }
1740 }
1741 row_desc_ptr++;
1742 bool read_string = false;
1743 ret_val = compare_toku_field(
1744 new_key_ptr,
1745 saved_key_ptr,
1746 row_desc_ptr,
1747 &new_key_field_length,
1748 &saved_key_field_length,
1749 &row_desc_field_length,
1750 &read_string
1751 );
1752 new_key_ptr += new_key_field_length;
1753 saved_key_ptr += saved_key_field_length;
1754 row_desc_ptr += row_desc_field_length;
1755 if (ret_val) {
1756 goto exit;
1757 }
1758
1759 assert_always((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
1760 assert_always((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
1761 assert_always((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
1762 }
1763
1764 ret_val = 0;
1765exit:
1766 return ret_val;
1767}
1768
1769static int tokudb_cmp_dbt_key_parts(DB *file, const DBT *keya, const DBT *keyb, uint max_parts) {
1770 assert_always(file->cmp_descriptor->dbt.size);
1771 return tokudb_compare_two_key_parts(
1772 keya->data,
1773 keya->size,
1774 keyb->data,
1775 keyb->size,
1776 (uchar *)file->cmp_descriptor->dbt.data + 4,
1777 (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
1778 max_parts);
1779}
1780
1781static uint32_t create_toku_main_key_pack_descriptor (
1782 uchar* buf
1783 )
1784{
1785 //
1786 // The first four bytes always contain the offset of where the first key
1787 // ends.
1788 //
1789 uchar* pos = buf + 4;
1790 uint32_t offset = 0;
1791 //
1792 // one byte states if this is the main dictionary
1793 //
1794 pos[0] = 1;
1795 pos++;
1796 goto exit;
1797
1798
1799exit:
1800 offset = pos - buf;
1801 buf[0] = (uchar)(offset & 255);
1802 buf[1] = (uchar)((offset >> 8) & 255);
1803 buf[2] = (uchar)((offset >> 16) & 255);
1804 buf[3] = (uchar)((offset >> 24) & 255);
1805
1806 return pos - buf;
1807}
1808
// Tag bytes written by pack_desc_char_info(): COL_HAS_CHARSET is followed
// by a four-byte charset number, COL_HAS_NO_CHARSET by nothing.
#define COL_HAS_NO_CHARSET 0x44
#define COL_HAS_CHARSET 0x55

// Tag bytes written by pack_desc_pk_offset_info(): the four bytes after the
// tag hold either an accumulated byte offset (FIX) or the position of the
// key part within the primary key (VAR).
#define COL_FIX_PK_OFFSET 0x66
#define COL_VAR_PK_OFFSET 0x77

// Tag bytes written by create_toku_clustering_val_pack_descriptor(): each
// introduces a start/end pair describing a run of fixed-size (FIX) or
// variable-size (VAR) fields.
#define CK_FIX_RANGE 0x88
#define CK_VAR_RANGE 0x99

// Appends the col_pack_val of column field_index (as recorded for key
// pk_index) to the buffer and advances the write position by four bytes.
// Expands to two statements plus a trailing semicolon, and relies on the
// locals `pos`, `kc_info`, `pk_index` and `field_index` being in scope at
// the point of use.
#define COPY_OFFSET_TO_BUF memcpy ( \
    pos, \
    &kc_info->cp_info[pk_index][field_index].col_pack_val, \
    sizeof(uint32_t) \
    ); \
    pos += sizeof(uint32_t);
1824
1825
1826static uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1827 uchar* pos = buf;
1828 uint16 field_index = key_part->field->field_index;
1829 Field* field = table_share->field[field_index];
1830 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1831 uint32_t key_part_length = key_part->length;
1832 uint32_t field_length;
1833 uchar len_bytes = 0;
1834
1835 switch(toku_type) {
1836 case (toku_type_int):
1837 case (toku_type_double):
1838 case (toku_type_float):
1839 pos[0] = COL_FIX_FIELD;
1840 pos++;
1841 assert_always(kc_info->field_lengths[field_index] < 256);
1842 pos[0] = kc_info->field_lengths[field_index];
1843 pos++;
1844 break;
1845 case (toku_type_fixbinary):
1846 pos[0] = COL_FIX_FIELD;
1847 pos++;
1848 field_length = field->pack_length();
1849 set_if_smaller(key_part_length, field_length);
1850 assert_always(key_part_length < 256);
1851 pos[0] = (uchar)key_part_length;
1852 pos++;
1853 break;
1854 case (toku_type_fixstring):
1855 case (toku_type_varbinary):
1856 case (toku_type_varstring):
1857 case (toku_type_blob):
1858 pos[0] = COL_VAR_FIELD;
1859 pos++;
1860 len_bytes = (key_part_length > 255) ? 2 : 1;
1861 pos[0] = len_bytes;
1862 pos++;
1863 break;
1864 default:
1865 assert_unreachable();
1866 }
1867
1868 return pos - buf;
1869}
1870
1871static uint32_t pack_desc_pk_offset_info(
1872 uchar* buf,
1873 KEY_AND_COL_INFO* kc_info,
1874 TABLE_SHARE* table_share,
1875 KEY_PART_INFO* key_part,
1876 KEY* prim_key,
1877 uchar* pk_info
1878 )
1879{
1880 uchar* pos = buf;
1881 uint16 field_index = key_part->field->field_index;
1882 bool found_col_in_pk = false;
1883 uint32_t index_in_pk;
1884
1885 bool is_constant_offset = true;
1886 uint32_t offset = 0;
1887 for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
1888 KEY_PART_INFO curr = prim_key->key_part[i];
1889 uint16 curr_field_index = curr.field->field_index;
1890
1891 if (pk_info[2*i] == COL_VAR_FIELD) {
1892 is_constant_offset = false;
1893 }
1894
1895 if (curr_field_index == field_index) {
1896 found_col_in_pk = true;
1897 index_in_pk = i;
1898 break;
1899 }
1900 offset += pk_info[2*i + 1];
1901 }
1902 assert_always(found_col_in_pk);
1903 if (is_constant_offset) {
1904 pos[0] = COL_FIX_PK_OFFSET;
1905 pos++;
1906
1907 memcpy (pos, &offset, sizeof(offset));
1908 pos += sizeof(offset);
1909 }
1910 else {
1911 pos[0] = COL_VAR_PK_OFFSET;
1912 pos++;
1913
1914 memcpy(pos, &index_in_pk, sizeof(index_in_pk));
1915 pos += sizeof(index_in_pk);
1916 }
1917 return pos - buf;
1918}
1919
1920static uint32_t pack_desc_offset_info(uchar* buf, KEY_AND_COL_INFO* kc_info, uint pk_index, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1921 uchar* pos = buf;
1922 uint16 field_index = key_part->field->field_index;
1923 Field* field = table_share->field[field_index];
1924 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1925 bool found_index = false;
1926
1927 switch(toku_type) {
1928 case (toku_type_int):
1929 case (toku_type_double):
1930 case (toku_type_float):
1931 case (toku_type_fixbinary):
1932 case (toku_type_fixstring):
1933 pos[0] = COL_FIX_FIELD;
1934 pos++;
1935
1936 // copy the offset
1937 COPY_OFFSET_TO_BUF;
1938 break;
1939 case (toku_type_varbinary):
1940 case (toku_type_varstring):
1941 pos[0] = COL_VAR_FIELD;
1942 pos++;
1943
1944 // copy the offset
1945 COPY_OFFSET_TO_BUF;
1946 break;
1947 case (toku_type_blob):
1948 pos[0] = COL_BLOB_FIELD;
1949 pos++;
1950 for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
1951 uint32_t blob_index = kc_info->blob_fields[i];
1952 if (blob_index == field_index) {
1953 uint32_t val = i;
1954 memcpy(pos, &val, sizeof(uint32_t));
1955 pos += sizeof(uint32_t);
1956 found_index = true;
1957 break;
1958 }
1959 }
1960 assert_always(found_index);
1961 break;
1962 default:
1963 assert_unreachable();
1964 }
1965
1966 return pos - buf;
1967}
1968
1969static uint32_t pack_desc_key_length_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
1970 uchar* pos = buf;
1971 uint16 field_index = key_part->field->field_index;
1972 Field* field = table_share->field[field_index];
1973 TOKU_TYPE toku_type = mysql_to_toku_type(field);
1974 uint32_t key_part_length = key_part->length;
1975 uint32_t field_length;
1976
1977 switch(toku_type) {
1978 case (toku_type_int):
1979 case (toku_type_double):
1980 case (toku_type_float):
1981 // copy the key_part length
1982 field_length = kc_info->field_lengths[field_index];
1983 memcpy(pos, &field_length, sizeof(field_length));
1984 pos += sizeof(key_part_length);
1985 break;
1986 case (toku_type_fixbinary):
1987 case (toku_type_fixstring):
1988 field_length = field->pack_length();
1989 set_if_smaller(key_part_length, field_length);
1990 // fallthrough
1991 case (toku_type_varbinary):
1992 case (toku_type_varstring):
1993 case (toku_type_blob):
1994 // copy the key_part length
1995 memcpy(pos, &key_part_length, sizeof(key_part_length));
1996 pos += sizeof(key_part_length);
1997 break;
1998 default:
1999 assert_unreachable();
2000 }
2001
2002 return pos - buf;
2003}
2004
2005static uint32_t pack_desc_char_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
2006 uchar* pos = buf;
2007 uint16 field_index = key_part->field->field_index;
2008 Field* field = table_share->field[field_index];
2009 TOKU_TYPE toku_type = mysql_to_toku_type(field);
2010 uint32_t charset_num = 0;
2011
2012 switch(toku_type) {
2013 case (toku_type_int):
2014 case (toku_type_double):
2015 case (toku_type_float):
2016 case (toku_type_fixbinary):
2017 case (toku_type_varbinary):
2018 pos[0] = COL_HAS_NO_CHARSET;
2019 pos++;
2020 break;
2021 case (toku_type_fixstring):
2022 case (toku_type_varstring):
2023 case (toku_type_blob):
2024 pos[0] = COL_HAS_CHARSET;
2025 pos++;
2026
2027 // copy the charset
2028 charset_num = field->charset()->number;
2029 pos[0] = (uchar)(charset_num & 255);
2030 pos[1] = (uchar)((charset_num >> 8) & 255);
2031 pos[2] = (uchar)((charset_num >> 16) & 255);
2032 pos[3] = (uchar)((charset_num >> 24) & 255);
2033 pos += 4;
2034 break;
2035 default:
2036 assert_unreachable();
2037 }
2038
2039 return pos - buf;
2040}
2041
2042static uint32_t pack_some_row_info (
2043 uchar* buf,
2044 uint pk_index,
2045 TABLE_SHARE* table_share,
2046 KEY_AND_COL_INFO* kc_info
2047 )
2048{
2049 uchar* pos = buf;
2050 uint32_t num_null_bytes = 0;
2051 //
2052 // four bytes stating number of null bytes
2053 //
2054 num_null_bytes = table_share->null_bytes;
2055 memcpy(pos, &num_null_bytes, sizeof(num_null_bytes));
2056 pos += sizeof(num_null_bytes);
2057 //
2058 // eight bytes stating mcp_info
2059 //
2060 memcpy(pos, &kc_info->mcp_info[pk_index], sizeof(MULTI_COL_PACK_INFO));
2061 pos += sizeof(MULTI_COL_PACK_INFO);
2062 //
2063 // one byte for the number of offset bytes
2064 //
2065 pos[0] = (uchar)kc_info->num_offset_bytes;
2066 pos++;
2067
2068 return pos - buf;
2069}
2070
2071static uint32_t get_max_clustering_val_pack_desc_size(
2072 TABLE_SHARE* table_share
2073 )
2074{
2075 uint32_t ret_val = 0;
2076 //
2077 // the fixed stuff:
2078 // first the things in pack_some_row_info
2079 // second another mcp_info
2080 // third a byte that states if blobs exist
2081 ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2082 ret_val += sizeof(MULTI_COL_PACK_INFO);
2083 ret_val++;
2084 //
2085 // now the variable stuff
2086 // an upper bound is, for each field, byte stating if it is fixed or var, followed
2087 // by 8 bytes for endpoints
2088 //
2089 ret_val += (table_share->fields)*(1 + 2*sizeof(uint32_t));
2090 //
2091 // four bytes storing the length of this portion
2092 //
2093 ret_val += 4;
2094
2095 return ret_val;
2096}
2097
2098static uint32_t create_toku_clustering_val_pack_descriptor (
2099 uchar* buf,
2100 uint pk_index,
2101 TABLE_SHARE* table_share,
2102 KEY_AND_COL_INFO* kc_info,
2103 uint32_t keynr,
2104 bool is_clustering
2105 )
2106{
2107 uchar* pos = buf + 4;
2108 uint32_t offset = 0;
2109 bool start_range_set = false;
2110 uint32_t last_col = 0;
2111 //
2112 // do not need to write anything if the key is not clustering
2113 //
2114 if (!is_clustering) {
2115 goto exit;
2116 }
2117
2118 pos += pack_some_row_info(
2119 pos,
2120 pk_index,
2121 table_share,
2122 kc_info
2123 );
2124
2125 //
2126 // eight bytes stating mcp_info of clustering key
2127 //
2128 memcpy(pos, &kc_info->mcp_info[keynr], sizeof(MULTI_COL_PACK_INFO));
2129 pos += sizeof(MULTI_COL_PACK_INFO);
2130
2131 //
2132 // store bit that states if blobs exist
2133 //
2134 pos[0] = (kc_info->num_blobs) ? 1 : 0;
2135 pos++;
2136
2137 //
2138 // descriptor assumes that all fields filtered from pk are
2139 // also filtered from clustering key val. Doing check here to
2140 // make sure something unexpected does not happen
2141 //
2142 for (uint i = 0; i < table_share->fields; i++) {
2143 bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2144 bool col_filtered_in_pk = bitmap_is_set(&kc_info->key_filters[pk_index],i);
2145 if (col_filtered_in_pk) {
2146 assert_always(col_filtered);
2147 }
2148 }
2149
2150 //
2151 // first handle the fixed fields
2152 //
2153 start_range_set = false;
2154 last_col = 0;
2155 for (uint i = 0; i < table_share->fields; i++) {
2156 bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2157 if (!is_fixed_field(kc_info, i)) {
2158 //
2159 // not a fixed field, continue
2160 //
2161 continue;
2162 }
2163 if (col_filtered && start_range_set) {
2164 //
2165 // need to set the end range
2166 //
2167 start_range_set = false;
2168 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val + kc_info->field_lengths[last_col];
2169 memcpy(pos, &end_offset, sizeof(end_offset));
2170 pos += sizeof(end_offset);
2171 }
2172 else if (!col_filtered) {
2173 if (!start_range_set) {
2174 pos[0] = CK_FIX_RANGE;
2175 pos++;
2176 start_range_set = true;
2177 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2178 memcpy(pos, &start_offset , sizeof(start_offset));
2179 pos += sizeof(start_offset);
2180 }
2181 last_col = i;
2182 }
2183 else {
2184 continue;
2185 }
2186 }
2187 if (start_range_set) {
2188 //
2189 // need to set the end range
2190 //
2191 start_range_set = false;
2192 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val+ kc_info->field_lengths[last_col];
2193 memcpy(pos, &end_offset, sizeof(end_offset));
2194 pos += sizeof(end_offset);
2195 }
2196
2197 //
2198 // now handle the var fields
2199 //
2200 start_range_set = false;
2201 last_col = 0;
2202 for (uint i = 0; i < table_share->fields; i++) {
2203 bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
2204 if (!is_variable_field(kc_info, i)) {
2205 //
2206 // not a var field, continue
2207 //
2208 continue;
2209 }
2210 if (col_filtered && start_range_set) {
2211 //
2212 // need to set the end range
2213 //
2214 start_range_set = false;
2215 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2216 memcpy(pos, &end_offset, sizeof(end_offset));
2217 pos += sizeof(end_offset);
2218 }
2219 else if (!col_filtered) {
2220 if (!start_range_set) {
2221 pos[0] = CK_VAR_RANGE;
2222 pos++;
2223
2224 start_range_set = true;
2225 uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
2226 memcpy(pos, &start_offset , sizeof(start_offset));
2227 pos += sizeof(start_offset);
2228 }
2229 last_col = i;
2230 }
2231 else {
2232 continue;
2233 }
2234 }
2235 if (start_range_set) {
2236 start_range_set = false;
2237 uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
2238 memcpy(pos, &end_offset, sizeof(end_offset));
2239 pos += sizeof(end_offset);
2240 }
2241
2242exit:
2243 offset = pos - buf;
2244 buf[0] = (uchar)(offset & 255);
2245 buf[1] = (uchar)((offset >> 8) & 255);
2246 buf[2] = (uchar)((offset >> 16) & 255);
2247 buf[3] = (uchar)((offset >> 24) & 255);
2248
2249 return pos - buf;
2250}
2251
2252static uint32_t pack_clustering_val_from_desc(
2253 uchar* buf,
2254 void* row_desc,
2255 uint32_t row_desc_size,
2256 const DBT* pk_val
2257 )
2258{
2259 uchar* null_bytes_src_ptr = NULL;
2260 uchar* fixed_src_ptr = NULL;
2261 uchar* var_src_offset_ptr = NULL;
2262 uchar* var_src_data_ptr = NULL;
2263 uchar* fixed_dest_ptr = NULL;
2264 uchar* var_dest_offset_ptr = NULL;
2265 uchar* var_dest_data_ptr = NULL;
2266 uchar* orig_var_dest_data_ptr = NULL;
2267 uchar* desc_pos = (uchar *)row_desc;
2268 uint32_t num_null_bytes = 0;
2269 uint32_t num_offset_bytes;
2270 MULTI_COL_PACK_INFO src_mcp_info, dest_mcp_info;
2271 uchar has_blobs;
2272
2273 memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
2274 desc_pos += sizeof(num_null_bytes);
2275
2276 memcpy(&src_mcp_info, desc_pos, sizeof(src_mcp_info));
2277 desc_pos += sizeof(src_mcp_info);
2278
2279 num_offset_bytes = desc_pos[0];
2280 desc_pos++;
2281
2282 memcpy(&dest_mcp_info, desc_pos, sizeof(dest_mcp_info));
2283 desc_pos += sizeof(dest_mcp_info);
2284
2285 has_blobs = desc_pos[0];
2286 desc_pos++;
2287
2288 //
2289 //set the variables
2290 //
2291 null_bytes_src_ptr = (uchar *)pk_val->data;
2292 fixed_src_ptr = null_bytes_src_ptr + num_null_bytes;
2293 var_src_offset_ptr = fixed_src_ptr + src_mcp_info.fixed_field_size;
2294 var_src_data_ptr = var_src_offset_ptr + src_mcp_info.len_of_offsets;
2295
2296 fixed_dest_ptr = buf + num_null_bytes;
2297 var_dest_offset_ptr = fixed_dest_ptr + dest_mcp_info.fixed_field_size;
2298 var_dest_data_ptr = var_dest_offset_ptr + dest_mcp_info.len_of_offsets;
2299 orig_var_dest_data_ptr = var_dest_data_ptr;
2300
2301 //
2302 // copy the null bytes
2303 //
2304 memcpy(buf, null_bytes_src_ptr, num_null_bytes);
2305 while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
2306 uint32_t start, end, length;
2307 uchar curr = desc_pos[0];
2308 desc_pos++;
2309
2310 memcpy(&start, desc_pos, sizeof(start));
2311 desc_pos += sizeof(start);
2312
2313 memcpy(&end, desc_pos, sizeof(end));
2314 desc_pos += sizeof(end);
2315
2316 assert_always (start <= end);
2317
2318 if (curr == CK_FIX_RANGE) {
2319 length = end - start;
2320
2321 memcpy(fixed_dest_ptr, fixed_src_ptr + start, length);
2322 fixed_dest_ptr += length;
2323 }
2324 else if (curr == CK_VAR_RANGE) {
2325 uint32_t start_data_size;
2326 uint32_t start_data_offset;
2327 uint32_t end_data_size;
2328 uint32_t end_data_offset;
2329 uint32_t offset_diffs;
2330
2331 get_var_field_info(
2332 &start_data_size,
2333 &start_data_offset,
2334 start,
2335 var_src_offset_ptr,
2336 num_offset_bytes
2337 );
2338 get_var_field_info(
2339 &end_data_size,
2340 &end_data_offset,
2341 end,
2342 var_src_offset_ptr,
2343 num_offset_bytes
2344 );
2345 length = end_data_offset + end_data_size - start_data_offset;
2346 //
2347 // copy the data
2348 //
2349 memcpy(
2350 var_dest_data_ptr,
2351 var_src_data_ptr + start_data_offset,
2352 length
2353 );
2354 var_dest_data_ptr += length;
2355
2356 //
2357 // put in offset info
2358 //
2359 offset_diffs = (end_data_offset + end_data_size) - (uint32_t)(var_dest_data_ptr - orig_var_dest_data_ptr);
2360 for (uint32_t i = start; i <= end; i++) {
2361 if ( num_offset_bytes == 1 ) {
2362 assert_always(offset_diffs < 256);
2363 var_dest_offset_ptr[0] = var_src_offset_ptr[i] - (uchar)offset_diffs;
2364 var_dest_offset_ptr++;
2365 } else if ( num_offset_bytes == 2 ) {
2366 uint32_t tmp = uint2korr(var_src_offset_ptr + 2*i);
2367 uint32_t new_offset = tmp - offset_diffs;
2368 assert_always(new_offset < 1<<16);
2369 int2store(var_dest_offset_ptr,new_offset);
2370 var_dest_offset_ptr += 2;
2371 } else {
2372 assert_unreachable();
2373 }
2374 }
2375 } else {
2376 assert_unreachable();
2377 }
2378 }
2379 //
2380 // copy blobs
2381 // at this point, var_dest_data_ptr is pointing to the end, where blobs should be located
2382 // so, we put the blobs at var_dest_data_ptr
2383 //
2384 if (has_blobs) {
2385 uint32_t num_blob_bytes;
2386 uint32_t start_offset;
2387 uchar* src_blob_ptr = NULL;
2388 get_blob_field_info(
2389 &start_offset,
2390 src_mcp_info.len_of_offsets,
2391 var_src_data_ptr,
2392 num_offset_bytes
2393 );
2394 src_blob_ptr = var_src_data_ptr + start_offset;
2395 num_blob_bytes = pk_val->size - (start_offset + (var_src_data_ptr - null_bytes_src_ptr));
2396 memcpy(var_dest_data_ptr, src_blob_ptr, num_blob_bytes);
2397 var_dest_data_ptr += num_blob_bytes;
2398 }
2399 return var_dest_data_ptr - buf;
2400}
2401
2402
2403static uint32_t get_max_secondary_key_pack_desc_size(
2404 KEY_AND_COL_INFO* kc_info
2405 )
2406{
2407 uint32_t ret_val = 0;
2408 //
2409 // the fixed stuff:
2410 // byte that states if main dictionary
2411 // byte that states if hpk
2412 // the things in pack_some_row_info
2413 ret_val++;
2414 ret_val++;
2415 ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
2416 //
2417 // now variable sized stuff
2418 //
2419
2420 // first the blobs
2421 ret_val += sizeof(kc_info->num_blobs);
2422 ret_val+= kc_info->num_blobs;
2423
2424 // then the pk
2425 // one byte for num key parts
2426 // two bytes for each key part
2427 ret_val++;
2428 ret_val += MAX_REF_PARTS*2;
2429
2430 // then the key
2431 // null bit, then null byte,
2432 // then 1 byte stating what it is, then 4 for offset, 4 for key length,
2433 // 1 for if charset exists, and 4 for charset
2434 ret_val += MAX_REF_PARTS*(1 + sizeof(uint32_t) + 1 + 3*sizeof(uint32_t) + 1);
2435 //
2436 // four bytes storing the length of this portion
2437 //
2438 ret_val += 4;
2439 return ret_val;
2440}
2441
2442static uint32_t create_toku_secondary_key_pack_descriptor (
2443 uchar* buf,
2444 bool has_hpk,
2445 uint pk_index,
2446 TABLE_SHARE* table_share,
2447 TABLE* table,
2448 KEY_AND_COL_INFO* kc_info,
2449 KEY* key_info,
2450 KEY* prim_key
2451 )
2452{
2453 //
2454 // The first four bytes always contain the offset of where the first key
2455 // ends.
2456 //
2457 uchar* pk_info = NULL;
2458 uchar* pos = buf + 4;
2459 uint32_t offset = 0;
2460
2461 //
2462 // first byte states that it is NOT main dictionary
2463 //
2464 pos[0] = 0;
2465 pos++;
2466
2467 //
2468 // one byte states if main dictionary has an hpk or not
2469 //
2470 if (has_hpk) {
2471 pos[0] = 1;
2472 }
2473 else {
2474 pos[0] = 0;
2475 }
2476 pos++;
2477
2478 pos += pack_some_row_info(
2479 pos,
2480 pk_index,
2481 table_share,
2482 kc_info
2483 );
2484
2485 //
2486 // store blob information
2487 //
2488 memcpy(pos, &kc_info->num_blobs, sizeof(kc_info->num_blobs));
2489 pos += sizeof(uint32_t);
2490 for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
2491 //
2492 // store length bytes for each blob
2493 //
2494 Field* field = table_share->field[kc_info->blob_fields[i]];
2495 pos[0] = (uchar)field->row_pack_length();
2496 pos++;
2497 }
2498
2499 //
2500 // store the pk information
2501 //
2502 if (has_hpk) {
2503 pos[0] = 0;
2504 pos++;
2505 }
2506 else {
2507 //
2508 // store number of parts
2509 //
2510 assert_always(prim_key->user_defined_key_parts < 128);
2511 pos[0] = 2 * prim_key->user_defined_key_parts;
2512 pos++;
2513 //
2514 // for each part, store if it is a fixed field or var field
2515 // if fixed, store number of bytes, if var, store
2516 // number of length bytes
2517 // total should be two bytes per key part stored
2518 //
2519 pk_info = pos;
2520 uchar* tmp = pos;
2521 for (uint i = 0; i < prim_key->user_defined_key_parts; i++) {
2522 tmp += pack_desc_pk_info(
2523 tmp,
2524 kc_info,
2525 table_share,
2526 &prim_key->key_part[i]
2527 );
2528 }
2529 //
2530 // asserting that we moved forward as much as we think we have
2531 //
2532 assert_always(tmp - pos == (2 * prim_key->user_defined_key_parts));
2533 pos = tmp;
2534 }
2535
2536 for (uint i = 0; i < key_info->user_defined_key_parts; i++) {
2537 KEY_PART_INFO curr_kpi = key_info->key_part[i];
2538 uint16 field_index = curr_kpi.field->field_index;
2539 Field* field = table_share->field[field_index];
2540 bool is_col_in_pk = false;
2541
2542 if (bitmap_is_set(&kc_info->key_filters[pk_index],field_index)) {
2543 assert_always(!has_hpk && prim_key != NULL);
2544 is_col_in_pk = true;
2545 }
2546 else {
2547 is_col_in_pk = false;
2548 }
2549
2550 pos[0] = field->null_bit;
2551 pos++;
2552
2553 if (is_col_in_pk) {
2554 //
2555 // assert that columns in pk do not have a null bit
2556 // because in MySQL, pk columns cannot be null
2557 //
2558 assert_always(!field->null_bit);
2559 }
2560
2561 if (field->null_bit) {
2562 uint32_t null_offset = get_null_offset(table,table->field[field_index]);
2563 memcpy(pos, &null_offset, sizeof(uint32_t));
2564 pos += sizeof(uint32_t);
2565 }
2566 if (is_col_in_pk) {
2567 pos += pack_desc_pk_offset_info(
2568 pos,
2569 kc_info,
2570 table_share,
2571 &curr_kpi,
2572 prim_key,
2573 pk_info
2574 );
2575 }
2576 else {
2577 pos += pack_desc_offset_info(
2578 pos,
2579 kc_info,
2580 pk_index,
2581 table_share,
2582 &curr_kpi
2583 );
2584 }
2585 pos += pack_desc_key_length_info(
2586 pos,
2587 kc_info,
2588 table_share,
2589 &curr_kpi
2590 );
2591 pos += pack_desc_char_info(
2592 pos,
2593 kc_info,
2594 table_share,
2595 &curr_kpi
2596 );
2597 }
2598
2599 offset = pos - buf;
2600 buf[0] = (uchar)(offset & 255);
2601 buf[1] = (uchar)((offset >> 8) & 255);
2602 buf[2] = (uchar)((offset >> 16) & 255);
2603 buf[3] = (uchar)((offset >> 24) & 255);
2604
2605 return pos - buf;
2606}
2607
2608static uint32_t skip_key_in_desc(
2609 uchar* row_desc
2610 )
2611{
2612 uchar* pos = row_desc;
2613 uchar col_bin_or_char;
2614 //
2615 // skip the byte that states if it is a fix field or var field, we do not care
2616 //
2617 pos++;
2618
2619 //
2620 // skip the offset information
2621 //
2622 pos += sizeof(uint32_t);
2623
2624 //
2625 // skip the key_part_length info
2626 //
2627 pos += sizeof(uint32_t);
2628 col_bin_or_char = pos[0];
2629 pos++;
2630 if (col_bin_or_char == COL_HAS_NO_CHARSET) {
2631 goto exit;
2632 }
2633 //
2634 // skip the charset info
2635 //
2636 pos += 4;
2637
2638
2639exit:
2640 return (uint32_t)(pos-row_desc);
2641}
2642
2643
2644static uint32_t max_key_size_from_desc(
2645 void* row_desc,
2646 uint32_t row_desc_size
2647 )
2648{
2649 uchar* desc_pos = (uchar *)row_desc;
2650 uint32_t num_blobs;
2651 uint32_t num_pk_columns;
2652 //
2653 // start at 1 for the infinity byte
2654 //
2655 uint32_t max_size = 1;
2656
2657 // skip byte that states if main dictionary
2658 bool is_main_dictionary = desc_pos[0];
2659 desc_pos++;
2660 assert_always(!is_main_dictionary);
2661
2662 // skip hpk byte
2663 desc_pos++;
2664
2665 // skip num_null_bytes
2666 desc_pos += sizeof(uint32_t);
2667
2668 // skip mcp_info
2669 desc_pos += sizeof(MULTI_COL_PACK_INFO);
2670
2671 // skip offset_bytes
2672 desc_pos++;
2673
2674 // skip over blobs
2675 memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
2676 desc_pos += sizeof(num_blobs);
2677 desc_pos += num_blobs;
2678
2679 // skip over pk info
2680 num_pk_columns = desc_pos[0]/2;
2681 desc_pos++;
2682 desc_pos += 2*num_pk_columns;
2683
2684 while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
2685 uchar has_charset;
2686 uint32_t key_length = 0;
2687
2688 uchar null_bit = desc_pos[0];
2689 desc_pos++;
2690
2691 if (null_bit) {
2692 //
2693 // column is NULLable, skip null_offset, and add a null byte
2694 //
2695 max_size++;
2696 desc_pos += sizeof(uint32_t);
2697 }
2698 //
2699 // skip over byte that states if fix or var
2700 //
2701 desc_pos++;
2702
2703 // skip over offset
2704 desc_pos += sizeof(uint32_t);
2705
2706 //
2707 // get the key length and add it to return value
2708 //
2709 memcpy(&key_length, desc_pos, sizeof(key_length));
2710 desc_pos += sizeof(key_length);
2711 max_size += key_length;
2712 max_size += 2; // 2 bytes for a potential length bytes, we are upperbounding, does not need to be super tight
2713
2714 has_charset = desc_pos[0];
2715 desc_pos++;
2716
2717 uint32_t charset_num;
2718 if (has_charset == COL_HAS_CHARSET) {
2719 // skip over charsent num
2720 desc_pos += sizeof(charset_num);
2721 }
2722 else {
2723 assert_always(has_charset == COL_HAS_NO_CHARSET);
2724 }
2725 }
2726 return max_size;
2727}
2728
2729static uint32_t pack_key_from_desc(
2730 uchar* buf,
2731 void* row_desc,
2732 uint32_t row_desc_size,
2733 const DBT* pk_key,
2734 const DBT* pk_val) {
2735
2736 MULTI_COL_PACK_INFO mcp_info;
2737 uint32_t num_null_bytes;
2738 uint32_t num_blobs;
2739 uint32_t num_pk_columns;
2740 uchar* blob_lengths = NULL;
2741 uchar* pk_info = NULL;
2742 uchar* pk_data_ptr = NULL;
2743 uchar* null_bytes_ptr = NULL;
2744 uchar* fixed_field_ptr = NULL;
2745 uchar* var_field_offset_ptr = NULL;
2746 const uchar* var_field_data_ptr = NULL;
2747 uint32_t num_offset_bytes;
2748 uchar* packed_key_pos = buf;
2749 uchar* desc_pos = (uchar *)row_desc;
2750
2751 bool is_main_dictionary = desc_pos[0];
2752 desc_pos++;
2753 assert_always(!is_main_dictionary);
2754
2755 //
2756 // get the constant info out of descriptor
2757 //
2758 bool hpk = desc_pos[0];
2759 desc_pos++;
2760
2761 memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
2762 desc_pos += sizeof(num_null_bytes);
2763
2764 memcpy(&mcp_info, desc_pos, sizeof(mcp_info));
2765 desc_pos += sizeof(mcp_info);
2766
2767 num_offset_bytes = desc_pos[0];
2768 desc_pos++;
2769
2770 memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
2771 desc_pos += sizeof(num_blobs);
2772
2773 blob_lengths = desc_pos;
2774 desc_pos += num_blobs;
2775
2776 num_pk_columns = desc_pos[0]/2;
2777 desc_pos++;
2778 pk_info = desc_pos;
2779 desc_pos += 2*num_pk_columns;
2780
2781 //
2782 // now start packing the key
2783 //
2784
2785 //
2786 // pack the infinity byte
2787 //
2788 packed_key_pos[0] = COL_ZERO;
2789 packed_key_pos++;
2790 //
2791 // now start packing each column of the key, as described in descriptor
2792 //
2793 if (!hpk) {
2794 // +1 for the infinity byte
2795 pk_data_ptr = (uchar *)pk_key->data + 1;
2796 }
2797 null_bytes_ptr = (uchar *)pk_val->data;
2798 fixed_field_ptr = null_bytes_ptr + num_null_bytes;
2799 var_field_offset_ptr = fixed_field_ptr + mcp_info.fixed_field_size;
2800 var_field_data_ptr = var_field_offset_ptr + mcp_info.len_of_offsets;
2801 while ((uint32_t)(desc_pos - (uchar*)row_desc) < row_desc_size) {
2802 uchar col_fix_val;
2803 uchar has_charset;
2804 uint32_t col_pack_val = 0;
2805 uint32_t key_length = 0;
2806
2807 uchar null_bit = desc_pos[0];
2808 desc_pos++;
2809
2810 if (null_bit) {
2811 //
2812 // column is NULLable, need to check the null bytes to see if it is NULL
2813 //
2814 uint32_t null_offset = 0;
2815 bool is_field_null;
2816 memcpy(&null_offset, desc_pos, sizeof(null_offset));
2817 desc_pos += sizeof(null_offset);
2818
2819 is_field_null = (null_bytes_ptr[null_offset] & null_bit) ? true: false;
2820 if (is_field_null) {
2821 packed_key_pos[0] = NULL_COL_VAL;
2822 packed_key_pos++;
2823 desc_pos += skip_key_in_desc(desc_pos);
2824 continue;
2825 } else {
2826 packed_key_pos[0] = NONNULL_COL_VAL;
2827 packed_key_pos++;
2828 }
2829 }
2830 //
2831 // now pack the column (unless it was NULL, and we continued)
2832 //
2833 col_fix_val = desc_pos[0];
2834 desc_pos++;
2835
2836 memcpy(&col_pack_val, desc_pos, sizeof(col_pack_val));
2837 desc_pos += sizeof(col_pack_val);
2838
2839 memcpy(&key_length, desc_pos, sizeof(key_length));
2840 desc_pos += sizeof(key_length);
2841
2842 has_charset = desc_pos[0];
2843 desc_pos++;
2844
2845 uint32_t charset_num = 0;
2846 if (has_charset == COL_HAS_CHARSET) {
2847 memcpy(&charset_num, desc_pos, sizeof(charset_num));
2848 desc_pos += sizeof(charset_num);
2849 } else {
2850 assert_always(has_charset == COL_HAS_NO_CHARSET);
2851 }
2852 //
2853 // case where column is in pk val
2854 //
2855 if (col_fix_val == COL_FIX_FIELD ||
2856 col_fix_val == COL_VAR_FIELD ||
2857 col_fix_val == COL_BLOB_FIELD) {
2858 if (col_fix_val == COL_FIX_FIELD &&
2859 has_charset == COL_HAS_NO_CHARSET) {
2860 memcpy(
2861 packed_key_pos,
2862 &fixed_field_ptr[col_pack_val],
2863 key_length);
2864 packed_key_pos += key_length;
2865 } else if (col_fix_val == COL_VAR_FIELD &&
2866 has_charset == COL_HAS_NO_CHARSET) {
2867 uint32_t data_start_offset = 0;
2868
2869 uint32_t data_size = 0;
2870 get_var_field_info(
2871 &data_size,
2872 &data_start_offset,
2873 col_pack_val,
2874 var_field_offset_ptr,
2875 num_offset_bytes);
2876
2877 //
2878 // length of this field in this row is data_size
2879 // data is located beginning at var_field_data_ptr + data_start_offset
2880 //
2881 packed_key_pos = pack_toku_varbinary_from_desc(
2882 packed_key_pos,
2883 var_field_data_ptr + data_start_offset,
2884 //number of bytes to use to encode the length in to_tokudb
2885 key_length,
2886 //length of field
2887 data_size);
2888 } else {
2889 const uchar* data_start = NULL;
2890 uint32_t data_start_offset = 0;
2891 uint32_t data_size = 0;
2892
2893 if (col_fix_val == COL_FIX_FIELD) {
2894 data_start_offset = col_pack_val;
2895 data_size = key_length;
2896 data_start = fixed_field_ptr + data_start_offset;
2897 } else if (col_fix_val == COL_VAR_FIELD){
2898 get_var_field_info(
2899 &data_size,
2900 &data_start_offset,
2901 col_pack_val,
2902 var_field_offset_ptr,
2903 num_offset_bytes);
2904 data_start = var_field_data_ptr + data_start_offset;
2905 } else if (col_fix_val == COL_BLOB_FIELD) {
2906 uint32_t blob_index = col_pack_val;
2907 uint32_t blob_offset;
2908 const uchar* blob_ptr = NULL;
2909 uint32_t field_len;
2910 uint32_t field_len_bytes = blob_lengths[blob_index];
2911 get_blob_field_info(
2912 &blob_offset,
2913 mcp_info.len_of_offsets,
2914 var_field_data_ptr,
2915 num_offset_bytes);
2916 blob_ptr = var_field_data_ptr + blob_offset;
2917 assert_always(num_blobs > 0);
2918
2919 // skip over other blobs to get to the one we want to
2920 // make a key out of
2921 for (uint32_t i = 0; i < blob_index; i++) {
2922 blob_ptr = unpack_toku_field_blob(
2923 NULL,
2924 blob_ptr,
2925 blob_lengths[i],
2926 true);
2927 }
2928 // at this point, blob_ptr is pointing to the blob we
2929 // want to make a key from
2930 field_len = get_blob_field_len(blob_ptr, field_len_bytes);
2931 // now we set the variables to make the key
2932 data_start = blob_ptr + field_len_bytes;
2933 data_size = field_len;
2934 } else {
2935 assert_unreachable();
2936 }
2937
2938 packed_key_pos = pack_toku_varstring_from_desc(packed_key_pos,
2939 data_start,
2940 key_length,
2941 data_size,
2942 charset_num);
2943 }
2944 } else {
2945 // case where column is in pk key
2946 if (col_fix_val == COL_FIX_PK_OFFSET) {
2947 memcpy(packed_key_pos, &pk_data_ptr[col_pack_val], key_length);
2948 packed_key_pos += key_length;
2949 } else if (col_fix_val == COL_VAR_PK_OFFSET) {
2950 uchar* tmp_pk_data_ptr = pk_data_ptr;
2951 uint32_t index_in_pk = col_pack_val;
2952 //
2953 // skip along in pk to the right column
2954 //
2955 for (uint32_t i = 0; i < index_in_pk; i++) {
2956 if (pk_info[2*i] == COL_FIX_FIELD) {
2957 tmp_pk_data_ptr += pk_info[2*i + 1];
2958 } else if (pk_info[2*i] == COL_VAR_FIELD) {
2959 uint32_t len_bytes = pk_info[2*i + 1];
2960 uint32_t len;
2961 if (len_bytes == 1) {
2962 len = tmp_pk_data_ptr[0];
2963 tmp_pk_data_ptr++;
2964 } else if (len_bytes == 2) {
2965 len = uint2korr(tmp_pk_data_ptr);
2966 tmp_pk_data_ptr += 2;
2967 } else {
2968 assert_unreachable();
2969 }
2970 tmp_pk_data_ptr += len;
2971 } else {
2972 assert_unreachable();
2973 }
2974 }
2975 //
2976 // at this point, tmp_pk_data_ptr is pointing at the column
2977 //
2978 uint32_t is_fix_field = pk_info[2*index_in_pk];
2979 if (is_fix_field == COL_FIX_FIELD) {
2980 memcpy(packed_key_pos, tmp_pk_data_ptr, key_length);
2981 packed_key_pos += key_length;
2982 } else if (is_fix_field == COL_VAR_FIELD) {
2983 const uchar* data_start = NULL;
2984 uint32_t data_size = 0;
2985 uint32_t len_bytes = pk_info[2*index_in_pk + 1];
2986 if (len_bytes == 1) {
2987 data_size = tmp_pk_data_ptr[0];
2988 tmp_pk_data_ptr++;
2989 } else if (len_bytes == 2) {
2990 data_size = uint2korr(tmp_pk_data_ptr);
2991 tmp_pk_data_ptr += 2;
2992 } else {
2993 assert_unreachable();
2994 }
2995 data_start = tmp_pk_data_ptr;
2996
2997 if (has_charset == COL_HAS_CHARSET) {
2998 packed_key_pos = pack_toku_varstring_from_desc(
2999 packed_key_pos,
3000 data_start,
3001 key_length,
3002 data_size,
3003 charset_num);
3004 } else if (has_charset == COL_HAS_NO_CHARSET) {
3005 packed_key_pos = pack_toku_varbinary_from_desc(
3006 packed_key_pos,
3007 data_start,
3008 key_length,
3009 data_size);
3010 } else {
3011 assert_unreachable();
3012 }
3013 } else {
3014 assert_unreachable();
3015 }
3016 } else {
3017 assert_unreachable();
3018 }
3019 }
3020
3021 }
3022 assert_always( (uint32_t)(desc_pos - (uchar *)row_desc) == row_desc_size);
3023
3024 //
3025 // now append the primary key to the end of the key
3026 //
3027 if (hpk) {
3028 memcpy(packed_key_pos, pk_key->data, pk_key->size);
3029 packed_key_pos += pk_key->size;
3030 } else {
3031 memcpy(packed_key_pos, (uchar *)pk_key->data + 1, pk_key->size - 1);
3032 packed_key_pos += (pk_key->size - 1);
3033 }
3034
3035 return (uint32_t)(packed_key_pos - buf);
3036}
3037
3038static bool fields_have_same_name(Field* a, Field* b) {
3039 return strcmp(a->field_name.str, b->field_name.str) == 0;
3040}
3041
3042static bool fields_are_same_type(Field* a, Field* b) {
3043 bool retval = true;
3044 enum_field_types a_mysql_type = a->real_type();
3045 enum_field_types b_mysql_type = b->real_type();
3046 TOKU_TYPE a_toku_type = mysql_to_toku_type(a);
3047 TOKU_TYPE b_toku_type = mysql_to_toku_type(b);
3048 // make sure have same names
3049 // make sure have same types
3050 if (a_mysql_type != b_mysql_type) {
3051 retval = false;
3052 goto cleanup;
3053 }
3054 // Thanks to MariaDB 5.5, we can have two fields
3055 // be the same MySQL type but not the same toku type,
3056 // This is an issue introduced with MariaDB's fractional time
3057 // implementation
3058 if (a_toku_type != b_toku_type) {
3059 retval = false;
3060 goto cleanup;
3061 }
3062 // make sure that either both are nullable, or both not nullable
3063 if ((a->null_bit && !b->null_bit) || (!a->null_bit && b->null_bit)) {
3064 retval = false;
3065 goto cleanup;
3066 }
3067 switch (a_mysql_type) {
3068 case MYSQL_TYPE_TINY:
3069 case MYSQL_TYPE_SHORT:
3070 case MYSQL_TYPE_INT24:
3071 case MYSQL_TYPE_LONG:
3072 case MYSQL_TYPE_LONGLONG:
3073 // length, unsigned, auto increment
3074 if (a->pack_length() != b->pack_length() ||
3075 (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3076 (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3077 retval = false;
3078 goto cleanup;
3079 }
3080 break;
3081 case MYSQL_TYPE_DOUBLE:
3082 case MYSQL_TYPE_FLOAT:
3083 // length, unsigned, auto increment
3084 if (a->pack_length() != b->pack_length() ||
3085 (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
3086 (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
3087 retval = false;
3088 goto cleanup;
3089 }
3090 break;
3091 case MYSQL_TYPE_NEWDECIMAL:
3092 // length, unsigned
3093 if (a->pack_length() != b->pack_length() ||
3094 (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG)) {
3095 retval = false;
3096 goto cleanup;
3097 }
3098 break;
3099 case MYSQL_TYPE_ENUM: {
3100 Field_enum *a_enum = static_cast<Field_enum *>(a);
3101 if (!a_enum->eq_def(b)) {
3102 retval = false;
3103 goto cleanup;
3104 }
3105 break;
3106 }
3107 case MYSQL_TYPE_SET: {
3108 Field_set *a_set = static_cast<Field_set *>(a);
3109 if (!a_set->eq_def(b)) {
3110 retval = false;
3111 goto cleanup;
3112 }
3113 break;
3114 }
3115 case MYSQL_TYPE_BIT:
3116 // length
3117 if (a->pack_length() != b->pack_length()) {
3118 retval = false;
3119 goto cleanup;
3120 }
3121 break;
3122 case MYSQL_TYPE_DATE:
3123 case MYSQL_TYPE_DATETIME:
3124 case MYSQL_TYPE_YEAR:
3125 case MYSQL_TYPE_NEWDATE:
3126 case MYSQL_TYPE_TIME:
3127 case MYSQL_TYPE_TIMESTAMP:
3128#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
3129 (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
3130 (100000 <= MYSQL_VERSION_ID)
3131 case MYSQL_TYPE_DATETIME2:
3132 case MYSQL_TYPE_TIMESTAMP2:
3133 case MYSQL_TYPE_TIME2:
3134#endif
3135 // length
3136 if (a->pack_length() != b->pack_length()) {
3137 retval = false;
3138 goto cleanup;
3139 }
3140 break;
3141 case MYSQL_TYPE_TINY_BLOB:
3142 case MYSQL_TYPE_MEDIUM_BLOB:
3143 case MYSQL_TYPE_BLOB:
3144 case MYSQL_TYPE_LONG_BLOB:
3145 // test the charset
3146 if (a->charset()->number != b->charset()->number) {
3147 retval = false;
3148 goto cleanup;
3149 }
3150 if (a->row_pack_length() != b->row_pack_length()) {
3151 retval = false;
3152 goto cleanup;
3153 }
3154 break;
3155 case MYSQL_TYPE_STRING:
3156 if (a->pack_length() != b->pack_length()) {
3157 retval = false;
3158 goto cleanup;
3159 }
3160 // if both are binary, we know have same pack lengths,
3161 // so we can goto end
3162 if (a->binary() && b->binary()) {
3163 // nothing to do, we are good
3164 }
3165 else if (!a->binary() && !b->binary()) {
3166 // test the charset
3167 if (a->charset()->number != b->charset()->number) {
3168 retval = false;
3169 goto cleanup;
3170 }
3171 }
3172 else {
3173 // one is binary and the other is not, so not the same
3174 retval = false;
3175 goto cleanup;
3176 }
3177 break;
3178 case MYSQL_TYPE_VARCHAR:
3179 if (a->field_length != b->field_length) {
3180 retval = false;
3181 goto cleanup;
3182 }
3183 // if both are binary, we know have same pack lengths,
3184 // so we can goto end
3185 if (a->binary() && b->binary()) {
3186 // nothing to do, we are good
3187 }
3188 else if (!a->binary() && !b->binary()) {
3189 // test the charset
3190 if (a->charset()->number != b->charset()->number) {
3191 retval = false;
3192 goto cleanup;
3193 }
3194 }
3195 else {
3196 // one is binary and the other is not, so not the same
3197 retval = false;
3198 goto cleanup;
3199 }
3200 break;
3201 //
3202 // I believe these are old types that are no longer
3203 // in any 5.1 tables, so tokudb does not need
3204 // to worry about them
3205 // Putting in this assert in case I am wrong.
3206 // Do not support geometry yet.
3207 //
3208 case MYSQL_TYPE_GEOMETRY:
3209 case MYSQL_TYPE_DECIMAL:
3210 case MYSQL_TYPE_VAR_STRING:
3211 case MYSQL_TYPE_NULL:
3212 case MYSQL_TYPE_VARCHAR_COMPRESSED:
3213 case MYSQL_TYPE_BLOB_COMPRESSED:
3214 assert_unreachable();
3215 }
3216
3217cleanup:
3218 return retval;
3219}
3220
3221static bool are_two_fields_same(Field* a, Field* b) {
3222 return fields_have_same_name(a, b) && fields_are_same_type(a, b);
3223}
3224
3225
3226