1/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
2 Copyright (c) 2016, MariaDB
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17/* This file is originally from the mysql distribution. Coded by monty */
18
19#ifdef USE_PRAGMA_IMPLEMENTATION
20#pragma implementation // gcc: Class implementation
21#endif
22
23#include "mariadb.h"
24#include <m_string.h>
25#include <m_ctype.h>
26#include <mysql_com.h>
27
28#include "sql_string.h"
29
30/*****************************************************************************
31** String functions
32*****************************************************************************/
33
34bool String::real_alloc(size_t length)
35{
36 size_t arg_length= ALIGN_SIZE(length + 1);
37 DBUG_ASSERT(arg_length > length);
38 if (arg_length <= length)
39 return TRUE; /* Overflow */
40 str_length=0;
41 if (Alloced_length < arg_length)
42 {
43 free();
44 if (!(Ptr=(char*) my_malloc(arg_length,MYF(MY_WME |
45 (thread_specific ?
46 MY_THREAD_SPECIFIC : 0)))))
47 return TRUE;
48 DBUG_ASSERT(length < UINT_MAX32);
49 Alloced_length=(uint32) arg_length;
50 alloced=1;
51 }
52 Ptr[0]=0;
53 return FALSE;
54}
55
56
57/**
58 Allocates a new buffer on the heap for this String.
59
60 - If the String's internal buffer is privately owned and heap allocated,
61 one of the following is performed.
62
63 - If the requested length is greater than what fits in the buffer, a new
64 buffer is allocated, data moved and the old buffer freed.
65
66 - If the requested length is less or equal to what fits in the buffer, a
67 null character is inserted at the appropriate position.
68
69 - If the String does not keep a private buffer on the heap, such a buffer
70 will be allocated and the string copied accoring to its length, as found
71 in String::length().
72
73 For C compatibility, the new string buffer is null terminated.
74
75 @param alloc_length The requested string size in characters, excluding any
76 null terminator.
77
78 @retval false Either the copy operation is complete or, if the size of the
79 new buffer is smaller than the currently allocated buffer (if one exists),
80 no allocation occurred.
81
82 @retval true An error occurred when attempting to allocate memory.
83*/
84bool String::realloc_raw(size_t alloc_length)
85{
86 if (Alloced_length <= alloc_length)
87 {
88 char *new_ptr;
89 uint32 len= ALIGN_SIZE(alloc_length+1);
90 DBUG_ASSERT(len > alloc_length);
91 if (len <= alloc_length)
92 return TRUE; /* Overflow */
93 if (alloced)
94 {
95 if (!(new_ptr= (char*) my_realloc(Ptr,len,
96 MYF(MY_WME |
97 (thread_specific ?
98 MY_THREAD_SPECIFIC : 0)))))
99 return TRUE; // Signal error
100 }
101 else if ((new_ptr= (char*) my_malloc(len,
102 MYF(MY_WME |
103 (thread_specific ?
104 MY_THREAD_SPECIFIC : 0)))))
105 {
106 if (str_length > len - 1)
107 str_length= 0;
108 if (str_length) // Avoid bugs in memcpy on AIX
109 memcpy(new_ptr,Ptr,str_length);
110 new_ptr[str_length]=0;
111 alloced=1;
112 }
113 else
114 return TRUE; // Signal error
115 Ptr= new_ptr;
116 DBUG_ASSERT(len < UINT_MAX32);
117 Alloced_length= (uint32)len;
118 }
119 return FALSE;
120}
121
122bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs)
123{
124 uint l=20*cs->mbmaxlen+1;
125 int base= unsigned_flag ? 10 : -10;
126
127 if (alloc(l))
128 return TRUE;
129 str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num);
130 str_charset=cs;
131 return FALSE;
132}
133
134
135// Convert a number into its HEX representation
136bool String::set_hex(ulonglong num)
137{
138 char *n_end;
139 if (alloc(65) || !(n_end= longlong2str(num, Ptr, 16)))
140 return true;
141 length((uint32) (n_end - Ptr));
142 set_charset(&my_charset_latin1);
143 return false;
144}
145
146
147/**
148 Append a hex representation of the byte "value" into "to".
149 Note:
150 "to" is incremented for the caller by two bytes. It's passed by reference!
151 So it resembles a macros, hence capital letters in the name.
152*/
153static inline void APPEND_HEX(char *&to, uchar value)
154{
155 *to++= _dig_vec_upper[((uchar) value) >> 4];
156 *to++= _dig_vec_upper[((uchar) value) & 0x0F];
157}
158
159
160void String::qs_append_hex(const char *str, uint32 len)
161{
162 const char *str_end= str + len;
163 for (char *to= Ptr + str_length ; str < str_end; str++)
164 APPEND_HEX(to, (uchar) *str);
165 str_length+= len * 2;
166}
167
168
169// Convert a string to its HEX representation
170bool String::set_hex(const char *str, uint32 len)
171{
172 /*
173 Safety: cut the source string if "len" is too large.
174 Note, alloc() can allocate some more space than requested, due to:
175 - ALIGN_SIZE
176 - one extra byte for a null terminator
177 So cut the source string to 0x7FFFFFF0 rather than 0x7FFFFFFE.
178 */
179 set_if_smaller(len, 0x7FFFFFF0);
180 if (alloc(len * 2))
181 return true;
182 length(0);
183 qs_append_hex(str, len);
184 set_charset(&my_charset_latin1);
185 return false;
186}
187
188
189bool String::set_real(double num,uint decimals, CHARSET_INFO *cs)
190{
191 char buff[FLOATING_POINT_BUFFER];
192 uint dummy_errors;
193 size_t len;
194
195 str_charset=cs;
196 if (decimals >= FLOATING_POINT_DECIMALS)
197 {
198 len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
199 return copy(buff, (uint)len, &my_charset_latin1, cs, &dummy_errors);
200 }
201 len= my_fcvt(num, decimals, buff, NULL);
202 return copy(buff, (uint32) len, &my_charset_latin1, cs,
203 &dummy_errors);
204}
205
206
207bool String::copy()
208{
209 if (!alloced)
210 {
211 Alloced_length=0; // Force realloc
212 return realloc(str_length);
213 }
214 return FALSE;
215}
216
217/**
218 Copies the internal buffer from str. If this String has a private heap
219 allocated buffer where new data does not fit, a new buffer is allocated
220 before copying and the old buffer freed. Character set information is also
221 copied.
222
223 @param str The string whose internal buffer is to be copied.
224
225 @retval false Success.
226 @retval true Memory allocation failed.
227*/
228bool String::copy(const String &str)
229{
230 if (alloc(str.str_length))
231 return TRUE;
232 str_length=str.str_length;
233 bmove(Ptr,str.Ptr,str_length); // May be overlapping
234 Ptr[str_length]=0;
235 str_charset=str.str_charset;
236 return FALSE;
237}
238
239bool String::copy(const char *str,size_t arg_length, CHARSET_INFO *cs)
240{
241 if (alloc(arg_length))
242 return TRUE;
243 DBUG_ASSERT(arg_length <= UINT_MAX32);
244 if ((str_length=(uint32)arg_length))
245 memcpy(Ptr,str,arg_length);
246 Ptr[arg_length]=0;
247 str_charset=cs;
248 return FALSE;
249}
250
251
252/*
253 Checks that the source string can be just copied to the destination string
254 without conversion.
255
256 SYNPOSIS
257
258 needs_conversion()
259 arg_length Length of string to copy.
260 from_cs Character set to copy from
261 to_cs Character set to copy to
262 uint32 *offset Returns number of unaligned characters.
263
264 RETURN
265 0 No conversion needed
266 1 Either character set conversion or adding leading zeros
267 (e.g. for UCS-2) must be done
268
269 NOTE
270 to_cs may be NULL for "no conversion" if the system variable
271 character_set_results is NULL.
272*/
273
274bool String::needs_conversion(size_t arg_length,
275 CHARSET_INFO *from_cs,
276 CHARSET_INFO *to_cs,
277 uint32 *offset)
278{
279 *offset= 0;
280 if (!to_cs ||
281 (to_cs == &my_charset_bin) ||
282 (to_cs == from_cs) ||
283 my_charset_same(from_cs, to_cs) ||
284 ((from_cs == &my_charset_bin) &&
285 (!(*offset=(uint32)(arg_length % to_cs->mbminlen)))))
286 return FALSE;
287 return TRUE;
288}
289
290
291/*
292 Checks that the source string can just be copied to the destination string
293 without conversion.
294 Unlike needs_conversion it will require conversion on incoming binary data
295 to ensure the data are verified for vailidity first.
296
297 @param arg_length Length of string to copy.
298 @param from_cs Character set to copy from
299 @param to_cs Character set to copy to
300
301 @return conversion needed
302*/
303bool String::needs_conversion_on_storage(size_t arg_length,
304 CHARSET_INFO *cs_from,
305 CHARSET_INFO *cs_to)
306{
307 uint32 offset;
308 return (needs_conversion(arg_length, cs_from, cs_to, &offset) ||
309 /* force conversion when storing a binary string */
310 (cs_from == &my_charset_bin &&
311 /* into a non-binary destination */
312 cs_to != &my_charset_bin &&
313 /* and any of the following is true :*/
314 (
315 /* it's a variable length encoding */
316 cs_to->mbminlen != cs_to->mbmaxlen ||
317 /* longer than 2 bytes : neither 1 byte nor ucs2 */
318 cs_to->mbminlen > 2 ||
319 /* and is not a multiple of the char byte size */
320 0 != (arg_length % cs_to->mbmaxlen)
321 )
322 )
323 );
324}
325
326
327/*
328 Copy a multi-byte character sets with adding leading zeros.
329
330 SYNOPSIS
331
332 copy_aligned()
333 str String to copy
334 arg_length Length of string. This should NOT be dividable with
335 cs->mbminlen.
336 offset arg_length % cs->mb_minlength
337 cs Character set for 'str'
338
339 NOTES
340 For real multi-byte, ascii incompatible charactser sets,
341 like UCS-2, add leading zeros if we have an incomplete character.
342 Thus,
343 SELECT _ucs2 0xAA
344 will automatically be converted into
345 SELECT _ucs2 0x00AA
346
347 RETURN
348 0 ok
349 1 error
350*/
351
352bool String::copy_aligned(const char *str, size_t arg_length, size_t offset,
353 CHARSET_INFO *cs)
354{
355 /* How many bytes are in incomplete character */
356 offset= cs->mbminlen - offset; /* How many zeros we should prepend */
357 DBUG_ASSERT(offset && offset != cs->mbminlen);
358
359 size_t aligned_length= arg_length + offset;
360 if (alloc(aligned_length))
361 return TRUE;
362
363 /*
364 Note, this is only safe for big-endian UCS-2.
365 If we add little-endian UCS-2 sometimes, this code
366 will be more complicated. But it's OK for now.
367 */
368 bzero((char*) Ptr, offset);
369 memcpy(Ptr + offset, str, arg_length);
370 Ptr[aligned_length]=0;
371 /* str_length is always >= 0 as arg_length is != 0 */
372 str_length= (uint32)aligned_length;
373 str_charset= cs;
374 return FALSE;
375}
376
377
378bool String::set_or_copy_aligned(const char *str, size_t arg_length,
379 CHARSET_INFO *cs)
380{
381 /* How many bytes are in incomplete character */
382 size_t offset= (arg_length % cs->mbminlen);
383
384 if (!offset) /* All characters are complete, just copy */
385 {
386 set(str, arg_length, cs);
387 return FALSE;
388 }
389 return copy_aligned(str, arg_length, offset, cs);
390}
391
392
393/**
394 Copies the character data into this String, with optional character set
395 conversion.
396
397 @return
398 FALSE ok
399 TRUE Could not allocate result buffer
400
401*/
402
403bool String::copy(const char *str, size_t arg_length,
404 CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors)
405{
406 uint32 offset;
407
408 DBUG_ASSERT(!str || str != Ptr);
409
410 if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
411 {
412 *errors= 0;
413 return copy(str, arg_length, to_cs);
414 }
415 if ((from_cs == &my_charset_bin) && offset)
416 {
417 *errors= 0;
418 return copy_aligned(str, arg_length, offset, to_cs);
419 }
420 size_t new_length= to_cs->mbmaxlen*arg_length;
421 if (alloc(new_length))
422 return TRUE;
423 str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
424 str, arg_length, from_cs, errors);
425 str_charset=to_cs;
426 return FALSE;
427}
428
429
430/*
431 Set a string to the value of a latin1-string, keeping the original charset
432
433 SYNOPSIS
434 copy_or_set()
435 str String of a simple charset (latin1)
436 arg_length Length of string
437
438 IMPLEMENTATION
439 If string object is of a simple character set, set it to point to the
440 given string.
441 If not, make a copy and convert it to the new character set.
442
443 RETURN
444 0 ok
445 1 Could not allocate result buffer
446
447*/
448
449bool String::set_ascii(const char *str, size_t arg_length)
450{
451 if (str_charset->mbminlen == 1)
452 {
453 set(str, arg_length, str_charset);
454 return 0;
455 }
456 uint dummy_errors;
457 return copy(str, (uint32)arg_length, &my_charset_latin1, str_charset, &dummy_errors);
458}
459
460
461/* This is used by mysql.cc */
462
463bool String::fill(uint32 max_length,char fill_char)
464{
465 if (str_length > max_length)
466 Ptr[str_length=max_length]=0;
467 else
468 {
469 if (realloc(max_length))
470 return TRUE;
471 bfill(Ptr+str_length,max_length-str_length,fill_char);
472 str_length=max_length;
473 }
474 return FALSE;
475}
476
477void String::strip_sp()
478{
479 while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
480 str_length--;
481}
482
483bool String::append(const String &s)
484{
485 if (s.length())
486 {
487 if (realloc_with_extra_if_needed(str_length+s.length()))
488 return TRUE;
489 memcpy(Ptr+str_length,s.ptr(),s.length());
490 str_length+=s.length();
491 }
492 return FALSE;
493}
494
495
496/*
497 Append an ASCII string to the a string of the current character set
498*/
499
500bool String::append(const char *s,size_t size)
501{
502 DBUG_ASSERT(size <= UINT_MAX32);
503 uint32 arg_length= (uint32) size;
504 if (!arg_length)
505 return FALSE;
506
507 /*
508 For an ASCII incompatible string, e.g. UCS-2, we need to convert
509 */
510 if (str_charset->mbminlen > 1)
511 {
512 uint32 add_length=arg_length * str_charset->mbmaxlen;
513 uint dummy_errors;
514 if (realloc_with_extra_if_needed(str_length+ add_length))
515 return TRUE;
516 str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
517 s, arg_length, &my_charset_latin1,
518 &dummy_errors);
519 return FALSE;
520 }
521
522 /*
523 For an ASCII compatinble string we can just append.
524 */
525 if (realloc_with_extra_if_needed(str_length+arg_length))
526 return TRUE;
527 memcpy(Ptr+str_length,s,arg_length);
528 str_length+=arg_length;
529 return FALSE;
530}
531
532
533/*
534 Append a 0-terminated ASCII string
535*/
536
537bool String::append(const char *s)
538{
539 return append(s, (uint) strlen(s));
540}
541
542bool String::append_longlong(longlong val)
543{
544 if (realloc(str_length+MAX_BIGINT_WIDTH+2))
545 return TRUE;
546 char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, -10);
547 str_length= (uint32)(end - Ptr);
548 return FALSE;
549}
550
551
552bool String::append_ulonglong(ulonglong val)
553{
554 if (realloc(str_length+MAX_BIGINT_WIDTH+2))
555 return TRUE;
556 char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, 10);
557 str_length= (uint32) (end - Ptr);
558 return FALSE;
559}
560
561/*
562 Append a string in the given charset to the string
563 with character set recoding
564*/
565
566bool String::append(const char *s, size_t arg_length, CHARSET_INFO *cs)
567{
568 uint32 offset;
569
570 if (needs_conversion((uint32)arg_length, cs, str_charset, &offset))
571 {
572 size_t add_length;
573 if ((cs == &my_charset_bin) && offset)
574 {
575 DBUG_ASSERT(str_charset->mbminlen > offset);
576 offset= str_charset->mbminlen - offset; // How many characters to pad
577 add_length= arg_length + offset;
578 if (realloc(str_length + add_length))
579 return TRUE;
580 bzero((char*) Ptr + str_length, offset);
581 memcpy(Ptr + str_length + offset, s, arg_length);
582 str_length+= (uint32)add_length;
583 return FALSE;
584 }
585
586 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
587 uint dummy_errors;
588 if (realloc_with_extra_if_needed(str_length + add_length))
589 return TRUE;
590 str_length+= copy_and_convert(Ptr+str_length, (uint32)add_length, str_charset,
591 s, (uint32)arg_length, cs, &dummy_errors);
592 }
593 else
594 {
595 if (realloc_with_extra_if_needed(str_length + arg_length))
596 return TRUE;
597 memcpy(Ptr + str_length, s, arg_length);
598 str_length+= (uint32)arg_length;
599 }
600 return FALSE;
601}
602
603bool String::append(IO_CACHE* file, uint32 arg_length)
604{
605 if (realloc_with_extra_if_needed(str_length+arg_length))
606 return TRUE;
607 if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
608 {
609 shrink(str_length ? str_length : 1);
610 return TRUE;
611 }
612 str_length+=arg_length;
613 return FALSE;
614}
615
616
617/**
618 Append a parenthesized number to String.
619 Used in various pieces of SHOW related code.
620
621 @param nr Number
622 @param radix Radix, optional parameter, 10 by default.
623*/
624bool String::append_parenthesized(long nr, int radix)
625{
626 char buff[64], *end;
627 buff[0]= '(';
628 end= int10_to_str(nr, buff + 1, radix);
629 *end++ = ')';
630 return append(buff, (uint) (end - buff));
631}
632
633
634bool String::append_with_prefill(const char *s,uint32 arg_length,
635 uint32 full_length, char fill_char)
636{
637 int t_length= arg_length > full_length ? arg_length : full_length;
638
639 if (realloc_with_extra_if_needed(str_length + t_length))
640 return TRUE;
641 t_length= full_length - arg_length;
642 if (t_length > 0)
643 {
644 bfill(Ptr+str_length, t_length, fill_char);
645 str_length=str_length + t_length;
646 }
647 append(s, arg_length);
648 return FALSE;
649}
650
651uint32 String::numchars() const
652{
653 return (uint32) str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
654}
655
656int String::charpos(longlong i,uint32 offset)
657{
658 if (i <= 0)
659 return (int)i;
660 return (int)str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,(size_t)i);
661}
662
663int String::strstr(const String &s,uint32 offset)
664{
665 if (s.length()+offset <= str_length)
666 {
667 if (!s.length())
668 return ((int) offset); // Empty string is always found
669
670 const char *str = Ptr+offset;
671 const char *search=s.ptr();
672 const char *end=Ptr+str_length-s.length()+1;
673 const char *search_end=s.ptr()+s.length();
674skip:
675 while (str != end)
676 {
677 if (*str++ == *search)
678 {
679 char *i,*j;
680 i=(char*) str; j=(char*) search+1;
681 while (j != search_end)
682 if (*i++ != *j++) goto skip;
683 return (int) (str-Ptr) -1;
684 }
685 }
686 }
687 return -1;
688}
689
690/*
691** Search string from end. Offset is offset to the end of string
692*/
693
694int String::strrstr(const String &s,uint32 offset)
695{
696 if (s.length() <= offset && offset <= str_length)
697 {
698 if (!s.length())
699 return offset; // Empty string is always found
700 const char *str = Ptr+offset-1;
701 const char *search=s.ptr()+s.length()-1;
702
703 const char *end=Ptr+s.length()-2;
704 const char *search_end=s.ptr()-1;
705skip:
706 while (str != end)
707 {
708 if (*str-- == *search)
709 {
710 char *i,*j;
711 i=(char*) str; j=(char*) search-1;
712 while (j != search_end)
713 if (*i-- != *j--) goto skip;
714 return (int) (i-Ptr) +1;
715 }
716 }
717 }
718 return -1;
719}
720
721/*
722 Replace substring with string
723 If wrong parameter or not enough memory, do nothing
724*/
725
726bool String::replace(uint32 offset,uint32 arg_length,const String &to)
727{
728 return replace(offset,arg_length,to.ptr(),to.length());
729}
730
731bool String::replace(uint32 offset,uint32 arg_length,
732 const char *to, uint32 to_length)
733{
734 long diff = (long) to_length-(long) arg_length;
735 if (offset+arg_length <= str_length)
736 {
737 if (diff < 0)
738 {
739 if (to_length)
740 memcpy(Ptr+offset,to,to_length);
741 bmove(Ptr+offset+to_length,Ptr+offset+arg_length,
742 str_length-offset-arg_length);
743 }
744 else
745 {
746 if (diff)
747 {
748 if (realloc_with_extra_if_needed(str_length+(uint32) diff))
749 return TRUE;
750 bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
751 str_length-offset-arg_length);
752 }
753 if (to_length)
754 memcpy(Ptr+offset,to,to_length);
755 }
756 str_length+=(uint32) diff;
757 }
758 return FALSE;
759}
760
761
762// added by Holyfoot for "geometry" needs
763int String::reserve(size_t space_needed, size_t grow_by)
764{
765 if (Alloced_length < str_length + space_needed)
766 {
767 if (realloc(Alloced_length + MY_MAX(space_needed, grow_by) - 1))
768 return TRUE;
769 }
770 return FALSE;
771}
772
773void String::qs_append(const char *str, size_t len)
774{
775 memcpy(Ptr + str_length, str, len + 1);
776 str_length += (uint32)len;
777}
778
779void String::qs_append(double d)
780{
781 char *buff = Ptr + str_length;
782 str_length+= (uint32) my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff,
783 NULL);
784}
785
786void String::qs_append(double *d)
787{
788 double ld;
789 float8get(ld, (char*) d);
790 qs_append(ld);
791}
792
793void String::qs_append(int i)
794{
795 char *buff= Ptr + str_length;
796 char *end= int10_to_str(i, buff, -10);
797 str_length+= (int) (end-buff);
798}
799
800void String::qs_append(ulonglong i)
801{
802 char *buff= Ptr + str_length;
803 char *end= longlong10_to_str(i, buff, 10);
804 str_length+= (int) (end-buff);
805}
806
807/*
808 Compare strings according to collation, without end space.
809
810 SYNOPSIS
811 sortcmp()
812 s First string
813 t Second string
814 cs Collation
815
816 NOTE:
817 Normally this is case sensitive comparison
818
819 RETURN
820 < 0 s < t
821 0 s == t
822 > 0 s > t
823*/
824
825
826int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
827{
828 return cs->coll->strnncollsp(cs,
829 (uchar *) s->ptr(),s->length(),
830 (uchar *) t->ptr(),t->length());
831}
832
833
834/*
835 Compare strings byte by byte. End spaces are also compared.
836
837 SYNOPSIS
838 stringcmp()
839 s First string
840 t Second string
841
842 NOTE:
843 Strings are compared as a stream of uchars
844
845 RETURN
846 < 0 s < t
847 0 s == t
848 > 0 s > t
849*/
850
851
852int stringcmp(const String *s,const String *t)
853{
854 uint32 s_len=s->length(),t_len=t->length(),len=MY_MIN(s_len,t_len);
855 int cmp= memcmp(s->ptr(), t->ptr(), len);
856 return (cmp) ? cmp : (int) (s_len - t_len);
857}
858
859
860/**
861 Return a string which has the same value with "from" and
862 which is safe to modify, trying to avoid unnecessary allocation
863 and copying when possible.
864
865 @param to Buffer. Must not be a constant string.
866 @param from Some existing value. We'll try to reuse it.
867 Can be a constant or a variable string.
868 @param from_length The total size that will be possibly needed.
869 Note, can be 0.
870
871 Note, in some cases "from" and "to" can point to the same object.
872
873 If "from" is a variable string and its allocated memory is enough
874 to store "from_length" bytes, then "from" is returned as is.
875
876 If "from" is a variable string and its allocated memory is not enough
877 to store "from_length" bytes, then "from" is reallocated and returned.
878
879 Otherwise (if "from" is a constant string, or looks like a constant string),
880 then "to" is reallocated to fit "from_length" bytes, the value is copied
881 from "from" to "to", then "to" is returned.
882*/
883String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
884{
885 DBUG_ASSERT(to);
886 /*
887 If "from" is a constant string, e.g.:
888 SELECT INSERT('', <pos>, <length>, <replacement>);
889 we should not return it. See MDEV-9332.
890
891 The code below detects different string types:
892
893 a. All constant strings have Alloced_length==0 and alloced==false.
894 They point to a static memory array, or a mem_root memory,
895 and should stay untouched until the end of their life cycle.
896 Not safe to reuse.
897
898 b. Some variable string have Alloced_length==0 and alloced==false initially,
899 they are not bound to any char array and allocate space on the first use
900 (and become #d). A typical example of such String is Item::str_value.
901 This type of string could be reused, but there is no a way to distinguish
902 them from the true constant strings (#a).
903 Not safe to reuse.
904
905 c. Some variable strings have Alloced_length>0 and alloced==false.
906 They point to a fixed size writtable char array (typically on stack)
907 initially but can later allocate more space on the heap when the
908 fixed size array is too small (these strings become #d after allocation).
909 Safe to reuse.
910
911 d. Some variable strings have Alloced_length>0 and alloced==true.
912 They already store data on the heap.
913 Safe to reuse.
914
915 e. Some strings can have Alloced_length==0 and alloced==true.
916 This type of strings allocate space on the heap, but then are marked
917 as constant strings using String::mark_as_const().
918 A typical example - the result of a character set conversion
919 of a constant string.
920 Not safe to reuse.
921 */
922 if (from->Alloced_length > 0) // "from" is #c or #d (not a constant)
923 {
924 if (from->Alloced_length >= from_length)
925 return from; // #c or #d (large enough to store from_length bytes)
926
927 if (from->alloced)
928 {
929 (void) from->realloc(from_length);
930 return from; // #d (reallocated to fit from_length bytes)
931 }
932 /*
933 "from" is of type #c. It currently points to a writtable char array
934 (typically on stack), but is too small for "from_length" bytes.
935 We need to reallocate either "from" or "to".
936
937 "from" typically points to a temporary buffer inside Item_xxx::val_str(),
938 or to Item::str_value, and thus is "less permanent" than "to".
939
940 Reallocating "to" may give more benifits:
941 - "to" can point to a "more permanent" storage and can be reused
942 for multiple rows, e.g. str_buffer in Protocol::send_result_set_row(),
943 which is passed to val_str() for all string type rows.
944 - "from" can stay pointing to its original fixed size stack char array,
945 and thus reduce the total amount of my_alloc/my_free.
946 */
947 }
948
949 if (from == to)
950 {
951 /*
952 Possible string types:
953 #a not possible (constants should not be passed as "to")
954 #b possible (a fresh variable with no associated char buffer)
955 #c possible (a variable with a char buffer,
956 in case it's smaller than fixed_length)
957 #d not possible (handled earlier)
958 #e not possible (constants should not be passed as "to")
959
960 If a string of types #a or #e appears here, that means the caller made
961 something wrong. Otherwise, it's safe to reallocate and return "to".
962
963 Note, as we can't distinguish between #a and #b for sure,
964 so we can't assert "not #a", but we can at least assert "not #e".
965 */
966 DBUG_ASSERT(!from->alloced || from->Alloced_length > 0); // Not #e
967
968 (void) from->realloc(from_length);
969 return from;
970 }
971 if (to->realloc(from_length))
972 return from; // Actually an error
973 if ((to->str_length=MY_MIN(from->str_length,from_length)))
974 memcpy(to->Ptr,from->Ptr,to->str_length);
975 to->str_charset=from->str_charset;
976 return to; // "from" was of types #a, #b, #e, or small #c.
977}
978
979
980/****************************************************************************
981 Help functions
982****************************************************************************/
983
984/**
985 Copy string with HEX-encoding of "bad" characters.
986
987 @details This functions copies the string pointed by "src"
988 to the string pointed by "dst". Not more than "srclen" bytes
989 are read from "src". Any sequences of bytes representing
990 a not-well-formed substring (according to cs) are hex-encoded,
991 and all well-formed substrings (according to cs) are copied as is.
992 Not more than "dstlen" bytes are written to "dst". The number
993 of bytes written to "dst" is returned.
994
995 @param cs character set pointer of the destination string
996 @param[out] dst destination string
997 @param dstlen size of dst
998 @param src source string
999 @param srclen length of src
1000
1001 @retval result length
1002*/
1003
1004size_t
1005my_copy_with_hex_escaping(CHARSET_INFO *cs,
1006 char *dst, size_t dstlen,
1007 const char *src, size_t srclen)
1008{
1009 const char *srcend= src + srclen;
1010 char *dst0= dst;
1011
1012 for ( ; src < srcend ; )
1013 {
1014 size_t chlen;
1015 if ((chlen= my_ismbchar(cs, src, srcend)))
1016 {
1017 if (dstlen < chlen)
1018 break; /* purecov: inspected */
1019 memcpy(dst, src, chlen);
1020 src+= chlen;
1021 dst+= chlen;
1022 dstlen-= chlen;
1023 }
1024 else if (*src & 0x80)
1025 {
1026 if (dstlen < 4)
1027 break; /* purecov: inspected */
1028 *dst++= '\\';
1029 *dst++= 'x';
1030 APPEND_HEX(dst, (uchar) *src);
1031 src++;
1032 dstlen-= 4;
1033 }
1034 else
1035 {
1036 if (dstlen < 1)
1037 break; /* purecov: inspected */
1038 *dst++= *src++;
1039 dstlen--;
1040 }
1041 }
1042 return dst - dst0;
1043}
1044
1045
1046/*
1047 Copy a string,
1048 with optional character set conversion,
1049 with optional left padding (for binary -> UCS2 conversion)
1050
1051 Bad input bytes are replaced to '?'.
1052
1053 The string that is written to "to" is always well-formed.
1054
1055 @param to The destination string
1056 @param to_length Space available in "to"
1057 @param to_cs Character set of the "to" string
1058 @param from The source string
1059 @param from_length Length of the "from" string
1060 @param from_cs Character set of the "from" string
1061 @param nchars Copy not more than "nchars" characters
1062
1063 The members as set as follows:
1064 m_well_formed_error_pos To the position when "from" is not well formed
1065 or NULL otherwise.
1066 m_cannot_convert_error_pos To the position where a not convertable
1067 character met, or NULL otherwise.
1068 m_source_end_pos To the position where scanning of the "from"
1069 string stopped.
1070
1071 @returns number of bytes that were written to 'to'
1072*/
1073uint
1074String_copier::well_formed_copy(CHARSET_INFO *to_cs,
1075 char *to, size_t to_length,
1076 CHARSET_INFO *from_cs,
1077 const char *from, size_t from_length, size_t nchars)
1078{
1079 if ((to_cs == &my_charset_bin) ||
1080 (from_cs == &my_charset_bin) ||
1081 (to_cs == from_cs) ||
1082 my_charset_same(from_cs, to_cs))
1083 {
1084 m_cannot_convert_error_pos= NULL;
1085 return (uint) to_cs->cset->copy_fix(to_cs, to, to_length, from, from_length,
1086 nchars, this);
1087 }
1088 return (uint) my_convert_fix(to_cs, to, to_length, from_cs, from, from_length,
1089 nchars, this, this);
1090}
1091
1092
1093
1094/*
1095 Append characters to a single-quoted string '...', escaping special
1096 characters with backslashes as necessary.
1097 Does not add the enclosing quotes, this is left up to caller.
1098*/
1099#define APPEND(X) if (append(X)) return 1; else break
1100bool String::append_for_single_quote(const char *st, size_t len)
1101{
1102 const char *end= st+len;
1103 for (; st < end; st++)
1104 {
1105 uchar c= *st;
1106 switch (c)
1107 {
1108 case '\\': APPEND(STRING_WITH_LEN("\\\\"));
1109 case '\0': APPEND(STRING_WITH_LEN("\\0"));
1110 case '\'': APPEND(STRING_WITH_LEN("\\'"));
1111 case '\n': APPEND(STRING_WITH_LEN("\\n"));
1112 case '\r': APPEND(STRING_WITH_LEN("\\r"));
1113 case '\032': APPEND(STRING_WITH_LEN("\\Z"));
1114 default: APPEND(c);
1115 }
1116 }
1117 return 0;
1118}
1119
1120void String::print(String *str) const
1121{
1122 str->append_for_single_quote(Ptr, str_length);
1123}
1124
1125
1126void String::print_with_conversion(String *print, CHARSET_INFO *cs) const
1127{
1128 StringBuffer<256> tmp(cs);
1129 uint errors= 0;
1130 tmp.copy(this, cs, &errors);
1131 tmp.print(print);
1132}
1133
1134
1135/*
1136 Exchange state of this object and argument.
1137
1138 SYNOPSIS
1139 String::swap()
1140
1141 RETURN
1142 Target string will contain state of this object and vice versa.
1143*/
1144
1145void String::swap(String &s)
1146{
1147 swap_variables(char *, Ptr, s.Ptr);
1148 swap_variables(uint32, str_length, s.str_length);
1149 swap_variables(uint32, Alloced_length, s.Alloced_length);
1150 swap_variables(bool, alloced, s.alloced);
1151 swap_variables(CHARSET_INFO*, str_charset, s.str_charset);
1152}
1153
1154
1155/**
1156 Convert string to printable ASCII string
1157
1158 @details This function converts input string "from" replacing non-ASCII bytes
1159 with hexadecimal sequences ("\xXX") optionally appending "..." to the end of
1160 the resulting string.
1161 This function used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages,
1162 e.g. when a string cannot be converted to a result charset.
1163
1164
1165 @param to output buffer
1166 @param to_len size of the output buffer (8 bytes or greater)
1167 @param from input string
1168 @param from_len size of the input string
1169 @param from_cs input charset
1170 @param nbytes maximal number of bytes to convert (from_len if 0)
1171
1172 @return number of bytes in the output string
1173*/
1174
1175uint convert_to_printable(char *to, size_t to_len,
1176 const char *from, size_t from_len,
1177 CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
1178{
1179 /* needs at least 8 bytes for '\xXX...' and zero byte */
1180 DBUG_ASSERT(to_len >= 8);
1181
1182 char *t= to;
1183 char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
1184 const char *f= from;
1185 const char *f_end= from + (nbytes ? MY_MIN(from_len, nbytes) : from_len);
1186 char *dots= to; // last safe place to append '...'
1187
1188 if (!f || t == t_end)
1189 return 0;
1190
1191 for (; t < t_end && f < f_end; f++)
1192 {
1193 /*
1194 If the source string is ASCII compatible (mbminlen==1)
1195 and the source character is in ASCII printable range (0x20..0x7F),
1196 then display the character as is.
1197
1198 Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
1199 or the source character is not in the printable range,
1200 then print the character using HEX notation.
1201 */
1202 if (((unsigned char) *f) >= 0x20 &&
1203 ((unsigned char) *f) <= 0x7F &&
1204 from_cs->mbminlen == 1)
1205 {
1206 *t++= *f;
1207 }
1208 else
1209 {
1210 if (t_end - t < 4) // \xXX
1211 break;
1212 *t++= '\\';
1213 *t++= 'x';
1214 APPEND_HEX(t, *f);
1215 }
1216 if (t_end - t >= 3) // '...'
1217 dots= t;
1218 }
1219 if (f < from + from_len)
1220 memcpy(dots, STRING_WITH_LEN("...\0"));
1221 else
1222 *t= '\0';
1223 return (uint) (t - to);
1224}
1225