1 | /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. |
2 | Copyright (c) 2016, MariaDB |
3 | |
4 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by |
6 | the Free Software Foundation; version 2 of the License. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
16 | |
17 | /* This file is originally from the mysql distribution. Coded by monty */ |
18 | |
19 | #ifdef USE_PRAGMA_IMPLEMENTATION |
20 | #pragma implementation // gcc: Class implementation |
21 | #endif |
22 | |
23 | #include "mariadb.h" |
24 | #include <m_string.h> |
25 | #include <m_ctype.h> |
26 | #include <mysql_com.h> |
27 | |
28 | #include "sql_string.h" |
29 | |
30 | /***************************************************************************** |
31 | ** String functions |
32 | *****************************************************************************/ |
33 | |
34 | bool String::real_alloc(size_t length) |
35 | { |
36 | size_t arg_length= ALIGN_SIZE(length + 1); |
37 | DBUG_ASSERT(arg_length > length); |
38 | if (arg_length <= length) |
39 | return TRUE; /* Overflow */ |
40 | str_length=0; |
41 | if (Alloced_length < arg_length) |
42 | { |
43 | free(); |
44 | if (!(Ptr=(char*) my_malloc(arg_length,MYF(MY_WME | |
45 | (thread_specific ? |
46 | MY_THREAD_SPECIFIC : 0))))) |
47 | return TRUE; |
48 | DBUG_ASSERT(length < UINT_MAX32); |
49 | Alloced_length=(uint32) arg_length; |
50 | alloced=1; |
51 | } |
52 | Ptr[0]=0; |
53 | return FALSE; |
54 | } |
55 | |
56 | |
57 | /** |
58 | Allocates a new buffer on the heap for this String. |
59 | |
60 | - If the String's internal buffer is privately owned and heap allocated, |
61 | one of the following is performed. |
62 | |
63 | - If the requested length is greater than what fits in the buffer, a new |
64 | buffer is allocated, data moved and the old buffer freed. |
65 | |
66 | - If the requested length is less or equal to what fits in the buffer, a |
67 | null character is inserted at the appropriate position. |
68 | |
  - If the String does not keep a private buffer on the heap, such a buffer
    will be allocated and the string copied according to its length, as found
    in String::length().
72 | |
73 | For C compatibility, the new string buffer is null terminated. |
74 | |
75 | @param alloc_length The requested string size in characters, excluding any |
76 | null terminator. |
77 | |
78 | @retval false Either the copy operation is complete or, if the size of the |
79 | new buffer is smaller than the currently allocated buffer (if one exists), |
80 | no allocation occurred. |
81 | |
82 | @retval true An error occurred when attempting to allocate memory. |
83 | */ |
84 | bool String::realloc_raw(size_t alloc_length) |
85 | { |
86 | if (Alloced_length <= alloc_length) |
87 | { |
88 | char *new_ptr; |
89 | uint32 len= ALIGN_SIZE(alloc_length+1); |
90 | DBUG_ASSERT(len > alloc_length); |
91 | if (len <= alloc_length) |
92 | return TRUE; /* Overflow */ |
93 | if (alloced) |
94 | { |
95 | if (!(new_ptr= (char*) my_realloc(Ptr,len, |
96 | MYF(MY_WME | |
97 | (thread_specific ? |
98 | MY_THREAD_SPECIFIC : 0))))) |
99 | return TRUE; // Signal error |
100 | } |
101 | else if ((new_ptr= (char*) my_malloc(len, |
102 | MYF(MY_WME | |
103 | (thread_specific ? |
104 | MY_THREAD_SPECIFIC : 0))))) |
105 | { |
106 | if (str_length > len - 1) |
107 | str_length= 0; |
108 | if (str_length) // Avoid bugs in memcpy on AIX |
109 | memcpy(new_ptr,Ptr,str_length); |
110 | new_ptr[str_length]=0; |
111 | alloced=1; |
112 | } |
113 | else |
114 | return TRUE; // Signal error |
115 | Ptr= new_ptr; |
116 | DBUG_ASSERT(len < UINT_MAX32); |
117 | Alloced_length= (uint32)len; |
118 | } |
119 | return FALSE; |
120 | } |
121 | |
122 | bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs) |
123 | { |
124 | uint l=20*cs->mbmaxlen+1; |
125 | int base= unsigned_flag ? 10 : -10; |
126 | |
127 | if (alloc(l)) |
128 | return TRUE; |
129 | str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num); |
130 | str_charset=cs; |
131 | return FALSE; |
132 | } |
133 | |
134 | |
135 | // Convert a number into its HEX representation |
136 | bool String::set_hex(ulonglong num) |
137 | { |
138 | char *n_end; |
139 | if (alloc(65) || !(n_end= longlong2str(num, Ptr, 16))) |
140 | return true; |
141 | length((uint32) (n_end - Ptr)); |
142 | set_charset(&my_charset_latin1); |
143 | return false; |
144 | } |
145 | |
146 | |
147 | /** |
148 | Append a hex representation of the byte "value" into "to". |
149 | Note: |
    "to" is incremented for the caller by two bytes. It's passed by reference!
    So it resembles a macro, hence capital letters in the name.
152 | */ |
153 | static inline void APPEND_HEX(char *&to, uchar value) |
154 | { |
155 | *to++= _dig_vec_upper[((uchar) value) >> 4]; |
156 | *to++= _dig_vec_upper[((uchar) value) & 0x0F]; |
157 | } |
158 | |
159 | |
160 | void String::qs_append_hex(const char *str, uint32 len) |
161 | { |
162 | const char *str_end= str + len; |
163 | for (char *to= Ptr + str_length ; str < str_end; str++) |
164 | APPEND_HEX(to, (uchar) *str); |
165 | str_length+= len * 2; |
166 | } |
167 | |
168 | |
169 | // Convert a string to its HEX representation |
170 | bool String::set_hex(const char *str, uint32 len) |
171 | { |
172 | /* |
173 | Safety: cut the source string if "len" is too large. |
174 | Note, alloc() can allocate some more space than requested, due to: |
175 | - ALIGN_SIZE |
176 | - one extra byte for a null terminator |
177 | So cut the source string to 0x7FFFFFF0 rather than 0x7FFFFFFE. |
178 | */ |
179 | set_if_smaller(len, 0x7FFFFFF0); |
180 | if (alloc(len * 2)) |
181 | return true; |
182 | length(0); |
183 | qs_append_hex(str, len); |
184 | set_charset(&my_charset_latin1); |
185 | return false; |
186 | } |
187 | |
188 | |
189 | bool String::set_real(double num,uint decimals, CHARSET_INFO *cs) |
190 | { |
191 | char buff[FLOATING_POINT_BUFFER]; |
192 | uint dummy_errors; |
193 | size_t len; |
194 | |
195 | str_charset=cs; |
196 | if (decimals >= FLOATING_POINT_DECIMALS) |
197 | { |
198 | len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL); |
199 | return copy(buff, (uint)len, &my_charset_latin1, cs, &dummy_errors); |
200 | } |
201 | len= my_fcvt(num, decimals, buff, NULL); |
202 | return copy(buff, (uint32) len, &my_charset_latin1, cs, |
203 | &dummy_errors); |
204 | } |
205 | |
206 | |
207 | bool String::copy() |
208 | { |
209 | if (!alloced) |
210 | { |
211 | Alloced_length=0; // Force realloc |
212 | return realloc(str_length); |
213 | } |
214 | return FALSE; |
215 | } |
216 | |
217 | /** |
218 | Copies the internal buffer from str. If this String has a private heap |
219 | allocated buffer where new data does not fit, a new buffer is allocated |
220 | before copying and the old buffer freed. Character set information is also |
221 | copied. |
222 | |
223 | @param str The string whose internal buffer is to be copied. |
224 | |
225 | @retval false Success. |
226 | @retval true Memory allocation failed. |
227 | */ |
228 | bool String::copy(const String &str) |
229 | { |
230 | if (alloc(str.str_length)) |
231 | return TRUE; |
232 | str_length=str.str_length; |
233 | bmove(Ptr,str.Ptr,str_length); // May be overlapping |
234 | Ptr[str_length]=0; |
235 | str_charset=str.str_charset; |
236 | return FALSE; |
237 | } |
238 | |
239 | bool String::copy(const char *str,size_t arg_length, CHARSET_INFO *cs) |
240 | { |
241 | if (alloc(arg_length)) |
242 | return TRUE; |
243 | DBUG_ASSERT(arg_length <= UINT_MAX32); |
244 | if ((str_length=(uint32)arg_length)) |
245 | memcpy(Ptr,str,arg_length); |
246 | Ptr[arg_length]=0; |
247 | str_charset=cs; |
248 | return FALSE; |
249 | } |
250 | |
251 | |
252 | /* |
253 | Checks that the source string can be just copied to the destination string |
254 | without conversion. |
255 | |
  SYNOPSIS
257 | |
258 | needs_conversion() |
259 | arg_length Length of string to copy. |
260 | from_cs Character set to copy from |
261 | to_cs Character set to copy to |
262 | uint32 *offset Returns number of unaligned characters. |
263 | |
264 | RETURN |
265 | 0 No conversion needed |
266 | 1 Either character set conversion or adding leading zeros |
267 | (e.g. for UCS-2) must be done |
268 | |
269 | NOTE |
270 | to_cs may be NULL for "no conversion" if the system variable |
271 | character_set_results is NULL. |
272 | */ |
273 | |
274 | bool String::needs_conversion(size_t arg_length, |
275 | CHARSET_INFO *from_cs, |
276 | CHARSET_INFO *to_cs, |
277 | uint32 *offset) |
278 | { |
279 | *offset= 0; |
280 | if (!to_cs || |
281 | (to_cs == &my_charset_bin) || |
282 | (to_cs == from_cs) || |
283 | my_charset_same(from_cs, to_cs) || |
284 | ((from_cs == &my_charset_bin) && |
285 | (!(*offset=(uint32)(arg_length % to_cs->mbminlen))))) |
286 | return FALSE; |
287 | return TRUE; |
288 | } |
289 | |
290 | |
291 | /* |
292 | Checks that the source string can just be copied to the destination string |
293 | without conversion. |
  Unlike needs_conversion it will require conversion on incoming binary data
  to ensure the data are verified for validity first.
296 | |
297 | @param arg_length Length of string to copy. |
298 | @param from_cs Character set to copy from |
299 | @param to_cs Character set to copy to |
300 | |
301 | @return conversion needed |
302 | */ |
303 | bool String::needs_conversion_on_storage(size_t arg_length, |
304 | CHARSET_INFO *cs_from, |
305 | CHARSET_INFO *cs_to) |
306 | { |
307 | uint32 offset; |
308 | return (needs_conversion(arg_length, cs_from, cs_to, &offset) || |
309 | /* force conversion when storing a binary string */ |
310 | (cs_from == &my_charset_bin && |
311 | /* into a non-binary destination */ |
312 | cs_to != &my_charset_bin && |
313 | /* and any of the following is true :*/ |
314 | ( |
315 | /* it's a variable length encoding */ |
316 | cs_to->mbminlen != cs_to->mbmaxlen || |
317 | /* longer than 2 bytes : neither 1 byte nor ucs2 */ |
318 | cs_to->mbminlen > 2 || |
319 | /* and is not a multiple of the char byte size */ |
320 | 0 != (arg_length % cs_to->mbmaxlen) |
321 | ) |
322 | ) |
323 | ); |
324 | } |
325 | |
326 | |
327 | /* |
  Copy a string of a multi-byte character set, adding leading zeros.
329 | |
330 | SYNOPSIS |
331 | |
332 | copy_aligned() |
333 | str String to copy |
    arg_length		Length of string. This should NOT be divisible by
			cs->mbminlen.
    offset		arg_length % cs->mbminlen
337 | cs Character set for 'str' |
338 | |
339 | NOTES |
    For real multi-byte, ASCII incompatible character sets,
    like UCS-2, add leading zeros if we have an incomplete character.
342 | Thus, |
343 | SELECT _ucs2 0xAA |
344 | will automatically be converted into |
345 | SELECT _ucs2 0x00AA |
346 | |
347 | RETURN |
348 | 0 ok |
349 | 1 error |
350 | */ |
351 | |
352 | bool String::copy_aligned(const char *str, size_t arg_length, size_t offset, |
353 | CHARSET_INFO *cs) |
354 | { |
355 | /* How many bytes are in incomplete character */ |
356 | offset= cs->mbminlen - offset; /* How many zeros we should prepend */ |
357 | DBUG_ASSERT(offset && offset != cs->mbminlen); |
358 | |
359 | size_t aligned_length= arg_length + offset; |
360 | if (alloc(aligned_length)) |
361 | return TRUE; |
362 | |
363 | /* |
364 | Note, this is only safe for big-endian UCS-2. |
365 | If we add little-endian UCS-2 sometimes, this code |
366 | will be more complicated. But it's OK for now. |
367 | */ |
368 | bzero((char*) Ptr, offset); |
369 | memcpy(Ptr + offset, str, arg_length); |
370 | Ptr[aligned_length]=0; |
371 | /* str_length is always >= 0 as arg_length is != 0 */ |
372 | str_length= (uint32)aligned_length; |
373 | str_charset= cs; |
374 | return FALSE; |
375 | } |
376 | |
377 | |
378 | bool String::set_or_copy_aligned(const char *str, size_t arg_length, |
379 | CHARSET_INFO *cs) |
380 | { |
381 | /* How many bytes are in incomplete character */ |
382 | size_t offset= (arg_length % cs->mbminlen); |
383 | |
384 | if (!offset) /* All characters are complete, just copy */ |
385 | { |
386 | set(str, arg_length, cs); |
387 | return FALSE; |
388 | } |
389 | return copy_aligned(str, arg_length, offset, cs); |
390 | } |
391 | |
392 | |
393 | /** |
394 | Copies the character data into this String, with optional character set |
395 | conversion. |
396 | |
397 | @return |
398 | FALSE ok |
399 | TRUE Could not allocate result buffer |
400 | |
401 | */ |
402 | |
403 | bool String::copy(const char *str, size_t arg_length, |
404 | CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors) |
405 | { |
406 | uint32 offset; |
407 | |
408 | DBUG_ASSERT(!str || str != Ptr); |
409 | |
410 | if (!needs_conversion(arg_length, from_cs, to_cs, &offset)) |
411 | { |
412 | *errors= 0; |
413 | return copy(str, arg_length, to_cs); |
414 | } |
415 | if ((from_cs == &my_charset_bin) && offset) |
416 | { |
417 | *errors= 0; |
418 | return copy_aligned(str, arg_length, offset, to_cs); |
419 | } |
420 | size_t new_length= to_cs->mbmaxlen*arg_length; |
421 | if (alloc(new_length)) |
422 | return TRUE; |
423 | str_length=copy_and_convert((char*) Ptr, new_length, to_cs, |
424 | str, arg_length, from_cs, errors); |
425 | str_charset=to_cs; |
426 | return FALSE; |
427 | } |
428 | |
429 | |
430 | /* |
431 | Set a string to the value of a latin1-string, keeping the original charset |
432 | |
433 | SYNOPSIS |
    set_ascii()
435 | str String of a simple charset (latin1) |
436 | arg_length Length of string |
437 | |
438 | IMPLEMENTATION |
439 | If string object is of a simple character set, set it to point to the |
440 | given string. |
441 | If not, make a copy and convert it to the new character set. |
442 | |
443 | RETURN |
444 | 0 ok |
445 | 1 Could not allocate result buffer |
446 | |
447 | */ |
448 | |
449 | bool String::set_ascii(const char *str, size_t arg_length) |
450 | { |
451 | if (str_charset->mbminlen == 1) |
452 | { |
453 | set(str, arg_length, str_charset); |
454 | return 0; |
455 | } |
456 | uint dummy_errors; |
457 | return copy(str, (uint32)arg_length, &my_charset_latin1, str_charset, &dummy_errors); |
458 | } |
459 | |
460 | |
461 | /* This is used by mysql.cc */ |
462 | |
463 | bool String::fill(uint32 max_length,char fill_char) |
464 | { |
465 | if (str_length > max_length) |
466 | Ptr[str_length=max_length]=0; |
467 | else |
468 | { |
469 | if (realloc(max_length)) |
470 | return TRUE; |
471 | bfill(Ptr+str_length,max_length-str_length,fill_char); |
472 | str_length=max_length; |
473 | } |
474 | return FALSE; |
475 | } |
476 | |
477 | void String::strip_sp() |
478 | { |
479 | while (str_length && my_isspace(str_charset,Ptr[str_length-1])) |
480 | str_length--; |
481 | } |
482 | |
483 | bool String::append(const String &s) |
484 | { |
485 | if (s.length()) |
486 | { |
487 | if (realloc_with_extra_if_needed(str_length+s.length())) |
488 | return TRUE; |
489 | memcpy(Ptr+str_length,s.ptr(),s.length()); |
490 | str_length+=s.length(); |
491 | } |
492 | return FALSE; |
493 | } |
494 | |
495 | |
496 | /* |
  Append an ASCII string to a string of the current character set
498 | */ |
499 | |
500 | bool String::append(const char *s,size_t size) |
501 | { |
502 | DBUG_ASSERT(size <= UINT_MAX32); |
503 | uint32 arg_length= (uint32) size; |
504 | if (!arg_length) |
505 | return FALSE; |
506 | |
507 | /* |
508 | For an ASCII incompatible string, e.g. UCS-2, we need to convert |
509 | */ |
510 | if (str_charset->mbminlen > 1) |
511 | { |
512 | uint32 add_length=arg_length * str_charset->mbmaxlen; |
513 | uint dummy_errors; |
514 | if (realloc_with_extra_if_needed(str_length+ add_length)) |
515 | return TRUE; |
516 | str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset, |
517 | s, arg_length, &my_charset_latin1, |
518 | &dummy_errors); |
519 | return FALSE; |
520 | } |
521 | |
522 | /* |
523 | For an ASCII compatinble string we can just append. |
524 | */ |
525 | if (realloc_with_extra_if_needed(str_length+arg_length)) |
526 | return TRUE; |
527 | memcpy(Ptr+str_length,s,arg_length); |
528 | str_length+=arg_length; |
529 | return FALSE; |
530 | } |
531 | |
532 | |
533 | /* |
534 | Append a 0-terminated ASCII string |
535 | */ |
536 | |
537 | bool String::append(const char *s) |
538 | { |
539 | return append(s, (uint) strlen(s)); |
540 | } |
541 | |
542 | bool String::append_longlong(longlong val) |
543 | { |
544 | if (realloc(str_length+MAX_BIGINT_WIDTH+2)) |
545 | return TRUE; |
546 | char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, -10); |
547 | str_length= (uint32)(end - Ptr); |
548 | return FALSE; |
549 | } |
550 | |
551 | |
552 | bool String::append_ulonglong(ulonglong val) |
553 | { |
554 | if (realloc(str_length+MAX_BIGINT_WIDTH+2)) |
555 | return TRUE; |
556 | char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, 10); |
557 | str_length= (uint32) (end - Ptr); |
558 | return FALSE; |
559 | } |
560 | |
561 | /* |
562 | Append a string in the given charset to the string |
563 | with character set recoding |
564 | */ |
565 | |
566 | bool String::append(const char *s, size_t arg_length, CHARSET_INFO *cs) |
567 | { |
568 | uint32 offset; |
569 | |
570 | if (needs_conversion((uint32)arg_length, cs, str_charset, &offset)) |
571 | { |
572 | size_t add_length; |
573 | if ((cs == &my_charset_bin) && offset) |
574 | { |
575 | DBUG_ASSERT(str_charset->mbminlen > offset); |
576 | offset= str_charset->mbminlen - offset; // How many characters to pad |
577 | add_length= arg_length + offset; |
578 | if (realloc(str_length + add_length)) |
579 | return TRUE; |
580 | bzero((char*) Ptr + str_length, offset); |
581 | memcpy(Ptr + str_length + offset, s, arg_length); |
582 | str_length+= (uint32)add_length; |
583 | return FALSE; |
584 | } |
585 | |
586 | add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen; |
587 | uint dummy_errors; |
588 | if (realloc_with_extra_if_needed(str_length + add_length)) |
589 | return TRUE; |
590 | str_length+= copy_and_convert(Ptr+str_length, (uint32)add_length, str_charset, |
591 | s, (uint32)arg_length, cs, &dummy_errors); |
592 | } |
593 | else |
594 | { |
595 | if (realloc_with_extra_if_needed(str_length + arg_length)) |
596 | return TRUE; |
597 | memcpy(Ptr + str_length, s, arg_length); |
598 | str_length+= (uint32)arg_length; |
599 | } |
600 | return FALSE; |
601 | } |
602 | |
603 | bool String::append(IO_CACHE* file, uint32 arg_length) |
604 | { |
605 | if (realloc_with_extra_if_needed(str_length+arg_length)) |
606 | return TRUE; |
607 | if (my_b_read(file, (uchar*) Ptr + str_length, arg_length)) |
608 | { |
609 | shrink(str_length ? str_length : 1); |
610 | return TRUE; |
611 | } |
612 | str_length+=arg_length; |
613 | return FALSE; |
614 | } |
615 | |
616 | |
617 | /** |
618 | Append a parenthesized number to String. |
619 | Used in various pieces of SHOW related code. |
620 | |
621 | @param nr Number |
622 | @param radix Radix, optional parameter, 10 by default. |
623 | */ |
624 | bool String::append_parenthesized(long nr, int radix) |
625 | { |
626 | char buff[64], *end; |
627 | buff[0]= '('; |
628 | end= int10_to_str(nr, buff + 1, radix); |
629 | *end++ = ')'; |
630 | return append(buff, (uint) (end - buff)); |
631 | } |
632 | |
633 | |
634 | bool String::append_with_prefill(const char *s,uint32 arg_length, |
635 | uint32 full_length, char fill_char) |
636 | { |
637 | int t_length= arg_length > full_length ? arg_length : full_length; |
638 | |
639 | if (realloc_with_extra_if_needed(str_length + t_length)) |
640 | return TRUE; |
641 | t_length= full_length - arg_length; |
642 | if (t_length > 0) |
643 | { |
644 | bfill(Ptr+str_length, t_length, fill_char); |
645 | str_length=str_length + t_length; |
646 | } |
647 | append(s, arg_length); |
648 | return FALSE; |
649 | } |
650 | |
651 | uint32 String::numchars() const |
652 | { |
653 | return (uint32) str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length); |
654 | } |
655 | |
656 | int String::charpos(longlong i,uint32 offset) |
657 | { |
658 | if (i <= 0) |
659 | return (int)i; |
660 | return (int)str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,(size_t)i); |
661 | } |
662 | |
663 | int String::strstr(const String &s,uint32 offset) |
664 | { |
665 | if (s.length()+offset <= str_length) |
666 | { |
667 | if (!s.length()) |
668 | return ((int) offset); // Empty string is always found |
669 | |
670 | const char *str = Ptr+offset; |
671 | const char *search=s.ptr(); |
672 | const char *end=Ptr+str_length-s.length()+1; |
673 | const char *search_end=s.ptr()+s.length(); |
674 | skip: |
675 | while (str != end) |
676 | { |
677 | if (*str++ == *search) |
678 | { |
679 | char *i,*j; |
680 | i=(char*) str; j=(char*) search+1; |
681 | while (j != search_end) |
682 | if (*i++ != *j++) goto skip; |
683 | return (int) (str-Ptr) -1; |
684 | } |
685 | } |
686 | } |
687 | return -1; |
688 | } |
689 | |
690 | /* |
691 | ** Search string from end. Offset is offset to the end of string |
692 | */ |
693 | |
694 | int String::strrstr(const String &s,uint32 offset) |
695 | { |
696 | if (s.length() <= offset && offset <= str_length) |
697 | { |
698 | if (!s.length()) |
699 | return offset; // Empty string is always found |
700 | const char *str = Ptr+offset-1; |
701 | const char *search=s.ptr()+s.length()-1; |
702 | |
703 | const char *end=Ptr+s.length()-2; |
704 | const char *search_end=s.ptr()-1; |
705 | skip: |
706 | while (str != end) |
707 | { |
708 | if (*str-- == *search) |
709 | { |
710 | char *i,*j; |
711 | i=(char*) str; j=(char*) search-1; |
712 | while (j != search_end) |
713 | if (*i-- != *j--) goto skip; |
714 | return (int) (i-Ptr) +1; |
715 | } |
716 | } |
717 | } |
718 | return -1; |
719 | } |
720 | |
721 | /* |
722 | Replace substring with string |
723 | If wrong parameter or not enough memory, do nothing |
724 | */ |
725 | |
726 | bool String::replace(uint32 offset,uint32 arg_length,const String &to) |
727 | { |
728 | return replace(offset,arg_length,to.ptr(),to.length()); |
729 | } |
730 | |
731 | bool String::replace(uint32 offset,uint32 arg_length, |
732 | const char *to, uint32 to_length) |
733 | { |
734 | long diff = (long) to_length-(long) arg_length; |
735 | if (offset+arg_length <= str_length) |
736 | { |
737 | if (diff < 0) |
738 | { |
739 | if (to_length) |
740 | memcpy(Ptr+offset,to,to_length); |
741 | bmove(Ptr+offset+to_length,Ptr+offset+arg_length, |
742 | str_length-offset-arg_length); |
743 | } |
744 | else |
745 | { |
746 | if (diff) |
747 | { |
748 | if (realloc_with_extra_if_needed(str_length+(uint32) diff)) |
749 | return TRUE; |
750 | bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length, |
751 | str_length-offset-arg_length); |
752 | } |
753 | if (to_length) |
754 | memcpy(Ptr+offset,to,to_length); |
755 | } |
756 | str_length+=(uint32) diff; |
757 | } |
758 | return FALSE; |
759 | } |
760 | |
761 | |
762 | // added by Holyfoot for "geometry" needs |
763 | int String::reserve(size_t space_needed, size_t grow_by) |
764 | { |
765 | if (Alloced_length < str_length + space_needed) |
766 | { |
767 | if (realloc(Alloced_length + MY_MAX(space_needed, grow_by) - 1)) |
768 | return TRUE; |
769 | } |
770 | return FALSE; |
771 | } |
772 | |
773 | void String::qs_append(const char *str, size_t len) |
774 | { |
775 | memcpy(Ptr + str_length, str, len + 1); |
776 | str_length += (uint32)len; |
777 | } |
778 | |
779 | void String::qs_append(double d) |
780 | { |
781 | char *buff = Ptr + str_length; |
782 | str_length+= (uint32) my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, |
783 | NULL); |
784 | } |
785 | |
786 | void String::qs_append(double *d) |
787 | { |
788 | double ld; |
789 | float8get(ld, (char*) d); |
790 | qs_append(ld); |
791 | } |
792 | |
793 | void String::qs_append(int i) |
794 | { |
795 | char *buff= Ptr + str_length; |
796 | char *end= int10_to_str(i, buff, -10); |
797 | str_length+= (int) (end-buff); |
798 | } |
799 | |
800 | void String::qs_append(ulonglong i) |
801 | { |
802 | char *buff= Ptr + str_length; |
803 | char *end= longlong10_to_str(i, buff, 10); |
804 | str_length+= (int) (end-buff); |
805 | } |
806 | |
807 | /* |
808 | Compare strings according to collation, without end space. |
809 | |
810 | SYNOPSIS |
811 | sortcmp() |
812 | s First string |
813 | t Second string |
814 | cs Collation |
815 | |
816 | NOTE: |
817 | Normally this is case sensitive comparison |
818 | |
819 | RETURN |
820 | < 0 s < t |
821 | 0 s == t |
822 | > 0 s > t |
823 | */ |
824 | |
825 | |
826 | int sortcmp(const String *s,const String *t, CHARSET_INFO *cs) |
827 | { |
828 | return cs->coll->strnncollsp(cs, |
829 | (uchar *) s->ptr(),s->length(), |
830 | (uchar *) t->ptr(),t->length()); |
831 | } |
832 | |
833 | |
834 | /* |
835 | Compare strings byte by byte. End spaces are also compared. |
836 | |
837 | SYNOPSIS |
838 | stringcmp() |
839 | s First string |
840 | t Second string |
841 | |
842 | NOTE: |
843 | Strings are compared as a stream of uchars |
844 | |
845 | RETURN |
846 | < 0 s < t |
847 | 0 s == t |
848 | > 0 s > t |
849 | */ |
850 | |
851 | |
852 | int stringcmp(const String *s,const String *t) |
853 | { |
854 | uint32 s_len=s->length(),t_len=t->length(),len=MY_MIN(s_len,t_len); |
855 | int cmp= memcmp(s->ptr(), t->ptr(), len); |
856 | return (cmp) ? cmp : (int) (s_len - t_len); |
857 | } |
858 | |
859 | |
860 | /** |
861 | Return a string which has the same value with "from" and |
862 | which is safe to modify, trying to avoid unnecessary allocation |
863 | and copying when possible. |
864 | |
865 | @param to Buffer. Must not be a constant string. |
866 | @param from Some existing value. We'll try to reuse it. |
867 | Can be a constant or a variable string. |
868 | @param from_length The total size that will be possibly needed. |
869 | Note, can be 0. |
870 | |
871 | Note, in some cases "from" and "to" can point to the same object. |
872 | |
873 | If "from" is a variable string and its allocated memory is enough |
874 | to store "from_length" bytes, then "from" is returned as is. |
875 | |
876 | If "from" is a variable string and its allocated memory is not enough |
877 | to store "from_length" bytes, then "from" is reallocated and returned. |
878 | |
879 | Otherwise (if "from" is a constant string, or looks like a constant string), |
880 | then "to" is reallocated to fit "from_length" bytes, the value is copied |
881 | from "from" to "to", then "to" is returned. |
882 | */ |
883 | String *copy_if_not_alloced(String *to,String *from,uint32 from_length) |
884 | { |
885 | DBUG_ASSERT(to); |
886 | /* |
887 | If "from" is a constant string, e.g.: |
888 | SELECT INSERT('', <pos>, <length>, <replacement>); |
889 | we should not return it. See MDEV-9332. |
890 | |
891 | The code below detects different string types: |
892 | |
893 | a. All constant strings have Alloced_length==0 and alloced==false. |
894 | They point to a static memory array, or a mem_root memory, |
895 | and should stay untouched until the end of their life cycle. |
896 | Not safe to reuse. |
897 | |
898 | b. Some variable string have Alloced_length==0 and alloced==false initially, |
899 | they are not bound to any char array and allocate space on the first use |
900 | (and become #d). A typical example of such String is Item::str_value. |
901 | This type of string could be reused, but there is no a way to distinguish |
902 | them from the true constant strings (#a). |
903 | Not safe to reuse. |
904 | |
905 | c. Some variable strings have Alloced_length>0 and alloced==false. |
906 | They point to a fixed size writtable char array (typically on stack) |
907 | initially but can later allocate more space on the heap when the |
908 | fixed size array is too small (these strings become #d after allocation). |
909 | Safe to reuse. |
910 | |
911 | d. Some variable strings have Alloced_length>0 and alloced==true. |
912 | They already store data on the heap. |
913 | Safe to reuse. |
914 | |
915 | e. Some strings can have Alloced_length==0 and alloced==true. |
916 | This type of strings allocate space on the heap, but then are marked |
917 | as constant strings using String::mark_as_const(). |
918 | A typical example - the result of a character set conversion |
919 | of a constant string. |
920 | Not safe to reuse. |
921 | */ |
922 | if (from->Alloced_length > 0) // "from" is #c or #d (not a constant) |
923 | { |
924 | if (from->Alloced_length >= from_length) |
925 | return from; // #c or #d (large enough to store from_length bytes) |
926 | |
927 | if (from->alloced) |
928 | { |
929 | (void) from->realloc(from_length); |
930 | return from; // #d (reallocated to fit from_length bytes) |
931 | } |
932 | /* |
933 | "from" is of type #c. It currently points to a writtable char array |
934 | (typically on stack), but is too small for "from_length" bytes. |
935 | We need to reallocate either "from" or "to". |
936 | |
937 | "from" typically points to a temporary buffer inside Item_xxx::val_str(), |
938 | or to Item::str_value, and thus is "less permanent" than "to". |
939 | |
940 | Reallocating "to" may give more benifits: |
941 | - "to" can point to a "more permanent" storage and can be reused |
942 | for multiple rows, e.g. str_buffer in Protocol::send_result_set_row(), |
943 | which is passed to val_str() for all string type rows. |
944 | - "from" can stay pointing to its original fixed size stack char array, |
945 | and thus reduce the total amount of my_alloc/my_free. |
946 | */ |
947 | } |
948 | |
949 | if (from == to) |
950 | { |
951 | /* |
952 | Possible string types: |
953 | #a not possible (constants should not be passed as "to") |
954 | #b possible (a fresh variable with no associated char buffer) |
955 | #c possible (a variable with a char buffer, |
956 | in case it's smaller than fixed_length) |
957 | #d not possible (handled earlier) |
958 | #e not possible (constants should not be passed as "to") |
959 | |
960 | If a string of types #a or #e appears here, that means the caller made |
961 | something wrong. Otherwise, it's safe to reallocate and return "to". |
962 | |
963 | Note, as we can't distinguish between #a and #b for sure, |
964 | so we can't assert "not #a", but we can at least assert "not #e". |
965 | */ |
966 | DBUG_ASSERT(!from->alloced || from->Alloced_length > 0); // Not #e |
967 | |
968 | (void) from->realloc(from_length); |
969 | return from; |
970 | } |
971 | if (to->realloc(from_length)) |
972 | return from; // Actually an error |
973 | if ((to->str_length=MY_MIN(from->str_length,from_length))) |
974 | memcpy(to->Ptr,from->Ptr,to->str_length); |
975 | to->str_charset=from->str_charset; |
976 | return to; // "from" was of types #a, #b, #e, or small #c. |
977 | } |
978 | |
979 | |
980 | /**************************************************************************** |
981 | Help functions |
982 | ****************************************************************************/ |
983 | |
984 | /** |
985 | Copy string with HEX-encoding of "bad" characters. |
986 | |
  @details This function copies the string pointed to by "src"
  to the string pointed to by "dst". Not more than "srclen" bytes
  are read from "src". Any sequences of bytes representing
  a not-well-formed substring (according to cs) are hex-encoded,
  and all well-formed substrings (according to cs) are copied as is.
  Not more than "dstlen" bytes are written to "dst". The number
  of bytes written to "dst" is returned.
994 | |
995 | @param cs character set pointer of the destination string |
996 | @param[out] dst destination string |
997 | @param dstlen size of dst |
998 | @param src source string |
999 | @param srclen length of src |
1000 | |
1001 | @retval result length |
1002 | */ |
1003 | |
1004 | size_t |
1005 | my_copy_with_hex_escaping(CHARSET_INFO *cs, |
1006 | char *dst, size_t dstlen, |
1007 | const char *src, size_t srclen) |
1008 | { |
1009 | const char *srcend= src + srclen; |
1010 | char *dst0= dst; |
1011 | |
1012 | for ( ; src < srcend ; ) |
1013 | { |
1014 | size_t chlen; |
1015 | if ((chlen= my_ismbchar(cs, src, srcend))) |
1016 | { |
1017 | if (dstlen < chlen) |
1018 | break; /* purecov: inspected */ |
1019 | memcpy(dst, src, chlen); |
1020 | src+= chlen; |
1021 | dst+= chlen; |
1022 | dstlen-= chlen; |
1023 | } |
1024 | else if (*src & 0x80) |
1025 | { |
1026 | if (dstlen < 4) |
1027 | break; /* purecov: inspected */ |
1028 | *dst++= '\\'; |
1029 | *dst++= 'x'; |
1030 | APPEND_HEX(dst, (uchar) *src); |
1031 | src++; |
1032 | dstlen-= 4; |
1033 | } |
1034 | else |
1035 | { |
1036 | if (dstlen < 1) |
1037 | break; /* purecov: inspected */ |
1038 | *dst++= *src++; |
1039 | dstlen--; |
1040 | } |
1041 | } |
1042 | return dst - dst0; |
1043 | } |
1044 | |
1045 | |
1046 | /* |
1047 | Copy a string, |
1048 | with optional character set conversion, |
1049 | with optional left padding (for binary -> UCS2 conversion) |
1050 | |
1051 | Bad input bytes are replaced to '?'. |
1052 | |
1053 | The string that is written to "to" is always well-formed. |
1054 | |
1055 | @param to The destination string |
1056 | @param to_length Space available in "to" |
1057 | @param to_cs Character set of the "to" string |
1058 | @param from The source string |
1059 | @param from_length Length of the "from" string |
1060 | @param from_cs Character set of the "from" string |
1061 | @param nchars Copy not more than "nchars" characters |
1062 | |
1063 | The members as set as follows: |
1064 | m_well_formed_error_pos To the position when "from" is not well formed |
1065 | or NULL otherwise. |
1066 | m_cannot_convert_error_pos To the position where a not convertable |
1067 | character met, or NULL otherwise. |
1068 | m_source_end_pos To the position where scanning of the "from" |
1069 | string stopped. |
1070 | |
1071 | @returns number of bytes that were written to 'to' |
1072 | */ |
1073 | uint |
1074 | String_copier::well_formed_copy(CHARSET_INFO *to_cs, |
1075 | char *to, size_t to_length, |
1076 | CHARSET_INFO *from_cs, |
1077 | const char *from, size_t from_length, size_t nchars) |
1078 | { |
1079 | if ((to_cs == &my_charset_bin) || |
1080 | (from_cs == &my_charset_bin) || |
1081 | (to_cs == from_cs) || |
1082 | my_charset_same(from_cs, to_cs)) |
1083 | { |
1084 | m_cannot_convert_error_pos= NULL; |
1085 | return (uint) to_cs->cset->copy_fix(to_cs, to, to_length, from, from_length, |
1086 | nchars, this); |
1087 | } |
1088 | return (uint) my_convert_fix(to_cs, to, to_length, from_cs, from, from_length, |
1089 | nchars, this, this); |
1090 | } |
1091 | |
1092 | |
1093 | |
1094 | /* |
1095 | Append characters to a single-quoted string '...', escaping special |
1096 | characters with backslashes as necessary. |
1097 | Does not add the enclosing quotes, this is left up to caller. |
1098 | */ |
1099 | #define APPEND(X) if (append(X)) return 1; else break |
1100 | bool String::append_for_single_quote(const char *st, size_t len) |
1101 | { |
1102 | const char *end= st+len; |
1103 | for (; st < end; st++) |
1104 | { |
1105 | uchar c= *st; |
1106 | switch (c) |
1107 | { |
1108 | case '\\': APPEND(STRING_WITH_LEN("\\\\" )); |
1109 | case '\0': APPEND(STRING_WITH_LEN("\\0" )); |
1110 | case '\'': APPEND(STRING_WITH_LEN("\\'" )); |
1111 | case '\n': APPEND(STRING_WITH_LEN("\\n" )); |
1112 | case '\r': APPEND(STRING_WITH_LEN("\\r" )); |
1113 | case '\032': APPEND(STRING_WITH_LEN("\\Z" )); |
1114 | default: APPEND(c); |
1115 | } |
1116 | } |
1117 | return 0; |
1118 | } |
1119 | |
1120 | void String::print(String *str) const |
1121 | { |
1122 | str->append_for_single_quote(Ptr, str_length); |
1123 | } |
1124 | |
1125 | |
1126 | void String::print_with_conversion(String *print, CHARSET_INFO *cs) const |
1127 | { |
1128 | StringBuffer<256> tmp(cs); |
1129 | uint errors= 0; |
1130 | tmp.copy(this, cs, &errors); |
1131 | tmp.print(print); |
1132 | } |
1133 | |
1134 | |
1135 | /* |
1136 | Exchange state of this object and argument. |
1137 | |
1138 | SYNOPSIS |
1139 | String::swap() |
1140 | |
1141 | RETURN |
1142 | Target string will contain state of this object and vice versa. |
1143 | */ |
1144 | |
1145 | void String::swap(String &s) |
1146 | { |
1147 | swap_variables(char *, Ptr, s.Ptr); |
1148 | swap_variables(uint32, str_length, s.str_length); |
1149 | swap_variables(uint32, Alloced_length, s.Alloced_length); |
1150 | swap_variables(bool, alloced, s.alloced); |
1151 | swap_variables(CHARSET_INFO*, str_charset, s.str_charset); |
1152 | } |
1153 | |
1154 | |
1155 | /** |
1156 | Convert string to printable ASCII string |
1157 | |
1158 | @details This function converts input string "from" replacing non-ASCII bytes |
1159 | with hexadecimal sequences ("\xXX") optionally appending "..." to the end of |
1160 | the resulting string. |
1161 | This function used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages, |
1162 | e.g. when a string cannot be converted to a result charset. |
1163 | |
1164 | |
1165 | @param to output buffer |
1166 | @param to_len size of the output buffer (8 bytes or greater) |
1167 | @param from input string |
1168 | @param from_len size of the input string |
1169 | @param from_cs input charset |
1170 | @param nbytes maximal number of bytes to convert (from_len if 0) |
1171 | |
1172 | @return number of bytes in the output string |
1173 | */ |
1174 | |
1175 | uint convert_to_printable(char *to, size_t to_len, |
1176 | const char *from, size_t from_len, |
1177 | CHARSET_INFO *from_cs, size_t nbytes /*= 0*/) |
1178 | { |
1179 | /* needs at least 8 bytes for '\xXX...' and zero byte */ |
1180 | DBUG_ASSERT(to_len >= 8); |
1181 | |
1182 | char *t= to; |
1183 | char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end |
1184 | const char *f= from; |
1185 | const char *f_end= from + (nbytes ? MY_MIN(from_len, nbytes) : from_len); |
1186 | char *dots= to; // last safe place to append '...' |
1187 | |
1188 | if (!f || t == t_end) |
1189 | return 0; |
1190 | |
1191 | for (; t < t_end && f < f_end; f++) |
1192 | { |
1193 | /* |
1194 | If the source string is ASCII compatible (mbminlen==1) |
1195 | and the source character is in ASCII printable range (0x20..0x7F), |
1196 | then display the character as is. |
1197 | |
1198 | Otherwise, if the source string is not ASCII compatible (e.g. UCS2), |
1199 | or the source character is not in the printable range, |
1200 | then print the character using HEX notation. |
1201 | */ |
1202 | if (((unsigned char) *f) >= 0x20 && |
1203 | ((unsigned char) *f) <= 0x7F && |
1204 | from_cs->mbminlen == 1) |
1205 | { |
1206 | *t++= *f; |
1207 | } |
1208 | else |
1209 | { |
1210 | if (t_end - t < 4) // \xXX |
1211 | break; |
1212 | *t++= '\\'; |
1213 | *t++= 'x'; |
1214 | APPEND_HEX(t, *f); |
1215 | } |
1216 | if (t_end - t >= 3) // '...' |
1217 | dots= t; |
1218 | } |
1219 | if (f < from + from_len) |
1220 | memcpy(dots, STRING_WITH_LEN("...\0" )); |
1221 | else |
1222 | *t= '\0'; |
1223 | return (uint) (t - to); |
1224 | } |
1225 | |