1/* Copyright (c) 2002, 2013, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2014, SkySQL Ab.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17#include "strings_def.h"
18#include <m_ctype.h>
19#include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */
20#include <errno.h>
21
22#include "stdarg.h"
23
24/*
25 Returns the number of bytes required for strnxfrm().
26*/
27
28size_t my_strnxfrmlen_simple(CHARSET_INFO *cs, size_t len)
29{
30 return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
31}
32
33
34/*
35 Converts a string into its sort key.
36
37 SYNOPSIS
38 my_strnxfrm_xxx()
39
40 IMPLEMENTATION
41
    The my_strnxfrm_xxx() function transforms a string pointed to by
    'src' with length 'srclen' according to the charset+collation
    pair 'cs' and copies the result key into 'dst'.

    Comparing two strings using memcmp() after my_strnxfrm_xxx()
    is equal to comparing two original strings with my_strnncollsp_xxx().

    Not more than 'dstlen' bytes are written into 'dst'.
    To guarantee that the whole string is transformed, 'dstlen' must be
    at least srclen*cs->strxfrm_multiply bytes long. Otherwise,
    a subsequent memcmp() may return an inaccurate result.

    If the source string is too short to fill whole 'dstlen' bytes,
    then the 'dst' string is padded up to 'dstlen', ensuring that:
56
57 "a" == "a "
58 "a\0" < "a"
59 "a\0" < "a "
60
61 my_strnxfrm_simple() is implemented for 8bit charsets and
62 simple collations with one-to-one string->key transformation.
63
64 See also implementations for various charsets/collations in
65 other ctype-xxx.c files.
66
67 RETURN
68
69 Target len 'dstlen'.
70
71*/
72
73
74size_t my_strnxfrm_simple_internal(CHARSET_INFO * cs,
75 uchar *dst, size_t dstlen, uint *nweights,
76 const uchar *src, size_t srclen)
77{
78 const uchar *map= cs->sort_order;
79 uchar *d0= dst;
80 uint frmlen;
81 if ((frmlen= (uint)MY_MIN(dstlen, *nweights)) > srclen)
82 frmlen= (uint)srclen;
83 if (dst != src)
84 {
85 const uchar *end;
86 for (end= src + frmlen; src < end;)
87 *dst++= map[*src++];
88 }
89 else
90 {
91 const uchar *end;
92 for (end= dst + frmlen; dst < end; dst++)
93 *dst= map[(uchar) *dst];
94 }
95 *nweights-= frmlen;
96 return dst - d0;
97}
98
99
100size_t my_strnxfrm_simple(CHARSET_INFO * cs,
101 uchar *dst, size_t dstlen, uint nweights,
102 const uchar *src, size_t srclen, uint flags)
103{
104 uchar *d0= dst;
105 dst= d0 + my_strnxfrm_simple_internal(cs, dst, dstlen, &nweights,
106 src, srclen);
107 return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen,
108 nweights, flags, 0);
109}
110
111
112size_t my_strnxfrm_simple_nopad(CHARSET_INFO * cs,
113 uchar *dst, size_t dstlen, uint nweights,
114 const uchar *src, size_t srclen, uint flags)
115{
116 uchar *d0= dst;
117 dst= d0 + my_strnxfrm_simple_internal(cs, dst, dstlen, &nweights,
118 src, srclen);
119 return my_strxfrm_pad_desc_and_reverse_nopad(cs, d0, dst, d0 + dstlen,
120 nweights, flags, 0);
121}
122
123
124int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen,
125 const uchar *t, size_t tlen,
126 my_bool t_is_prefix)
127{
128 size_t len = ( slen > tlen ) ? tlen : slen;
129 const uchar *map= cs->sort_order;
130 if (t_is_prefix && slen > tlen)
131 slen=tlen;
132 while (len--)
133 {
134 if (map[*s++] != map[*t++])
135 return ((int) map[s[-1]] - (int) map[t[-1]]);
136 }
137 /*
138 We can't use (slen - tlen) here as the result may be outside of the
139 precision of a signed int
140 */
141 return slen > tlen ? 1 : slen < tlen ? -1 : 0 ;
142}
143
144
145/*
146 Compare strings, discarding end space
147
148 SYNOPSIS
149 my_strnncollsp_simple()
150 cs character set handler
151 a First string to compare
152 a_length Length of 'a'
153 b Second string to compare
154 b_length Length of 'b'
155
156 IMPLEMENTATION
    If one string is shorter than the other, then we space-extend the
    shorter one so that the strings have equal length.
159
160 This will ensure that the following things hold:
161
162 "a" == "a "
163 "a\0" < "a"
164 "a\0" < "a "
165
166 RETURN
167 < 0 a < b
168 = 0 a == b
169 > 0 a > b
170*/
171
172int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
173 const uchar *b, size_t b_length)
174{
175 const uchar *map= cs->sort_order, *end;
176 size_t length;
177 int res;
178
179 end= a + (length= MY_MIN(a_length, b_length));
180 while (a < end)
181 {
182 if (map[*a++] != map[*b++])
183 return ((int) map[a[-1]] - (int) map[b[-1]]);
184 }
185 res= 0;
186 if (a_length != b_length)
187 {
188 int swap= 1;
189 /*
190 Check the next not space character of the longer key. If it's < ' ',
191 then it's smaller than the other key.
192 */
193 if (a_length < b_length)
194 {
195 /* put shorter key in s */
196 a_length= b_length;
197 a= b;
198 swap= -1; /* swap sign of result */
199 res= -res;
200 }
201 for (end= a + a_length-length; a < end ; a++)
202 {
203 if (map[*a] != map[' '])
204 return (map[*a] < map[' ']) ? -swap : swap;
205 }
206 }
207 return res;
208}
209
210
211int my_strnncollsp_simple_nopad(CHARSET_INFO * cs,
212 const uchar *a, size_t a_length,
213 const uchar *b, size_t b_length)
214{
215 return my_strnncoll_simple(cs, a, a_length, b, b_length, FALSE);
216}
217
218
219size_t my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
220{
221 register const uchar *map= cs->to_upper;
222 char *str_orig= str;
223 while ((*str= (char) map[(uchar) *str]) != 0)
224 str++;
225 return (size_t) (str - str_orig);
226}
227
228
229size_t my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
230{
231 register const uchar *map= cs->to_lower;
232 char *str_orig= str;
233 while ((*str= (char) map[(uchar) *str]) != 0)
234 str++;
235 return (size_t) (str - str_orig);
236}
237
238
239size_t my_caseup_8bit(CHARSET_INFO * cs, char *src, size_t srclen,
240 char *dst __attribute__((unused)),
241 size_t dstlen __attribute__((unused)))
242{
243 char *end= src + srclen;
244 register const uchar *map= cs->to_upper;
245 DBUG_ASSERT(src == dst && srclen == dstlen);
246 for ( ; src != end ; src++)
247 *src= (char) map[(uchar) *src];
248 return srclen;
249}
250
251
252size_t my_casedn_8bit(CHARSET_INFO * cs, char *src, size_t srclen,
253 char *dst __attribute__((unused)),
254 size_t dstlen __attribute__((unused)))
255{
256 char *end= src + srclen;
257 register const uchar *map=cs->to_lower;
258 DBUG_ASSERT(src == dst && srclen == dstlen);
259 for ( ; src != end ; src++)
260 *src= (char) map[(uchar) *src];
261 return srclen;
262}
263
264int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t)
265{
266 register const uchar *map=cs->to_upper;
267 while (map[(uchar) *s] == map[(uchar) *t++])
268 if (!*s++) return 0;
269 return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]);
270}
271
272
273int my_charlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
274 const uchar *str, const uchar *end)
275{
276 return str >= end ? MY_CS_TOOSMALL : 1;
277}
278
279
280int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
281 const uchar *str,
282 const uchar *end __attribute__((unused)))
283{
284 if (str >= end)
285 return MY_CS_TOOSMALL;
286
287 *wc=cs->tab_to_uni[*str];
288 return (!wc[0] && str[0]) ? -1 : 1;
289}
290
291int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
292 uchar *str,
293 uchar *end)
294{
295 MY_UNI_IDX *idx;
296
297 if (str >= end)
298 return MY_CS_TOOSMALL;
299
300 for (idx=cs->tab_from_uni; idx->tab ; idx++)
301 {
302 if (idx->from <= wc && idx->to >= wc)
303 {
304 str[0]= idx->tab[wc - idx->from];
305 return (!str[0] && wc) ? MY_CS_ILUNI : 1;
306 }
307 }
308 return MY_CS_ILUNI;
309}
310
311
312/*
313 We can't use vsprintf here as it's not guaranteed to return
314 the length on all operating systems.
315 This function is also not called in a safe environment, so the
316 end buffer must be checked.
317*/
318
319size_t my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)),
320 char* to, size_t n __attribute__((unused)),
321 const char* fmt, ...)
322{
323 va_list args;
324 size_t result;
325 va_start(args,fmt);
326 result= my_vsnprintf(to, n, fmt, args);
327 va_end(args);
328 return result;
329}
330
331
332void my_hash_sort_simple_nopad(CHARSET_INFO *cs,
333 const uchar *key, size_t len,
334 ulong *nr1, ulong *nr2)
335{
336 register const uchar *sort_order=cs->sort_order;
337 const uchar *end= key + len;
338 register ulong m1= *nr1, m2= *nr2;
339 for (; key < (uchar*) end ; key++)
340 {
341 MY_HASH_ADD(m1, m2, (uint) sort_order[(uint) *key]);
342 }
343 *nr1= m1;
344 *nr2= m2;
345}
346
347
348void my_hash_sort_simple(CHARSET_INFO *cs,
349 const uchar *key, size_t len,
350 ulong *nr1, ulong *nr2)
351{
352 register const uchar *sort_order=cs->sort_order;
353 const uchar *end;
354 uint16 space_weight= sort_order[' '];
355
356 /*
357 Remove all trailing characters that are equal to space.
358 We have to do this to be able to compare 'A ' and 'A' as identical.
359
360 If the key is long enough, cut the trailing spaces (0x20) using an
361 optimized function implemented in skip_trailing_spaces().
362
363 "len > 16" is just some heuristic here.
364 Calling skip_triling_space() for short values is not desirable,
365 because its initialization block may be more expensive than the
366 performance gained.
367 */
368
369 end= len > 16 ? skip_trailing_space(key, len) : key + len;
370
371 /*
372 We removed all trailing characters that are binary equal to space 0x20.
373 Now remove all trailing characters that have weights equal to space.
374 Some 8bit simple collations may have such characters:
375 - cp1250_general_ci 0xA0 NO-BREAK SPACE == 0x20 SPACE
376 - cp1251_ukrainian_ci 0x60 GRAVE ACCENT == 0x20 SPACE
377 - koi8u_general_ci 0x60 GRAVE ACCENT == 0x20 SPACE
378 */
379
380 for ( ; key < end ; )
381 {
382 if (sort_order[*--end] != space_weight)
383 {
384 end++;
385 break;
386 }
387 }
388 my_hash_sort_simple_nopad(cs, key, end - key, nr1, nr2);
389}
390
391
392long my_strntol_8bit(CHARSET_INFO *cs,
393 const char *nptr, size_t l, int base,
394 char **endptr, int *err)
395{
396 int negative;
397 register uint32 cutoff;
398 register uint cutlim;
399 register uint32 i;
400 register const char *s;
401 register uchar c;
402 const char *save, *e;
403 int overflow;
404
405 *err= 0; /* Initialize error indicator */
406
407 s = nptr;
408 e = nptr+l;
409
410 for ( ; s<e && my_isspace(cs, *s) ; s++);
411
412 if (s == e)
413 {
414 goto noconv;
415 }
416
417 /* Check for a sign. */
418 if (*s == '-')
419 {
420 negative = 1;
421 ++s;
422 }
423 else if (*s == '+')
424 {
425 negative = 0;
426 ++s;
427 }
428 else
429 negative = 0;
430
431 save = s;
432 cutoff = ((uint32)~0L) / (uint32) base;
433 cutlim = (uint) (((uint32)~0L) % (uint32) base);
434
435 overflow = 0;
436 i = 0;
437 for (c = *s; s != e; c = *++s)
438 {
439 if (c>='0' && c<='9')
440 c -= '0';
441 else if (c>='A' && c<='Z')
442 c = c - 'A' + 10;
443 else if (c>='a' && c<='z')
444 c = c - 'a' + 10;
445 else
446 break;
447 if (c >= base)
448 break;
449 if (i > cutoff || (i == cutoff && c > cutlim))
450 overflow = 1;
451 else
452 {
453 i *= (uint32) base;
454 i += c;
455 }
456 }
457
458 if (s == save)
459 goto noconv;
460
461 if (endptr != NULL)
462 *endptr = (char *) s;
463
464 if (negative)
465 {
466 if (i > (uint32) INT_MIN32)
467 overflow = 1;
468 }
469 else if (i > INT_MAX32)
470 overflow = 1;
471
472 if (overflow)
473 {
474 err[0]= ERANGE;
475 return negative ? INT_MIN32 : INT_MAX32;
476 }
477
478 return (negative ? -((long) i) : (long) i);
479
480noconv:
481 err[0]= EDOM;
482 if (endptr != NULL)
483 *endptr = (char *) nptr;
484 return 0L;
485}
486
487
488ulong my_strntoul_8bit(CHARSET_INFO *cs,
489 const char *nptr, size_t l, int base,
490 char **endptr, int *err)
491{
492 int negative;
493 register uint32 cutoff;
494 register uint cutlim;
495 register uint32 i;
496 register const char *s;
497 register uchar c;
498 const char *save, *e;
499 int overflow;
500
501 *err= 0; /* Initialize error indicator */
502
503 s = nptr;
504 e = nptr+l;
505
506 for( ; s<e && my_isspace(cs, *s); s++);
507
508 if (s==e)
509 {
510 goto noconv;
511 }
512
513 if (*s == '-')
514 {
515 negative = 1;
516 ++s;
517 }
518 else if (*s == '+')
519 {
520 negative = 0;
521 ++s;
522 }
523 else
524 negative = 0;
525
526 save = s;
527 cutoff = ((uint32)~0L) / (uint32) base;
528 cutlim = (uint) (((uint32)~0L) % (uint32) base);
529 overflow = 0;
530 i = 0;
531
532 for (c = *s; s != e; c = *++s)
533 {
534 if (c>='0' && c<='9')
535 c -= '0';
536 else if (c>='A' && c<='Z')
537 c = c - 'A' + 10;
538 else if (c>='a' && c<='z')
539 c = c - 'a' + 10;
540 else
541 break;
542 if (c >= base)
543 break;
544 if (i > cutoff || (i == cutoff && c > cutlim))
545 overflow = 1;
546 else
547 {
548 i *= (uint32) base;
549 i += c;
550 }
551 }
552
553 if (s == save)
554 goto noconv;
555
556 if (endptr != NULL)
557 *endptr = (char *) s;
558
559 if (overflow)
560 {
561 err[0]= ERANGE;
562 return (~(uint32) 0);
563 }
564
565 return (negative ? -((long) i) : (long) i);
566
567noconv:
568 err[0]= EDOM;
569 if (endptr != NULL)
570 *endptr = (char *) nptr;
571 return 0L;
572}
573
574
575longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
576 const char *nptr, size_t l, int base,
577 char **endptr,int *err)
578{
579 int negative;
580 register ulonglong cutoff;
581 register uint cutlim;
582 register ulonglong i;
583 register const char *s, *e;
584 const char *save;
585 int overflow;
586
587 *err= 0; /* Initialize error indicator */
588
589 s = nptr;
590 e = nptr+l;
591
592 for(; s<e && my_isspace(cs,*s); s++);
593
594 if (s == e)
595 {
596 goto noconv;
597 }
598
599 if (*s == '-')
600 {
601 negative = 1;
602 ++s;
603 }
604 else if (*s == '+')
605 {
606 negative = 0;
607 ++s;
608 }
609 else
610 negative = 0;
611
612 save = s;
613
614 cutoff = (~(ulonglong) 0) / (unsigned long int) base;
615 cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
616
617 overflow = 0;
618 i = 0;
619 for ( ; s != e; s++)
620 {
621 register uchar c= *s;
622 if (c>='0' && c<='9')
623 c -= '0';
624 else if (c>='A' && c<='Z')
625 c = c - 'A' + 10;
626 else if (c>='a' && c<='z')
627 c = c - 'a' + 10;
628 else
629 break;
630 if (c >= base)
631 break;
632 if (i > cutoff || (i == cutoff && c > cutlim))
633 overflow = 1;
634 else
635 {
636 i *= (ulonglong) base;
637 i += c;
638 }
639 }
640
641 if (s == save)
642 goto noconv;
643
644 if (endptr != NULL)
645 *endptr = (char *) s;
646
647 if (negative)
648 {
649 if (i > (ulonglong) LONGLONG_MIN)
650 overflow = 1;
651 }
652 else if (i > (ulonglong) LONGLONG_MAX)
653 overflow = 1;
654
655 if (overflow)
656 {
657 err[0]= ERANGE;
658 return negative ? LONGLONG_MIN : LONGLONG_MAX;
659 }
660
661 return (negative ? -((longlong) i) : (longlong) i);
662
663noconv:
664 err[0]= EDOM;
665 if (endptr != NULL)
666 *endptr = (char *) nptr;
667 return 0L;
668}
669
670
671ulonglong my_strntoull_8bit(CHARSET_INFO *cs,
672 const char *nptr, size_t l, int base,
673 char **endptr, int *err)
674{
675 int negative;
676 register ulonglong cutoff;
677 register uint cutlim;
678 register ulonglong i;
679 register const char *s, *e;
680 const char *save;
681 int overflow;
682
683 *err= 0; /* Initialize error indicator */
684
685 s = nptr;
686 e = nptr+l;
687
688 for(; s<e && my_isspace(cs,*s); s++);
689
690 if (s == e)
691 {
692 goto noconv;
693 }
694
695 if (*s == '-')
696 {
697 negative = 1;
698 ++s;
699 }
700 else if (*s == '+')
701 {
702 negative = 0;
703 ++s;
704 }
705 else
706 negative = 0;
707
708 save = s;
709
710 cutoff = (~(ulonglong) 0) / (unsigned long int) base;
711 cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
712
713 overflow = 0;
714 i = 0;
715 for ( ; s != e; s++)
716 {
717 register uchar c= *s;
718
719 if (c>='0' && c<='9')
720 c -= '0';
721 else if (c>='A' && c<='Z')
722 c = c - 'A' + 10;
723 else if (c>='a' && c<='z')
724 c = c - 'a' + 10;
725 else
726 break;
727 if (c >= base)
728 break;
729 if (i > cutoff || (i == cutoff && c > cutlim))
730 overflow = 1;
731 else
732 {
733 i *= (ulonglong) base;
734 i += c;
735 }
736 }
737
738 if (s == save)
739 goto noconv;
740
741 if (endptr != NULL)
742 *endptr = (char *) s;
743
744 if (overflow)
745 {
746 err[0]= ERANGE;
747 return (~(ulonglong) 0);
748 }
749
750 return (negative ? -((longlong) i) : (longlong) i);
751
752noconv:
753 err[0]= EDOM;
754 if (endptr != NULL)
755 *endptr = (char *) nptr;
756 return 0L;
757}
758
759
760/*
761 Read double from string
762
763 SYNOPSIS:
764 my_strntod_8bit()
765 cs Character set information
766 str String to convert to double
767 length Optional length for string.
768 end result pointer to end of converted string
769 err Error number if failed conversion
770
771 NOTES:
772 If length is not INT_MAX32 or str[length] != 0 then the given str must
773 be writeable
774 If length == INT_MAX32 the str must be \0 terminated.
775
776 It's implemented this way to save a buffer allocation and a memory copy.
777
778 RETURN
779 Value of number in string
780*/
781
782
783double my_strntod_8bit(CHARSET_INFO *cs __attribute__((unused)),
784 char *str, size_t length,
785 char **end, int *err)
786{
787 if (length == INT_MAX32)
788 length= 65535; /* Should be big enough */
789 *end= str + length;
790 return my_strtod(str, end, err);
791}
792
793
794/*
795 This is a fast version optimized for the case of radix 10 / -10
796
797 Assume len >= 1
798*/
799
800size_t my_long10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
801 char *dst, size_t len, int radix, long int val)
802{
803 char buffer[66];
804 register char *p, *e;
805 long int new_val;
806 uint sign=0;
807 unsigned long int uval = (unsigned long int) val;
808
809 e = p = &buffer[sizeof(buffer)-1];
810 *p= 0;
811
812 if (radix < 0)
813 {
814 if (val < 0)
815 {
816 /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
817 uval= (unsigned long int)0 - uval;
818 *dst++= '-';
819 len--;
820 sign= 1;
821 }
822 }
823
824 new_val = (long) (uval / 10);
825 *--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
826 val = new_val;
827
828 while (val != 0)
829 {
830 new_val=val/10;
831 *--p = '0' + (char) (val-new_val*10);
832 val= new_val;
833 }
834
835 len= MY_MIN(len, (size_t) (e-p));
836 memcpy(dst, p, len);
837 return len+sign;
838}
839
840
841size_t my_longlong10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
842 char *dst, size_t len, int radix,
843 longlong val)
844{
845 char buffer[65];
846 register char *p, *e;
847 long long_val;
848 uint sign= 0;
849 ulonglong uval = (ulonglong)val;
850
851 if (radix < 0)
852 {
853 if (val < 0)
854 {
855 /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
856 uval = (ulonglong)0 - uval;
857 *dst++= '-';
858 len--;
859 sign= 1;
860 }
861 }
862
863 e = p = &buffer[sizeof(buffer)-1];
864 *p= 0;
865
866 if (uval == 0)
867 {
868 *--p= '0';
869 len= 1;
870 goto cnv;
871 }
872
873 while (uval > (ulonglong) LONG_MAX)
874 {
875 ulonglong quo= uval/(uint) 10;
876 uint rem= (uint) (uval- quo* (uint) 10);
877 *--p = '0' + rem;
878 uval= quo;
879 }
880
881 long_val= (long) uval;
882 while (long_val != 0)
883 {
884 long quo= long_val/10;
885 *--p = (char) ('0' + (long_val - quo*10));
886 long_val= quo;
887 }
888
889 len= MY_MIN(len, (size_t) (e-p));
890cnv:
891 memcpy(dst, p, len);
892 return len+sign;
893}
894
895
896/*
897** Compare string against string with wildcard
898** 0 if matched
899** -1 if not matched with wildcard
900** 1 if matched with wildcard
901*/
902
/*
  likeconv(s,A): normalize character A of charset s before comparing
  pattern and subject characters. With LIKE_CMP_TOUPPER defined the
  character is upper-cased, otherwise it is replaced by its sort weight.
*/
#ifdef LIKE_CMP_TOUPPER
#define likeconv(s,A) (uchar) my_toupper(s,A)
#else
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
#endif

/* Advance pointer A by one character; cs and B are not used here. */
#define INC_PTR(cs,A,B) (A)++
910
911
/*
  Recursive worker for my_wildcmp_8bit().

  Matches [str, str_end) against the pattern [wildstr, wildend), where
  'w_one' matches exactly one character, 'w_many' matches any number of
  characters and 'escape' makes the next pattern character literal.
  Characters are compared after likeconv().

  'recurse_level' is passed to my_string_stack_guard() (when that hook
  is set); if the hook returns non-zero the function gives up and
  returns 1.

  Returns 0 on a match and 1 or -1 on a mismatch. A recursive call that
  returns a value <= 0 ends the search after a 'w_many' immediately;
  only a result of 1 makes the caller try the next candidate position.
*/
static
int my_wildcmp_8bit_impl(CHARSET_INFO *cs,
                         const char *str,const char *str_end,
                         const char *wildstr,const char *wildend,
                         int escape, int w_one, int w_many, int recurse_level)
{
  int result= -1;			/* Not found, using wildcards */

  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
  while (wildstr != wildend)
  {
    /* Compare the literal part of the pattern up to the next wildcard */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      /* An escape character makes the following character literal */
      if (*wildstr == escape && wildstr+1 != wildend)
	wildstr++;

      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
	return(1);				/* No match */
      if (wildstr == wildend)
	return(str != str_end);		/* Match if both are at end */
      result=1;					/* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      /* Each consecutive w_one consumes exactly one subject character */
      do
      {
	if (str == str_end)			/* Skip one char if possible */
	  return(result);
	INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
	break;
    }
    if (*wildstr == w_many)
    {						/* Found w_many */
      uchar cmp;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
	if (*wildstr == w_many)
	  continue;
	if (*wildstr == w_one)
	{
	  if (str == str_end)
	    return(-1);
	  INC_PTR(cs,str,str_end);
	  continue;
	}
	break;					/* Not a wild character */
      }
      if (wildstr == wildend)
	return(0);				/* Ok if w_many is last */
      if (str == str_end)
	return(-1);

      /* 'cmp' is the first literal pattern character after the w_many */
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
	cmp= *++wildstr;

      INC_PTR(cs,wildstr,wildend);	/* This is compared trough cmp */
      cmp=likeconv(cs,cmp);
      do
      {
        /*
          Find the next character in the subject string equal to 'cmp', then
          check recursively my_wildcmp_8bit_impl() for the pattern remainder.
        */
	while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
	  str++;
	if (str++ == str_end)
	  return(-1);  /* 'cmp' was not found in the subject string */
	{
	  int tmp=my_wildcmp_8bit_impl(cs,str,str_end,
	                               wildstr,wildend,escape,w_one,
	                               w_many, recurse_level+1);
	  if (tmp <= 0)
	    return(tmp);
	}
        /*
          The recursion call did not match. But it returned 1, which means
          the pattern remainder has some non-special characters.
          Continue, there is a chance that we'll find another 'cmp'
          at a different position in the subject string.
        */
      } while (str != str_end);
      return(-1);
    }
  }
  /* Pattern exhausted: match only if the subject is exhausted as well */
  return(str != str_end ? 1 : 0);
}
1004
1005int my_wildcmp_8bit(CHARSET_INFO *cs,
1006 const char *str,const char *str_end,
1007 const char *wildstr,const char *wildend,
1008 int escape, int w_one, int w_many)
1009{
1010 return my_wildcmp_8bit_impl(cs, str, str_end,
1011 wildstr, wildend,
1012 escape, w_one, w_many, 1);
1013}
1014
1015
1016/*
1017** Calculate min_str and max_str that ranges a LIKE string.
1018** Arguments:
1019** ptr Pointer to LIKE string.
1020** ptr_length Length of LIKE string.
1021** escape Escape character in LIKE. (Normally '\').
1022** All escape characters should be removed from min_str and max_str
1023** res_length Length of min_str and max_str.
1024** min_str Smallest case sensitive string that ranges LIKE.
1025** Should be space padded to res_length.
1026** max_str Largest case sensitive string that ranges LIKE.
1027** Normally padded with the biggest character sort value.
1028**
1029** The function should return 0 if ok and 1 if the LIKE string can't be
1030** optimized !
1031*/
1032
1033my_bool my_like_range_simple(CHARSET_INFO *cs,
1034 const char *ptr, size_t ptr_length,
1035 pbool escape, pbool w_one, pbool w_many,
1036 size_t res_length,
1037 char *min_str,char *max_str,
1038 size_t *min_length, size_t *max_length)
1039{
1040 const char *end= ptr + ptr_length;
1041 char *min_org=min_str;
1042 char *min_end=min_str+res_length;
1043 size_t charlen= res_length / cs->mbmaxlen;
1044
1045 for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
1046 {
1047 if (*ptr == escape && ptr+1 != end)
1048 {
1049 ptr++; /* Skip escape */
1050 *min_str++= *max_str++ = *ptr;
1051 continue;
1052 }
1053 if (*ptr == w_one) /* '_' in SQL */
1054 {
1055 *min_str++='\0'; /* This should be min char */
1056 *max_str++= (char) cs->max_sort_char;
1057 continue;
1058 }
1059 if (*ptr == w_many) /* '%' in SQL */
1060 {
1061 /* Calculate length of keys */
1062 *min_length= (cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)) ?
1063 (size_t) (min_str - min_org) :
1064 res_length;
1065 *max_length= res_length;
1066 do
1067 {
1068 *min_str++= 0;
1069 *max_str++= (char) cs->max_sort_char;
1070 } while (min_str != min_end);
1071 return 0;
1072 }
1073 *min_str++= *max_str++ = *ptr;
1074 }
1075
1076 *min_length= *max_length = (size_t) (min_str - min_org);
1077 while (min_str != min_end)
1078 *min_str++= *max_str++ = ' '; /* Because if key compression */
1079 return 0;
1080}
1081
1082
1083size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq)
1084{
1085 const char *str0= str;
1086 switch (sq)
1087 {
1088 case MY_SEQ_INTTAIL:
1089 if (*str == '.')
1090 {
1091 for(str++ ; str != end && *str == '0' ; str++);
1092 return (size_t) (str - str0);
1093 }
1094 return 0;
1095
1096 case MY_SEQ_SPACES:
1097 for ( ; str < end ; str++)
1098 {
1099 if (!my_isspace(cs,*str))
1100 break;
1101 }
1102 return (size_t) (str - str0);
1103 case MY_SEQ_NONSPACES:
1104 for ( ; str < end ; str++)
1105 {
1106 if (my_isspace(cs, *str))
1107 break;
1108 }
1109 return (size_t) (str - str0);
1110 default:
1111 return 0;
1112 }
1113}
1114
1115
1116void my_fill_8bit(CHARSET_INFO *cs __attribute__((unused)),
1117 char *s, size_t l, int fill)
1118{
1119 bfill((uchar*) s,l,fill);
1120}
1121
1122
1123size_t my_numchars_8bit(CHARSET_INFO *cs __attribute__((unused)),
1124 const char *b, const char *e)
1125{
1126 return (size_t) (e - b);
1127}
1128
1129
1130size_t my_numcells_8bit(CHARSET_INFO *cs __attribute__((unused)),
1131 const char *b, const char *e)
1132{
1133 return (size_t) (e - b);
1134}
1135
1136
1137size_t my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)),
1138 const char *b __attribute__((unused)),
1139 const char *e __attribute__((unused)),
1140 size_t pos)
1141{
1142 return pos;
1143}
1144
1145
1146size_t
1147my_well_formed_char_length_8bit(CHARSET_INFO *cs __attribute__((unused)),
1148 const char *start, const char *end,
1149 size_t nchars, MY_STRCOPY_STATUS *status)
1150{
1151 size_t nbytes= (size_t) (end - start);
1152 size_t res= MY_MIN(nbytes, nchars);
1153 status->m_well_formed_error_pos= NULL;
1154 status->m_source_end_pos= start + res;
1155 return res;
1156}
1157
1158
1159/*
1160 Copy a 8-bit string. Not more than "nchars" character are copied.
1161*/
1162size_t
1163my_copy_8bit(CHARSET_INFO *cs __attribute__((unused)),
1164 char *dst, size_t dst_length,
1165 const char *src, size_t src_length,
1166 size_t nchars, MY_STRCOPY_STATUS *status)
1167{
1168 set_if_smaller(src_length, dst_length);
1169 set_if_smaller(src_length, nchars);
1170 if (src_length)
1171 memmove(dst, src, src_length);
1172 status->m_source_end_pos= src + src_length;
1173 status->m_well_formed_error_pos= NULL;
1174 return src_length;
1175}
1176
1177
1178size_t my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
1179 const char *ptr, size_t length)
1180{
1181 const char *end;
1182 end= (const char *) skip_trailing_space((const uchar *)ptr, length);
1183 return (size_t) (end-ptr);
1184}
1185
1186
1187uint my_instr_simple(CHARSET_INFO *cs,
1188 const char *b, size_t b_length,
1189 const char *s, size_t s_length,
1190 my_match_t *match, uint nmatch)
1191{
1192 register const uchar *str, *search, *end, *search_end;
1193
1194 if (s_length <= b_length)
1195 {
1196 if (!s_length)
1197 {
1198 if (nmatch)
1199 {
1200 match->beg= 0;
1201 match->end= 0;
1202 match->mb_len= 0;
1203 }
1204 return 1; /* Empty string is always found */
1205 }
1206
1207 str= (const uchar*) b;
1208 search= (const uchar*) s;
1209 end= (const uchar*) b+b_length-s_length+1;
1210 search_end= (const uchar*) s + s_length;
1211
1212skip:
1213 while (str != end)
1214 {
1215 if (cs->sort_order[*str++] == cs->sort_order[*search])
1216 {
1217 register const uchar *i,*j;
1218
1219 i= str;
1220 j= search+1;
1221
1222 while (j != search_end)
1223 if (cs->sort_order[*i++] != cs->sort_order[*j++])
1224 goto skip;
1225
1226 if (nmatch > 0)
1227 {
1228 match[0].beg= 0;
1229 match[0].end= (uint) (str- (const uchar*)b-1);
1230 match[0].mb_len= match[0].end;
1231
1232 if (nmatch > 1)
1233 {
1234 match[1].beg= match[0].end;
1235 match[1].end= (uint)(match[0].end+s_length);
1236 match[1].mb_len= match[1].end-match[1].beg;
1237 }
1238 }
1239 return 2;
1240 }
1241 }
1242 }
1243 return 0;
1244}
1245
1246
/*
  Per-plane statistics used while building the Unicode -> 8bit table:
  the number of characters seen in the plane and the code point range
  (with its conversion table) that covers them.
*/
typedef struct
{
  int nchars;
  struct my_uni_idx_st uidx;
} uni_idx;

/* A plane is a block of 0x100 consecutive code points */
#define PLANE_SIZE	0x100
#define PLANE_NUM	0x100
/* Plane index of code point x: bits 8 and above, modulo PLANE_NUM */
#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)
1256
1257static int pcmp(const void * f, const void * s)
1258{
1259 const uni_idx *F= (const uni_idx*) f;
1260 const uni_idx *S= (const uni_idx*) s;
1261 int res;
1262
1263 if (!(res=((S->nchars)-(F->nchars))))
1264 res=((F->uidx.from)-(S->uidx.to));
1265 return res;
1266}
1267
1268static my_bool
1269create_fromuni(struct charset_info_st *cs,
1270 MY_CHARSET_LOADER *loader)
1271{
1272 uni_idx idx[PLANE_NUM];
1273 int i,n;
1274
1275 /*
1276 Check that Unicode map is loaded.
1277 It can be not loaded when the collation is
1278 listed in Index.xml but not specified
1279 in the character set specific XML file.
1280 */
1281 if (!cs->tab_to_uni)
1282 return TRUE;
1283
1284 /* Clear plane statistics */
1285 bzero(idx,sizeof(idx));
1286
1287 /* Count number of characters in each plane */
1288 for (i=0; i< 0x100; i++)
1289 {
1290 uint16 wc=cs->tab_to_uni[i];
1291 int pl= PLANE_NUMBER(wc);
1292
1293 if (wc || !i)
1294 {
1295 if (!idx[pl].nchars)
1296 {
1297 idx[pl].uidx.from=wc;
1298 idx[pl].uidx.to=wc;
1299 }else
1300 {
1301 idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
1302 idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
1303 }
1304 idx[pl].nchars++;
1305 }
1306 }
1307
1308 /* Sort planes in descending order */
1309 qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
1310
1311 for (i=0; i < PLANE_NUM; i++)
1312 {
1313 int ch,numchars;
1314 uchar *tab;
1315
1316 /* Skip empty plane */
1317 if (!idx[i].nchars)
1318 break;
1319
1320 numchars=idx[i].uidx.to-idx[i].uidx.from+1;
1321 if (!(idx[i].uidx.tab= tab= (uchar*)
1322 (loader->once_alloc) (numchars *
1323 sizeof(*idx[i].uidx.tab))))
1324 return TRUE;
1325
1326 bzero(tab,numchars*sizeof(*tab));
1327
1328 for (ch=1; ch < PLANE_SIZE; ch++)
1329 {
1330 uint16 wc=cs->tab_to_uni[ch];
1331 if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
1332 {
1333 int ofs= wc - idx[i].uidx.from;
1334 if (!tab[ofs] || tab[ofs] > 0x7F) /* Prefer ASCII*/
1335 {
1336 /*
1337 Some character sets can have double encoding. For example,
1338 in ARMSCII8, the following characters are encoded twice:
1339
1340 Encoding#1 Encoding#2 Unicode Character Name
1341 ---------- ---------- ------- --------------
1342 0x27 0xFF U+0027 APOSTROPHE
1343 0x28 0xA5 U+0028 LEFT PARENTHESIS
1344 0x29 0xA4 U+0029 RIGHT PARENTHESIS
1345 0x2C 0xAB U+002C COMMA
1346 0x2D 0xAC U+002D HYPHEN-MINUS
1347 0x2E 0xA9 U+002E FULL STOP
1348
1349 That is, both 0x27 and 0xFF convert to Unicode U+0027.
1350 When converting back from Unicode to ARMSCII,
1351 we prefer the ASCII range, that is we want U+0027
1352 to convert to 0x27 rather than to 0xFF.
1353 */
1354 tab[ofs]= ch;
1355 }
1356 }
1357 }
1358 }
1359
1360 /* Allocate and fill reverse table for each plane */
1361 n=i;
1362 if (!(cs->tab_from_uni= (MY_UNI_IDX *)
1363 (loader->once_alloc)(sizeof(MY_UNI_IDX) * (n + 1))))
1364 return TRUE;
1365
1366 for (i=0; i< n; i++)
1367 ((struct my_uni_idx_st*)cs->tab_from_uni)[i]= idx[i].uidx;
1368
1369 /* Set end-of-list marker */
1370 bzero((char*) &cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
1371 return FALSE;
1372}
1373
1374
1375/*
1376 Detect if a character set is 8bit,
1377 and it is pure ascii, i.e. doesn't have
1378 characters outside U+0000..U+007F
1379 This functions is shared between "conf_to_src"
1380 and dynamic charsets loader in "mysqld".
1381*/
1382static my_bool
1383my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
1384{
1385 size_t code;
1386 if (!cs->tab_to_uni)
1387 return 0;
1388 for (code= 0; code < 256; code++)
1389 {
1390 if (cs->tab_to_uni[code] > 0x7F)
1391 return 0;
1392 }
1393 return 1;
1394}
1395
1396
1397/*
1398 Shared function between conf_to_src and mysys.
1399 Check if a 8bit character set is compatible with
1400 ascii on the range 0x00..0x7F.
1401*/
1402static my_bool
1403my_charset_is_ascii_compatible(CHARSET_INFO *cs)
1404{
1405 uint i;
1406 if (!cs->tab_to_uni)
1407 return 1;
1408 for (i= 0; i < 128; i++)
1409 {
1410 if (cs->tab_to_uni[i] != i)
1411 return 0;
1412 }
1413 return 1;
1414}
1415
1416
1417uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs)
1418{
1419 uint flags= 0;
1420 if (my_charset_is_8bit_pure_ascii(cs))
1421 flags|= MY_CS_PUREASCII;
1422 if (!my_charset_is_ascii_compatible(cs))
1423 flags|= MY_CS_NONASCII;
1424 return flags;
1425}
1426
1427
1428/*
1429 Check if case sensitive sort order: A < a < B.
1430 We need MY_CS_FLAG for regex library, and for
1431 case sensitivity flag for 5.0 client protocol,
1432 to support isCaseSensitive() method in JDBC driver
1433*/
1434uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs)
1435{
1436 uint flags= 0;
1437 if (cs->sort_order && cs->sort_order['A'] < cs->sort_order['a'] &&
1438 cs->sort_order['a'] < cs->sort_order['B'])
1439 flags|= MY_CS_CSSORT;
1440 return flags;
1441}
1442
1443
1444static my_bool
1445my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
1446{
1447 cs->state|= my_8bit_charset_flags_from_data(cs);
1448 cs->caseup_multiply= 1;
1449 cs->casedn_multiply= 1;
1450 cs->pad_char= ' ';
1451 if (!cs->to_lower || !cs->to_upper || !cs->ctype || !cs->tab_to_uni)
1452 return TRUE;
1453 return create_fromuni(cs, loader);
1454}
1455
1456static void set_max_sort_char(struct charset_info_st *cs)
1457{
1458 uchar max_char;
1459 uint i;
1460
1461 if (!cs->sort_order)
1462 return;
1463
1464 max_char=cs->sort_order[(uchar) cs->max_sort_char];
1465 for (i= 0; i < 256; i++)
1466 {
1467 if ((uchar) cs->sort_order[i] > max_char)
1468 {
1469 max_char=(uchar) cs->sort_order[i];
1470 cs->max_sort_char= i;
1471 }
1472 }
1473}
1474
1475static my_bool my_coll_init_simple(struct charset_info_st *cs,
1476 MY_CHARSET_LOADER *loader __attribute__((unused)))
1477{
1478 if (!cs->sort_order)
1479 return TRUE;
1480 cs->state|= my_8bit_collation_flags_from_data(cs);
1481 set_max_sort_char(cs);
1482 return FALSE;
1483}
1484
1485
1486longlong my_strtoll10_8bit(CHARSET_INFO *cs __attribute__((unused)),
1487 const char *nptr, char **endptr, int *error)
1488{
1489 return my_strtoll10(nptr, endptr, error);
1490}
1491
1492
1493int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
1494 const uchar *s, const uchar *e)
1495{
1496 if (s >= e)
1497 {
1498 *ctype= 0;
1499 return MY_CS_TOOSMALL;
1500 }
1501 *ctype= cs->ctype[*s + 1];
1502 return 1;
1503}
1504
1505
1506#define CUTOFF (ULONGLONG_MAX / 10)
1507#define CUTLIM (ULONGLONG_MAX % 10)
1508#define DIGITS_IN_ULONGLONG 20
1509
1510static ulonglong d10[DIGITS_IN_ULONGLONG]=
1511{
1512 1,
1513 10,
1514 100,
1515 1000,
1516 10000,
1517 100000,
1518 1000000,
1519 10000000,
1520 100000000,
1521 1000000000,
1522 10000000000ULL,
1523 100000000000ULL,
1524 1000000000000ULL,
1525 10000000000000ULL,
1526 100000000000000ULL,
1527 1000000000000000ULL,
1528 10000000000000000ULL,
1529 100000000000000000ULL,
1530 1000000000000000000ULL,
1531 10000000000000000000ULL
1532};
1533
1534
1535/*
1536
1537 Convert a string to unsigned long long integer value
1538 with rounding.
1539
1540 SYNOPSIS
1541 my_strntoull10_8bit()
1542 cs in pointer to character set
1543 str in pointer to the string to be converted
1544 length in string length
1545 unsigned_flag in whether the number is unsigned
1546 endptr out pointer to the stop character
1547 error out returned error code
1548
1549 DESCRIPTION
1550 This function takes the decimal representation of integer number
1551 from string str and converts it to an signed or unsigned
1552 long long integer value.
1553 Space characters and tab are ignored.
1554 A sign character might precede the digit characters.
1555 The number may have any number of pre-zero digits.
1556 The number may have decimal point and exponent.
1557 Rounding is always done in "away from zero" style:
1558 0.5 -> 1
1559 -0.5 -> -1
1560
1561 The function stops reading the string str after "length" bytes
1562 or at the first character that is not a part of correct number syntax:
1563
1564 <signed numeric literal> ::=
1565 [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
1566
1567 <exact numeric literal> ::=
1568 <unsigned integer> [ <period> [ <unsigned integer> ] ]
1569 | <period> <unsigned integer>
1570 <unsigned integer> ::= <digit>...
1571
1572 RETURN VALUES
1573 Value of string as a signed/unsigned longlong integer
1574
1575 endptr cannot be NULL. The function will store the end pointer
1576 to the stop character here.
1577
1578 The error parameter contains information how things went:
1579 0 ok
1580 ERANGE If the the value of the converted number is out of range
1581 In this case the return value is:
1582 - ULONGLONG_MAX if unsigned_flag and the number was too big
1583 - 0 if unsigned_flag and the number was negative
1584 - LONGLONG_MAX if no unsigned_flag and the number is too big
1585 - LONGLONG_MIN if no unsigned_flag and the number it too big negative
1586
1587 EDOM If the string didn't contain any digits.
1588 In this case the return value is 0.
1589*/
1590
1591ulonglong
1592my_strntoull10rnd_8bit(CHARSET_INFO *cs __attribute__((unused)),
1593 const char *str, size_t length, int unsigned_flag,
1594 char **endptr, int *error)
1595{
1596 const char *dot, *end9, *beg, *end= str + length;
1597 ulonglong ull;
1598 ulong ul;
1599 uchar ch;
1600 int shift= 0, digits= 0, negative, addon;
1601
1602 /* Skip leading spaces and tabs */
1603 for ( ; str < end && (*str == ' ' || *str == '\t') ; str++);
1604
1605 if (str >= end)
1606 goto ret_edom;
1607
1608 if ((negative= (*str == '-')) || *str=='+') /* optional sign */
1609 {
1610 if (++str == end)
1611 goto ret_edom;
1612 }
1613
1614 beg= str;
1615 end9= (str + 9) > end ? end : (str + 9);
1616 /* Accumulate small number into ulong, for performance purposes */
1617 for (ul= 0 ; str < end9 && (ch= (uchar) (*str - '0')) < 10; str++)
1618 {
1619 ul= ul * 10 + ch;
1620 }
1621
1622 if (str >= end) /* Small number without dots and expanents */
1623 {
1624 *endptr= (char*) str;
1625 if (negative)
1626 {
1627 if (unsigned_flag)
1628 {
1629 *error= ul ? MY_ERRNO_ERANGE : 0;
1630 return 0;
1631 }
1632 else
1633 {
1634 *error= 0;
1635 return (ulonglong) (longlong) -(long) ul;
1636 }
1637 }
1638 else
1639 {
1640 *error=0;
1641 return (ulonglong) ul;
1642 }
1643 }
1644
1645 digits= (int) (str - beg);
1646
1647 /* Continue to accumulate into ulonglong */
1648 for (dot= NULL, ull= ul; str < end; str++)
1649 {
1650 if ((ch= (uchar) (*str - '0')) < 10)
1651 {
1652 if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
1653 {
1654 ull= ull * 10 + ch;
1655 digits++;
1656 continue;
1657 }
1658 /*
1659 Adding the next digit would overflow.
1660 Remember the next digit in "addon", for rounding.
1661 Scan all digits with an optional single dot.
1662 */
1663 if (ull == CUTOFF)
1664 {
1665 ull= ULONGLONG_MAX;
1666 addon= 1;
1667 str++;
1668 }
1669 else
1670 addon= (*str >= '5');
1671 if (!dot)
1672 {
1673 for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; shift++, str++);
1674 if (str < end && *str == '.')
1675 {
1676 str++;
1677 for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1678 }
1679 }
1680 else
1681 {
1682 shift= (int) (dot - str);
1683 for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1684 }
1685 goto exp;
1686 }
1687
1688 if (*str == '.')
1689 {
1690 if (dot)
1691 {
1692 /* The second dot character */
1693 addon= 0;
1694 goto exp;
1695 }
1696 else
1697 {
1698 dot= str + 1;
1699 }
1700 continue;
1701 }
1702
1703 /* Unknown character, exit the loop */
1704 break;
1705 }
1706 shift= dot ? (int)(dot - str) : 0; /* Right shift */
1707 addon= 0;
1708
1709exp: /* [ E [ <sign> ] <unsigned integer> ] */
1710
1711 if (!digits)
1712 {
1713 str= beg;
1714 goto ret_edom;
1715 }
1716
1717 if (str < end && (*str == 'e' || *str == 'E'))
1718 {
1719 str++;
1720 if (str < end)
1721 {
1722 int negative_exp, exponent;
1723 if ((negative_exp= (*str == '-')) || *str=='+')
1724 {
1725 if (++str == end)
1726 {
1727 str-= 2; /* 'e-' or 'e+' not followed by digits */
1728 goto ret_sign;
1729 }
1730 }
1731 for (exponent= 0 ;
1732 str < end && (ch= (uchar) (*str - '0')) < 10;
1733 str++)
1734 {
1735 exponent= exponent * 10 + ch;
1736 }
1737 shift+= negative_exp ? -exponent : exponent;
1738 }
1739 else
1740 str--; /* 'e' not followed by digits */
1741 }
1742
1743 if (shift == 0) /* No shift, check addon digit */
1744 {
1745 if (addon)
1746 {
1747 if (ull == ULONGLONG_MAX)
1748 goto ret_too_big;
1749 ull++;
1750 }
1751 goto ret_sign;
1752 }
1753
1754 if (shift < 0) /* Right shift */
1755 {
1756 ulonglong d, r;
1757
1758 if (-shift >= DIGITS_IN_ULONGLONG)
1759 goto ret_zero; /* Exponent is a big negative number, return 0 */
1760
1761 d= d10[-shift];
1762 r= (ull % d) * 2;
1763 ull /= d;
1764 if (r >= d)
1765 ull++;
1766 goto ret_sign;
1767 }
1768
1769 if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
1770 {
1771 if (!ull)
1772 goto ret_sign;
1773 goto ret_too_big;
1774 }
1775
1776 for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
1777 {
1778 if (ull > CUTOFF)
1779 goto ret_too_big; /* Overflow, number too big */
1780 }
1781
1782ret_sign:
1783 *endptr= (char*) str;
1784
1785 if (!unsigned_flag)
1786 {
1787 if (negative)
1788 {
1789 if (ull > (ulonglong) LONGLONG_MIN)
1790 {
1791 *error= MY_ERRNO_ERANGE;
1792 return (ulonglong) LONGLONG_MIN;
1793 }
1794 *error= 0;
1795 return (ulonglong) -(longlong) ull;
1796 }
1797 else
1798 {
1799 if (ull > (ulonglong) LONGLONG_MAX)
1800 {
1801 *error= MY_ERRNO_ERANGE;
1802 return (ulonglong) LONGLONG_MAX;
1803 }
1804 *error= 0;
1805 return ull;
1806 }
1807 }
1808
1809 /* Unsigned number */
1810 if (negative && ull)
1811 {
1812 *error= MY_ERRNO_ERANGE;
1813 return 0;
1814 }
1815 *error= 0;
1816 return ull;
1817
1818ret_zero:
1819 *endptr= (char*) str;
1820 *error= 0;
1821 return 0;
1822
1823ret_edom:
1824 *endptr= (char*) str;
1825 *error= MY_ERRNO_EDOM;
1826 return 0;
1827
1828ret_too_big:
1829 *endptr= (char*) str;
1830 *error= MY_ERRNO_ERANGE;
1831 return unsigned_flag ?
1832 ULONGLONG_MAX :
1833 negative ? (ulonglong) LONGLONG_MIN : (ulonglong) LONGLONG_MAX;
1834}
1835
1836
1837/*
1838 Check if a constant can be propagated
1839
1840 SYNOPSIS:
1841 my_propagate_simple()
1842 cs Character set information
1843 str String to convert to double
1844 length Optional length for string.
1845
1846 NOTES:
1847 Takes the string in the given charset and check
1848 if it can be safely propagated in the optimizer.
1849
1850 create table t1 (
1851 s char(5) character set latin1 collate latin1_german2_ci);
1852 insert into t1 values (0xf6); -- o-umlaut
1853 select * from t1 where length(s)=1 and s='oe';
1854
1855 The above query should return one row.
1856 We cannot convert this query into:
1857 select * from t1 where length('oe')=1 and s='oe';
1858
1859 Currently we don't check the constant itself,
1860 and decide not to propagate a constant
1861 just if the collation itself allows tricky things
1862 like expansions and contractions. In the future
1863 we can write a more sophisticated functions to
1864 check the constants. For example, 'oa' can always
1865 be safety propagated in German2 because unlike
1866 'oe' it does not have any special meaning.
1867
1868 RETURN
1869 1 if constant can be safely propagated
1870 0 if it is not safe to propagate the constant
1871*/
1872
1873
1874
1875my_bool my_propagate_simple(CHARSET_INFO *cs __attribute__((unused)),
1876 const uchar *str __attribute__((unused)),
1877 size_t length __attribute__((unused)))
1878{
1879 return 1;
1880}
1881
1882
1883my_bool my_propagate_complex(CHARSET_INFO *cs __attribute__((unused)),
1884 const uchar *str __attribute__((unused)),
1885 size_t length __attribute__((unused)))
1886{
1887 return 0;
1888}
1889
1890
1891/*
1892 Normalize strxfrm flags
1893
1894 SYNOPSIS:
1895 my_strxfrm_flag_normalize()
1896 flags - non-normalized flags
1897 nlevels - number of levels
1898
1899 NOTES:
1900 If levels are omitted, then 1-maximum is assumed.
1901 If any level number is greater than the maximum,
1902 it is treated as the maximum.
1903
1904 RETURN
1905 normalized flags
1906*/
1907
1908uint my_strxfrm_flag_normalize(uint flags, uint maximum)
1909{
1910 DBUG_ASSERT(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS);
1911
1912 /* If levels are omitted, then 1-maximum is assumed*/
1913 if (!(flags & MY_STRXFRM_LEVEL_ALL))
1914 {
1915 static uint def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F };
1916 uint flag_pad= flags &
1917 (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN);
1918 flags= def_level_flags[maximum] | flag_pad;
1919 }
1920 else
1921 {
1922 uint i;
1923 uint flag_lev= flags & MY_STRXFRM_LEVEL_ALL;
1924 uint flag_dsc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1925 uint flag_rev= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1926 uint flag_pad= flags &
1927 (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN);
1928
1929 /*
1930 If any level number is greater than the maximum,
1931 it is treated as the maximum.
1932 */
1933 for (maximum--, flags= 0, i= 0; i < MY_STRXFRM_NLEVELS; i++)
1934 {
1935 uint src_bit= 1 << i;
1936 if (flag_lev & src_bit)
1937 {
1938 uint dst_bit= 1 << MY_MIN(i, maximum);
1939 flags|= dst_bit;
1940 flags|= (flag_dsc & dst_bit) << MY_STRXFRM_DESC_SHIFT;
1941 flags|= (flag_rev & dst_bit) << MY_STRXFRM_REVERSE_SHIFT;
1942 }
1943 }
1944 flags|= flag_pad;
1945 }
1946
1947 return flags;
1948}
1949
1950
1951/*
1952 Apply DESC and REVERSE collation rules.
1953
1954 SYNOPSIS:
1955 my_strxfrm_desc_and_reverse()
1956 str - pointer to string
1957 strend - end of string
1958 flags - flags
1959 level - which level, starting from 0.
1960
1961 NOTES:
1962 Apply DESC or REVERSE or both flags.
1963
1964 If DESC flag is given, then the weights
1965 come out NOTed or negated for that level.
1966
1967 If REVERSE flags is given, then the weights come out in
1968 reverse order for that level, that is, starting with
1969 the last character and ending with the first character.
1970
1971 If nether DESC nor REVERSE flags are give,
1972 the string is not changed.
1973
1974*/
1975void
1976my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
1977 uint flags, uint level)
1978{
1979 if (flags & (MY_STRXFRM_DESC_LEVEL1 << level))
1980 {
1981 if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1982 {
1983 for (strend--; str <= strend;)
1984 {
1985 uchar tmp= *str;
1986 *str++= ~*strend;
1987 *strend--= ~tmp;
1988 }
1989 }
1990 else
1991 {
1992 for (; str < strend; str++)
1993 *str= ~*str;
1994 }
1995 }
1996 else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1997 {
1998 for (strend--; str < strend;)
1999 {
2000 uchar tmp= *str;
2001 *str++= *strend;
2002 *strend--= tmp;
2003 }
2004 }
2005}
2006
2007
2008size_t
2009my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
2010 uchar *str, uchar *frmend, uchar *strend,
2011 uint nweights, uint flags, uint level)
2012{
2013 if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
2014 {
2015 uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen);
2016 cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
2017 frmend+= fill_length;
2018 }
2019 my_strxfrm_desc_and_reverse(str, frmend, flags, level);
2020 if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend)
2021 {
2022 size_t fill_length= strend - frmend;
2023 cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
2024 frmend= strend;
2025 }
2026 return frmend - str;
2027}
2028
2029
2030size_t
2031my_strxfrm_pad_desc_and_reverse_nopad(CHARSET_INFO *cs,
2032 uchar *str, uchar *frmend, uchar *strend,
2033 uint nweights, uint flags, uint level)
2034{
2035 if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
2036 {
2037 uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen);
2038 memset(frmend, 0x00, fill_length);
2039 frmend+= fill_length;
2040 }
2041 my_strxfrm_desc_and_reverse(str, frmend, flags, level);
2042 if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend)
2043 {
2044 size_t fill_length= strend - frmend;
2045 memset(frmend, 0x00, fill_length);
2046 frmend= strend;
2047 }
2048 return frmend - str;
2049}
2050
2051
2052MY_CHARSET_HANDLER my_charset_8bit_handler=
2053{
2054 my_cset_init_8bit,
2055 my_numchars_8bit,
2056 my_charpos_8bit,
2057 my_lengthsp_8bit,
2058 my_numcells_8bit,
2059 my_mb_wc_8bit,
2060 my_wc_mb_8bit,
2061 my_mb_ctype_8bit,
2062 my_caseup_str_8bit,
2063 my_casedn_str_8bit,
2064 my_caseup_8bit,
2065 my_casedn_8bit,
2066 my_snprintf_8bit,
2067 my_long10_to_str_8bit,
2068 my_longlong10_to_str_8bit,
2069 my_fill_8bit,
2070 my_strntol_8bit,
2071 my_strntoul_8bit,
2072 my_strntoll_8bit,
2073 my_strntoull_8bit,
2074 my_strntod_8bit,
2075 my_strtoll10_8bit,
2076 my_strntoull10rnd_8bit,
2077 my_scan_8bit,
2078 my_charlen_8bit,
2079 my_well_formed_char_length_8bit,
2080 my_copy_8bit,
2081 my_wc_mb_bin, /* native_to_mb */
2082};
2083
2084MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
2085{
2086 my_coll_init_simple, /* init */
2087 my_strnncoll_simple,
2088 my_strnncollsp_simple,
2089 my_strnxfrm_simple,
2090 my_strnxfrmlen_simple,
2091 my_like_range_simple,
2092 my_wildcmp_8bit,
2093 my_strcasecmp_8bit,
2094 my_instr_simple,
2095 my_hash_sort_simple,
2096 my_propagate_simple
2097};
2098
2099
2100MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler =
2101{
2102 my_coll_init_simple, /* init */
2103 my_strnncoll_simple,
2104 my_strnncollsp_simple_nopad,
2105 my_strnxfrm_simple_nopad,
2106 my_strnxfrmlen_simple,
2107 my_like_range_simple,
2108 my_wildcmp_8bit,
2109 my_strcasecmp_8bit,
2110 my_instr_simple,
2111 my_hash_sort_simple_nopad,
2112 my_propagate_simple
2113};
2114