1/* Copyright (c) 2003, 2013, Oracle and/or its affiliates
2 Copyright (c) 2009, 2016, MariaDB
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public
6 License as published by the Free Software Foundation; version 2
7 of the License.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with this library; if not, write to the Free
16 Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
17 MA 02110-1301, USA */
18
19/* UCS2 support. Written by Alexander Barkov <bar@mysql.com> */
20
21#include "strings_def.h"
22#include <m_ctype.h>
23#include <my_sys.h>
24#include <stdarg.h>
25
26
/* Set when at least one character set with 2-byte code units is compiled in */
#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
#define HAVE_CHARSET_mb2
#endif


/* Set when any of the 2-byte or 4-byte (utf32) character sets is compiled in */
#if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32)
#define HAVE_CHARSET_mb2_or_mb4
#endif


/* Fall back to ENOENT when the platform does not define EILSEQ */
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif

/* Limits and decimal factors used by the string-to-integer converters */
#undef  ULONGLONG_MAX
#define ULONGLONG_MAX                (~(ulonglong) 0)
#define MAX_NEGATIVE_NUMBER        ((ulonglong) 0x8000000000000000LL)
#define INIT_CNT  9                  /* Digits collected per "long" chunk */
#define LFACTOR   1000000000ULL      /* 10^9  */
#define LFACTOR1  10000000000ULL     /* 10^10 */
#define LFACTOR2  100000000000ULL    /* 10^11 */

#if defined(HAVE_CHARSET_utf32) || defined(HAVE_CHARSET_mb2)
/* Powers of ten: lfactor[n] == 10^n for n in 0..8. Read-only lookup table. */
static const unsigned long lfactor[9]=
{ 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L };
#endif
53
54
55#ifdef HAVE_CHARSET_mb2_or_mb4
56static size_t
57my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs __attribute__((unused)),
58 char * s __attribute__((unused)))
59{
60 DBUG_ASSERT(0);
61 return 0;
62}
63
64
65static size_t
66my_casedn_str_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
67 char * s __attribute__((unused)))
68{
69 DBUG_ASSERT(0);
70 return 0;
71}
72
73
74static int
75my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
76 const char *s __attribute__((unused)),
77 const char *t __attribute__((unused)))
78{
79 DBUG_ASSERT(0);
80 return 0;
81}
82
83
/* Result of copying a single (possibly incomplete) character */
typedef enum
{
  MY_CHAR_COPY_OK=       0, /* The character was OK                          */
  MY_CHAR_COPY_ERROR=    1, /* The character was not OK and was not fixed    */
  MY_CHAR_COPY_FIXED=    2  /* The character was not OK, '?' was put instead */
} my_char_copy_status_t;
90
91
92/*
93 Copies an incomplete character, lef-padding it with 0x00 bytes.
94
95 @param cs Character set
96 @param dst The destination string
97 @param dst_length Space available in dst
98 @param src The source string
99 @param src_length Length of src
100 @param nchars Copy not more than nchars characters.
101 The "nchars" parameter of the caller.
102 Only 0 and non-0 are important here.
103 @param fix What to do if after zero-padding didn't get a valid
104 character:
105 - FALSE - exit with error.
106 - TRUE - try to put '?' instead.
107
108 @return MY_CHAR_COPY_OK if after zero-padding got a valid character.
109 cs->mbmaxlen bytes were written to "dst".
110 @return MY_CHAR_COPY_FIXED if after zero-padding did not get a valid
111 character, but wrote '?' to the destination
112 string instead.
113 cs->mbminlen bytes were written to "dst".
114 @return MY_CHAR_COPY_ERROR If failed and nothing was written to "dst".
115 Possible reasons:
116 - dst_length was too short
117 - nchars was 0
118 - the character after padding appeared not
119 to be valid, and could not fix it to '?'.
120*/
121static my_char_copy_status_t
122my_copy_incomplete_char(CHARSET_INFO *cs,
123 char *dst, size_t dst_length,
124 const char *src, size_t src_length,
125 size_t nchars, my_bool fix)
126{
127 size_t pad_length;
128 size_t src_offset= src_length % cs->mbminlen;
129 if (dst_length < cs->mbminlen || !nchars)
130 return MY_CHAR_COPY_ERROR;
131
132 pad_length= cs->mbminlen - src_offset;
133 bzero(dst, pad_length);
134 memmove(dst + pad_length, src, src_offset);
135 /*
136 In some cases left zero-padding can create an incorrect character.
137 For example:
138 INSERT INTO t1 (utf32_column) VALUES (0x110000);
139 We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
140 The valid characters range is limited to 0x00000000..0x0010FFFF.
141
142 Make sure we didn't pad to an incorrect character.
143 */
144 if (cs->cset->charlen(cs, (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
145 (int) cs->mbminlen)
146 return MY_CHAR_COPY_OK;
147
148 if (fix &&
149 cs->cset->wc_mb(cs, '?', (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
150 (int) cs->mbminlen)
151 return MY_CHAR_COPY_FIXED;
152
153 return MY_CHAR_COPY_ERROR;
154}
155
156
157/*
158 Copy an UCS2/UTF16/UTF32 string, fix bad characters.
159*/
160static size_t
161my_copy_fix_mb2_or_mb4(CHARSET_INFO *cs,
162 char *dst, size_t dst_length,
163 const char *src, size_t src_length,
164 size_t nchars, MY_STRCOPY_STATUS *status)
165{
166 size_t length2, src_offset= src_length % cs->mbminlen;
167 my_char_copy_status_t padstatus;
168
169 if (!src_offset)
170 return my_copy_fix_mb(cs, dst, dst_length,
171 src, src_length, nchars, status);
172 if ((padstatus= my_copy_incomplete_char(cs, dst, dst_length,
173 src, src_length, nchars, TRUE)) ==
174 MY_CHAR_COPY_ERROR)
175 {
176 status->m_source_end_pos= status->m_well_formed_error_pos= src;
177 return 0;
178 }
179 length2= my_copy_fix_mb(cs, dst + cs->mbminlen, dst_length - cs->mbminlen,
180 src + src_offset, src_length - src_offset,
181 nchars - 1, status);
182 if (padstatus == MY_CHAR_COPY_FIXED)
183 status->m_well_formed_error_pos= src;
184 return cs->mbminlen /* The left-padded character */ + length2;
185}
186
187
188static long
189my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
190 const char *nptr, size_t l, int base,
191 char **endptr, int *err)
192{
193 int negative= 0;
194 int overflow;
195 int cnv;
196 my_wc_t wc;
197 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
198 register unsigned int cutlim;
199 register uint32 cutoff;
200 register uint32 res;
201 register const uchar *s= (const uchar*) nptr;
202 register const uchar *e= (const uchar*) nptr+l;
203 const uchar *save;
204
205 *err= 0;
206 do
207 {
208 if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
209 {
210 switch (wc)
211 {
212 case ' ' : break;
213 case '\t': break;
214 case '-' : negative= !negative; break;
215 case '+' : break;
216 default : goto bs;
217 }
218 }
219 else /* No more characters or bad multibyte sequence */
220 {
221 if (endptr != NULL )
222 *endptr= (char*) s;
223 err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
224 return 0;
225 }
226 s+= cnv;
227 } while (1);
228
229bs:
230
231 overflow= 0;
232 res= 0;
233 save= s;
234 cutoff= ((uint32)~0L) / (uint32) base;
235 cutlim= (uint) (((uint32)~0L) % (uint32) base);
236
237 do {
238 if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
239 {
240 s+= cnv;
241 if (wc >= '0' && wc <= '9')
242 wc-= '0';
243 else if (wc >= 'A' && wc <= 'Z')
244 wc= wc - 'A' + 10;
245 else if (wc >= 'a' && wc <= 'z')
246 wc= wc - 'a' + 10;
247 else
248 break;
249 if ((int)wc >= base)
250 break;
251 if (res > cutoff || (res == cutoff && wc > cutlim))
252 overflow= 1;
253 else
254 {
255 res*= (uint32) base;
256 res+= wc;
257 }
258 }
259 else if (cnv == MY_CS_ILSEQ)
260 {
261 if (endptr !=NULL )
262 *endptr = (char*) s;
263 err[0]= EILSEQ;
264 return 0;
265 }
266 else
267 {
268 /* No more characters */
269 break;
270 }
271 } while(1);
272
273 if (endptr != NULL)
274 *endptr = (char *) s;
275
276 if (s == save)
277 {
278 err[0]= EDOM;
279 return 0L;
280 }
281
282 if (negative)
283 {
284 if (res > (uint32) INT_MIN32)
285 overflow= 1;
286 }
287 else if (res > INT_MAX32)
288 overflow= 1;
289
290 if (overflow)
291 {
292 err[0]= ERANGE;
293 return negative ? INT_MIN32 : INT_MAX32;
294 }
295
296 return (negative ? -((long) res) : (long) res);
297}
298
299
300static ulong
301my_strntoul_mb2_or_mb4(CHARSET_INFO *cs,
302 const char *nptr, size_t l, int base,
303 char **endptr, int *err)
304{
305 int negative= 0;
306 int overflow;
307 int cnv;
308 my_wc_t wc;
309 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
310 register unsigned int cutlim;
311 register uint32 cutoff;
312 register uint32 res;
313 register const uchar *s= (const uchar*) nptr;
314 register const uchar *e= (const uchar*) nptr + l;
315 const uchar *save;
316
317 *err= 0;
318 do
319 {
320 if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
321 {
322 switch (wc)
323 {
324 case ' ' : break;
325 case '\t': break;
326 case '-' : negative= !negative; break;
327 case '+' : break;
328 default : goto bs;
329 }
330 }
331 else /* No more characters or bad multibyte sequence */
332 {
333 if (endptr !=NULL )
334 *endptr= (char*)s;
335 err[0]= (cnv == MY_CS_ILSEQ) ? EILSEQ : EDOM;
336 return 0;
337 }
338 s+= cnv;
339 } while (1);
340
341bs:
342
343 overflow= 0;
344 res= 0;
345 save= s;
346 cutoff= ((uint32)~0L) / (uint32) base;
347 cutlim= (uint) (((uint32)~0L) % (uint32) base);
348
349 do
350 {
351 if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
352 {
353 s+= cnv;
354 if (wc >= '0' && wc <= '9')
355 wc-= '0';
356 else if (wc >= 'A' && wc <= 'Z')
357 wc= wc - 'A' + 10;
358 else if (wc >= 'a' && wc <= 'z')
359 wc= wc - 'a' + 10;
360 else
361 break;
362 if ((int) wc >= base)
363 break;
364 if (res > cutoff || (res == cutoff && wc > cutlim))
365 overflow = 1;
366 else
367 {
368 res*= (uint32) base;
369 res+= wc;
370 }
371 }
372 else if (cnv == MY_CS_ILSEQ)
373 {
374 if (endptr != NULL )
375 *endptr= (char*)s;
376 err[0]= EILSEQ;
377 return 0;
378 }
379 else
380 {
381 /* No more characters */
382 break;
383 }
384 } while(1);
385
386 if (endptr != NULL)
387 *endptr= (char *) s;
388
389 if (s == save)
390 {
391 err[0]= EDOM;
392 return 0L;
393 }
394
395 if (overflow)
396 {
397 err[0]= (ERANGE);
398 return (~(uint32) 0);
399 }
400
401 return (negative ? -((long) res) : (long) res);
402}
403
404
405static longlong
406my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
407 const char *nptr, size_t l, int base,
408 char **endptr, int *err)
409{
410 int negative=0;
411 int overflow;
412 int cnv;
413 my_wc_t wc;
414 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
415 register ulonglong cutoff;
416 register unsigned int cutlim;
417 register ulonglong res;
418 register const uchar *s= (const uchar*) nptr;
419 register const uchar *e= (const uchar*) nptr+l;
420 const uchar *save;
421
422 *err= 0;
423 do
424 {
425 if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
426 {
427 switch (wc)
428 {
429 case ' ' : break;
430 case '\t': break;
431 case '-' : negative= !negative; break;
432 case '+' : break;
433 default : goto bs;
434 }
435 }
436 else /* No more characters or bad multibyte sequence */
437 {
438 if (endptr !=NULL )
439 *endptr = (char*)s;
440 err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
441 return 0;
442 }
443 s+=cnv;
444 } while (1);
445
446bs:
447
448 overflow = 0;
449 res = 0;
450 save = s;
451 cutoff = (~(ulonglong) 0) / (unsigned long int) base;
452 cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
453
454 do {
455 if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
456 {
457 s+=cnv;
458 if ( wc>='0' && wc<='9')
459 wc -= '0';
460 else if ( wc>='A' && wc<='Z')
461 wc = wc - 'A' + 10;
462 else if ( wc>='a' && wc<='z')
463 wc = wc - 'a' + 10;
464 else
465 break;
466 if ((int)wc >= base)
467 break;
468 if (res > cutoff || (res == cutoff && wc > cutlim))
469 overflow = 1;
470 else
471 {
472 res *= (ulonglong) base;
473 res += wc;
474 }
475 }
476 else if (cnv==MY_CS_ILSEQ)
477 {
478 if (endptr !=NULL )
479 *endptr = (char*)s;
480 err[0]=EILSEQ;
481 return 0;
482 }
483 else
484 {
485 /* No more characters */
486 break;
487 }
488 } while(1);
489
490 if (endptr != NULL)
491 *endptr = (char *) s;
492
493 if (s == save)
494 {
495 err[0]=EDOM;
496 return 0L;
497 }
498
499 if (negative)
500 {
501 if (res > (ulonglong) LONGLONG_MIN)
502 overflow = 1;
503 }
504 else if (res > (ulonglong) LONGLONG_MAX)
505 overflow = 1;
506
507 if (overflow)
508 {
509 err[0]=ERANGE;
510 return negative ? LONGLONG_MIN : LONGLONG_MAX;
511 }
512
513 return (negative ? -((longlong)res) : (longlong)res);
514}
515
516
517static ulonglong
518my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
519 const char *nptr, size_t l, int base,
520 char **endptr, int *err)
521{
522 int negative= 0;
523 int overflow;
524 int cnv;
525 my_wc_t wc;
526 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
527 register ulonglong cutoff;
528 register unsigned int cutlim;
529 register ulonglong res;
530 register const uchar *s= (const uchar*) nptr;
531 register const uchar *e= (const uchar*) nptr + l;
532 const uchar *save;
533
534 *err= 0;
535 do
536 {
537 if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
538 {
539 switch (wc)
540 {
541 case ' ' : break;
542 case '\t': break;
543 case '-' : negative= !negative; break;
544 case '+' : break;
545 default : goto bs;
546 }
547 }
548 else /* No more characters or bad multibyte sequence */
549 {
550 if (endptr !=NULL )
551 *endptr = (char*)s;
552 err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
553 return 0;
554 }
555 s+=cnv;
556 } while (1);
557
558bs:
559
560 overflow = 0;
561 res = 0;
562 save = s;
563 cutoff = (~(ulonglong) 0) / (unsigned long int) base;
564 cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
565
566 do
567 {
568 if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
569 {
570 s+=cnv;
571 if ( wc>='0' && wc<='9')
572 wc -= '0';
573 else if ( wc>='A' && wc<='Z')
574 wc = wc - 'A' + 10;
575 else if ( wc>='a' && wc<='z')
576 wc = wc - 'a' + 10;
577 else
578 break;
579 if ((int)wc >= base)
580 break;
581 if (res > cutoff || (res == cutoff && wc > cutlim))
582 overflow = 1;
583 else
584 {
585 res *= (ulonglong) base;
586 res += wc;
587 }
588 }
589 else if (cnv==MY_CS_ILSEQ)
590 {
591 if (endptr !=NULL )
592 *endptr = (char*)s;
593 err[0]= EILSEQ;
594 return 0;
595 }
596 else
597 {
598 /* No more characters */
599 break;
600 }
601 } while(1);
602
603 if (endptr != NULL)
604 *endptr = (char *) s;
605
606 if (s == save)
607 {
608 err[0]= EDOM;
609 return 0L;
610 }
611
612 if (overflow)
613 {
614 err[0]= ERANGE;
615 return (~(ulonglong) 0);
616 }
617
618 return (negative ? -((longlong) res) : (longlong) res);
619}
620
621
622static double
623my_strntod_mb2_or_mb4(CHARSET_INFO *cs,
624 char *nptr, size_t length,
625 char **endptr, int *err)
626{
627 char buf[256];
628 double res;
629 register char *b= buf;
630 register const uchar *s= (const uchar*) nptr;
631 const uchar *end;
632 my_wc_t wc;
633 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
634 int cnv;
635
636 *err= 0;
637 /* Cut too long strings */
638 if (length >= sizeof(buf))
639 length= sizeof(buf) - 1;
640 end= s + length;
641
642 while ((cnv= mb_wc(cs, &wc, s, end)) > 0)
643 {
644 s+= cnv;
645 if (wc > (int) (uchar) 'e' || !wc)
646 break; /* Can't be part of double */
647 *b++= (char) wc;
648 }
649
650 *endptr= b;
651 res= my_strtod(buf, endptr, err);
652 *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf);
653 return res;
654}
655
656
657static ulonglong
658my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs,
659 const char *nptr, size_t length,
660 int unsign_fl,
661 char **endptr, int *err)
662{
663 char buf[256], *b= buf;
664 ulonglong res;
665 const uchar *end, *s= (const uchar*) nptr;
666 my_wc_t wc;
667 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
668 int cnv;
669
670 /* Cut too long strings */
671 if (length >= sizeof(buf))
672 length= sizeof(buf)-1;
673 end= s + length;
674
675 while ((cnv= mb_wc(cs, &wc, s, end)) > 0)
676 {
677 s+= cnv;
678 if (wc > (int) (uchar) 'e' || !wc)
679 break; /* Can't be a number part */
680 *b++= (char) wc;
681 }
682
683 res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
684 *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf);
685 return res;
686}
687
688
689/*
690 This is a fast version optimized for the case of radix 10 / -10
691*/
692
693static size_t
694my_l10tostr_mb2_or_mb4(CHARSET_INFO *cs,
695 char *dst, size_t len, int radix, long int val)
696{
697 char buffer[66];
698 register char *p, *db, *de;
699 long int new_val;
700 int sl= 0;
701 unsigned long int uval = (unsigned long int) val;
702
703 p= &buffer[sizeof(buffer) - 1];
704 *p= '\0';
705
706 if (radix < 0)
707 {
708 if (val < 0)
709 {
710 sl= 1;
711 /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
712 uval = (unsigned long int)0 - uval;
713 }
714 }
715
716 new_val = (long) (uval / 10);
717 *--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
718 val= new_val;
719
720 while (val != 0)
721 {
722 new_val= val / 10;
723 *--p= '0' + (char) (val - new_val * 10);
724 val= new_val;
725 }
726
727 if (sl)
728 {
729 *--p= '-';
730 }
731
732 for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
733 {
734 int cnvres= cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de);
735 if (cnvres > 0)
736 dst+= cnvres;
737 else
738 break;
739 }
740 return (int) (dst - db);
741}
742
743
744static size_t
745my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs,
746 char *dst, size_t len, int radix, longlong val)
747{
748 char buffer[65];
749 register char *p, *db, *de;
750 long long_val;
751 int sl= 0;
752 ulonglong uval= (ulonglong) val;
753
754 if (radix < 0)
755 {
756 if (val < 0)
757 {
758 sl= 1;
759 /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
760 uval = (ulonglong)0 - uval;
761 }
762 }
763
764 p= &buffer[sizeof(buffer)-1];
765 *p='\0';
766
767 if (uval == 0)
768 {
769 *--p= '0';
770 goto cnv;
771 }
772
773 while (uval > (ulonglong) LONG_MAX)
774 {
775 ulonglong quo= uval/(uint) 10;
776 uint rem= (uint) (uval- quo* (uint) 10);
777 *--p= '0' + rem;
778 uval= quo;
779 }
780
781 long_val= (long) uval;
782 while (long_val != 0)
783 {
784 long quo= long_val/10;
785 *--p= (char) ('0' + (long_val - quo*10));
786 long_val= quo;
787 }
788
789cnv:
790 if (sl)
791 {
792 *--p= '-';
793 }
794
795 for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
796 {
797 int cnvres= cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
798 if (cnvres > 0)
799 dst+= cnvres;
800 else
801 break;
802 }
803 return (int) (dst -db);
804}
805
806#endif /* HAVE_CHARSET_mb2_or_mb4 */
807
808
809#ifdef HAVE_CHARSET_mb2
810/**
811 Convert a Unicode code point to a digit.
812 @param wc - the input Unicode code point
813 @param[OUT] c - the output character representing the digit value 0..9
814
815 @return 0 - if wc is a good digit
816 @return 1 - if wc is not a digit
817*/
818static inline my_bool
819wc2digit_uchar(uchar *c, my_wc_t wc)
820{
821 return wc > '9' || (c[0]= (uchar) (wc - '0')) > 9;
822}
823
824
825static longlong
826my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
827 const char *nptr, char **endptr, int *error)
828{
829 const uchar *s, *end, *start, *n_end, *true_end;
830 uchar c;
831 unsigned long i, j, k;
832 ulonglong li;
833 int negative;
834 ulong cutoff, cutoff2, cutoff3;
835 my_wc_t wc;
836 int res;
837 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
838
839 s= (const uchar *) nptr;
840 /* If fixed length string */
841 if (endptr)
842 {
843 /*
844 Make sure string length is even.
845 Odd length indicates a bug in the caller.
846 Assert in debug, round in production.
847 */
848 DBUG_ASSERT((*endptr - (const char *) s) % 2 == 0);
849 end= s + ((*endptr - (const char*) s) / 2) * 2;
850
851 for ( ; ; ) /* Skip leading spaces and tabs */
852 {
853 if ((res= mb_wc(cs, &wc, s, end)) <= 0)
854 goto no_conv;
855 s+= res;
856 if (wc != ' ' && wc != '\t')
857 break;
858 }
859 }
860 else
861 {
862 /* We don't support null terminated strings in UCS2 */
863 goto no_conv;
864 }
865
866 /* Check for a sign. */
867 negative= 0;
868 if (wc == '-')
869 {
870 *error= -1; /* Mark as negative number */
871 negative= 1;
872 if ((res= mb_wc(cs, &wc, s, end)) <= 0)
873 goto no_conv;
874 s+= res; /* wc is now expected to hold the first digit. */
875 cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
876 cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
877 cutoff3= MAX_NEGATIVE_NUMBER % 100;
878 }
879 else
880 {
881 *error= 0;
882 if (wc == '+')
883 {
884 if ((res= mb_wc(cs, &wc, s, end)) <= 0)
885 goto no_conv;
886 s+= res; /* wc is now expected to hold the first digit. */
887 }
888 cutoff= ULONGLONG_MAX / LFACTOR2;
889 cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
890 cutoff3= ULONGLONG_MAX % 100;
891 }
892
893 /*
894 The code below assumes that 'wc' holds the first digit
895 and 's' points to the next character after it.
896
897 Scan pre-zeros if any.
898 */
899 if (wc == '0')
900 {
901 i= 0;
902 for ( ; ; s+= res)
903 {
904 if (s == end)
905 goto end_i; /* Return 0 */
906 if ((res= mb_wc(cs, &wc, s, end)) <= 0)
907 goto no_conv;
908 if (wc != '0')
909 break;
910 }
911 n_end= s + 2 * INIT_CNT;
912 }
913 else
914 {
915 /* Read first digit to check that it's a valid number */
916 if ((i= (wc - '0')) > 9)
917 goto no_conv;
918 n_end= s + 2 * (INIT_CNT-1);
919 }
920
921 /* Handle first 9 digits and store them in i */
922 if (n_end > end)
923 n_end= end;
924 for ( ; ; s+= res)
925 {
926 if ((res= mb_wc(cs, &wc, s, n_end)) <= 0)
927 break;
928 if (wc2digit_uchar(&c, wc))
929 goto end_i;
930 i= i*10+c;
931 }
932 if (s == end)
933 goto end_i;
934
935 /* Handle next 9 digits and store them in j */
936 j= 0;
937 start= s; /* Used to know how much to shift i */
938 n_end= true_end= s + 2 * INIT_CNT;
939 if (n_end > end)
940 n_end= end;
941 do
942 {
943 if ((res= mb_wc(cs, &wc, s, end)) <= 0)
944 goto no_conv;
945 if (wc2digit_uchar(&c, wc))
946 goto end_i_and_j;
947 s+= res;
948 j= j * 10 + c;
949 } while (s != n_end);
950 if (s == end)
951 {
952 if (s != true_end)
953 goto end_i_and_j;
954 goto end3;
955 }
956
957 /* Handle the next 1 or 2 digits and store them in k */
958 if ((res= mb_wc(cs, &wc, s, end)) <= 0)
959 goto no_conv;
960 if ((k= (wc - '0')) > 9)
961 goto end3;
962 s+= res;
963
964 if (s == end)
965 goto end4;
966 if ((res= mb_wc(cs, &wc, s, end)) <= 0)
967 goto no_conv;
968 if (wc2digit_uchar(&c, wc))
969 goto end4;
970 s+= res;
971 k= k*10+c;
972 *endptr= (char*) s;
973
974 /* number string should have ended here */
975 if (s != end && mb_wc(cs, &wc, s, end) > 0 && ((uchar) (wc - '0')) <= 9)
976 goto overflow;
977
978 /* Check that we didn't get an overflow with the last digit */
979 if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
980 k > cutoff3)))
981 goto overflow;
982 li=i*LFACTOR2+ (ulonglong) j*100 + k;
983 return (longlong) li;
984
985overflow: /* *endptr is set here */
986 *error= MY_ERRNO_ERANGE;
987 return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
988
989end_i:
990 *endptr= (char*) s;
991 return (negative ? ((longlong) -(long) i) : (longlong) i);
992
993end_i_and_j:
994 li= (ulonglong) i * lfactor[(size_t) (s-start) / 2] + j;
995 *endptr= (char*) s;
996 return (negative ? -((longlong) li) : (longlong) li);
997
998end3:
999 li=(ulonglong) i*LFACTOR+ (ulonglong) j;
1000 *endptr= (char*) s;
1001 return (negative ? -((longlong) li) : (longlong) li);
1002
1003end4:
1004 li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
1005 *endptr= (char*) s;
1006 if (negative)
1007 {
1008 if (li > MAX_NEGATIVE_NUMBER)
1009 goto overflow;
1010 return -((longlong) li);
1011 }
1012 return (longlong) li;
1013
1014no_conv:
1015 /* There was no number to convert. */
1016 *error= MY_ERRNO_EDOM;
1017 *endptr= (char *) nptr;
1018 return 0;
1019}
1020
1021
1022static size_t
1023my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
1024 const char *str, const char *end, int sequence_type)
1025{
1026 const char *str0= str;
1027 my_wc_t wc;
1028 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1029 int res;
1030
1031 switch (sequence_type)
1032 {
1033 case MY_SEQ_SPACES:
1034 for (res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end);
1035 res > 0 && wc == ' ';
1036 str+= res,
1037 res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end))
1038 {
1039 }
1040 return (size_t) (str - str0);
1041 case MY_SEQ_NONSPACES:
1042 DBUG_ASSERT(0); /* Not implemented */
1043 /* pass through */
1044 default:
1045 return 0;
1046 }
1047}
1048
1049
1050static void
1051my_fill_mb2(CHARSET_INFO *cs, char *s, size_t slen, int fill)
1052{
1053 char buf[10], *last;
1054 size_t buflen, remainder;
1055
1056 DBUG_ASSERT((slen % 2) == 0);
1057
1058 buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
1059 (uchar*) buf + sizeof(buf));
1060
1061 DBUG_ASSERT(buflen > 0);
1062
1063 /*
1064 "last" in the last position where a sequence of "buflen" bytes can start.
1065 */
1066 for (last= s + slen - buflen; s <= last; s+= buflen)
1067 {
1068 /* Enough space for the character */
1069 memcpy(s, buf, buflen);
1070 }
1071
1072 /*
1073 If there are some more space which is not enough
1074 for the whole multibyte character, then add trailing zeros.
1075 */
1076 if ((remainder= last + buflen - s) > 0)
1077 bzero(s, (size_t) remainder);
1078}
1079
1080
1081static size_t
1082my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap)
1083{
1084 char *start=dst, *end= dst + n - 1;
1085 for (; *fmt ; fmt++)
1086 {
1087 if (fmt[0] != '%')
1088 {
1089 if (dst == end) /* End of buffer */
1090 break;
1091
1092 *dst++='\0';
1093 *dst++= *fmt; /* Copy ordinary char */
1094 continue;
1095 }
1096
1097 fmt++;
1098
1099 /* Skip if max size is used (to be compatible with printf) */
1100 while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' || *fmt == '-')
1101 fmt++;
1102
1103 if (*fmt == 'l')
1104 fmt++;
1105
1106 if (*fmt == 's') /* String parameter */
1107 {
1108 char *par= va_arg(ap, char *);
1109 size_t plen;
1110 size_t left_len= (size_t)(end-dst);
1111 if (!par)
1112 par= (char*) "(null)";
1113 plen= strlen(par);
1114 if (left_len <= plen * 2)
1115 plen = left_len / 2 - 1;
1116
1117 for ( ; plen ; plen--, dst+=2, par++)
1118 {
1119 dst[0]= '\0';
1120 dst[1]= par[0];
1121 }
1122 continue;
1123 }
1124 else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
1125 {
1126 int iarg;
1127 char nbuf[16];
1128 char *pbuf= nbuf;
1129
1130 if ((size_t) (end - dst) < 32)
1131 break;
1132 iarg= va_arg(ap, int);
1133 if (*fmt == 'd')
1134 int10_to_str((long) iarg, nbuf, -10);
1135 else
1136 int10_to_str((long) (uint) iarg, nbuf,10);
1137
1138 for (; pbuf[0]; pbuf++)
1139 {
1140 *dst++= '\0';
1141 *dst++= *pbuf;
1142 }
1143 continue;
1144 }
1145
1146 /* We come here on '%%', unknown code or too long parameter */
1147 if (dst == end)
1148 break;
1149 *dst++= '\0';
1150 *dst++= '%'; /* % used as % or unknown code */
1151 }
1152
1153 DBUG_ASSERT(dst <= end);
1154 *dst='\0'; /* End of errmessage */
1155 return (size_t) (dst - start);
1156}
1157
1158
1159static size_t
1160my_snprintf_mb2(CHARSET_INFO *cs __attribute__((unused)),
1161 char* to, size_t n, const char* fmt, ...)
1162{
1163 va_list args;
1164 va_start(args,fmt);
1165 return my_vsnprintf_mb2(to, n, fmt, args);
1166}
1167
1168
1169static size_t
1170my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
1171 const char *ptr, size_t length)
1172{
1173 const char *end= ptr + length;
1174 while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0')
1175 end-= 2;
1176 return (size_t) (end - ptr);
1177}
1178
1179#endif /* HAVE_CHARSET_mb2*/
1180
1181
1182/*
1183 Next part is actually HAVE_CHARSET_utf16-specific,
1184 but the JSON functions needed my_utf16_uni()
1185 so the #ifdef was moved lower.
1186*/
1187
1188
/*
  D800..DB7F - Non-private surrogate high (896 pages)
  DB80..DBFF - Private surrogate high     (128 pages)
  DC00..DFFF - Surrogate low              (1024 codes in a page)
*/
#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
#define MY_UTF16_SURROGATE_HIGH_LAST  0xDBFF
#define MY_UTF16_SURROGATE_LOW_FIRST  0xDC00
#define MY_UTF16_SURROGATE_LOW_LAST   0xDFFF

/* Test if a byte is a leading byte of a high surrogate: 110110?? */
#define MY_UTF16_HIGH_HEAD(x)      ((((uchar) (x)) & 0xFC) == 0xD8)
/* Test if a byte is a leading byte of a low surrogate: 110111?? */
#define MY_UTF16_LOW_HEAD(x)       ((((uchar) (x)) & 0xFC) == 0xDC)
/* Test if a byte is a leading byte of a high or low surrogate head: */
#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
/* Test if a Unicode code point is a high or low surrogate head */
#define MY_UTF16_SURROGATE(x)      (((x) & 0xF800) == 0xD800)

/*
  Make a code point from the two bytes of a 2-byte character.
  The arguments are parenthesized, so any expression can be passed.
*/
#define MY_UTF16_WC2(a, b)         (((a) << 8) + (b))

/*
  Make a code point from the four bytes of a surrogate pair.
  a= 110110??  (<< 18)
  b= ????????  (<< 10)
  c= 110111??  (<<  8)
  d= ????????  (<<  0)
  The arguments are parenthesized, so any expression can be passed.
*/
#define MY_UTF16_WC4(a, b, c, d) ((((a) & 3) << 18) + ((b) << 10) + \
                                  (((c) & 3) << 8) + (d) + 0x10000)

/* Byte sequence classification (b1 and b3 are not examined) */
#define IS_MB2_CHAR(b0,b1)       (!MY_UTF16_SURROGATE_HEAD(b0))
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2))
1219
1220static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
1221{
1222 my_wc_t wc= MY_UTF16_WC2(b0, b1);
1223 MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
1224 return (int) (page ? page[wc & 0xFF].sort : wc);
1225}
/*
  Collation routines are generated by including "strcoll.ic" several
  times, each time with a different set of parameter macros:
    MY_FUNCTION_NAME - builds the names of the generated functions
    WEIGHT_ILSEQ     - weight of a byte of an ill-formed sequence
    WEIGHT_MB2       - weight of a 2-byte character
    WEIGHT_MB4       - weight of a 4-byte character (surrogate pair)
  NOTE(review): the macros are defined again before every include
  without an #undef here, so "strcoll.ic" presumably undefines them
  at its end - confirm against strcoll.ic.
*/

/* utf16_general_ci: every 4-byte character gets the replacement weight */
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16_general_ci
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b0,b1)
#define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.ic"

/* utf16_bin: the weight of a character is its code point */
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        ((int) MY_UTF16_WC2(b0, b1))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF16_WC4(b0, b1, b2, b3))
#include "strcoll.ic"

/* utf16_general_nopad_ci: general_ci weights, DEFINE_STRNNCOLLSP_NOPAD set */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16_general_nopad_ci
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b0,b1)
#define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.ic"

/* utf16_nopad_bin: code point weights, DEFINE_STRNNCOLLSP_NOPAD set */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16_nopad_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        ((int) MY_UTF16_WC2(b0, b1))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF16_WC4(b0, b1, b2, b3))
#include "strcoll.ic"

/* The classification macros were needed only for the includes above */
#undef IS_MB2_CHAR
#undef IS_MB4_CHAR
1254
1255/*
1256 These two functions are used in JSON library, so made exportable
1257 and unconditionally compiled into the library.
1258*/
1259
1260/*static*/ int
1261my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
1262 my_wc_t *pwc, const uchar *s, const uchar *e)
1263{
1264 if (s + 2 > e)
1265 return MY_CS_TOOSMALL2;
1266
1267 /*
1268 High bytes: 0xD[89AB] = B'110110??'
1269 Low bytes: 0xD[CDEF] = B'110111??'
1270 Surrogate mask: 0xFC = B'11111100'
1271 */
1272
1273 if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
1274 {
1275 if (s + 4 > e)
1276 return MY_CS_TOOSMALL4;
1277
1278 if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrigate pair */
1279 return MY_CS_ILSEQ;
1280
1281 *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
1282 return 4;
1283 }
1284
1285 if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
1286 return MY_CS_ILSEQ;
1287
1288 *pwc= MY_UTF16_WC2(s[0], s[1]);
1289 return 2;
1290}
1291
1292
1293/*static*/ int
1294my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
1295 my_wc_t wc, uchar *s, uchar *e)
1296{
1297 if (wc <= 0xFFFF)
1298 {
1299 if (s + 2 > e)
1300 return MY_CS_TOOSMALL2;
1301 if (MY_UTF16_SURROGATE(wc))
1302 return MY_CS_ILUNI;
1303 *s++= (uchar) (wc >> 8);
1304 *s= (uchar) (wc & 0xFF);
1305 return 2;
1306 }
1307
1308 if (wc <= 0x10FFFF)
1309 {
1310 if (s + 4 > e)
1311 return MY_CS_TOOSMALL4;
1312 *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8;
1313 *s++= (uchar) (wc >> 10) & 0xFF;
1314 *s++= (uchar) ((wc >> 8) & 3) | 0xDC;
1315 *s= (uchar) wc & 0xFF;
1316 return 4;
1317 }
1318
1319 return MY_CS_ILUNI;
1320}
1321
1322
1323#ifdef HAVE_CHARSET_utf16
1324
1325
1326static inline void
1327my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1328{
1329 MY_UNICASE_CHARACTER *page;
1330 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1331 *wc= page[*wc & 0xFF].tolower;
1332}
1333
1334
1335static inline void
1336my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1337{
1338 MY_UNICASE_CHARACTER *page;
1339 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1340 *wc= page[*wc & 0xFF].toupper;
1341}
1342
1343
1344static inline void
1345my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1346{
1347 if (*wc <= uni_plane->maxchar)
1348 {
1349 MY_UNICASE_CHARACTER *page;
1350 if ((page= uni_plane->page[*wc >> 8]))
1351 *wc= page[*wc & 0xFF].sort;
1352 }
1353 else
1354 {
1355 *wc= MY_CS_REPLACEMENT_CHARACTER;
1356 }
1357}
1358
1359
1360
1361static size_t
1362my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
1363 char *dst __attribute__((unused)),
1364 size_t dstlen __attribute__((unused)))
1365{
1366 my_wc_t wc;
1367 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1368 my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
1369 int res;
1370 char *srcend= src + srclen;
1371 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1372 DBUG_ASSERT(src == dst && srclen == dstlen);
1373
1374 while ((src < srcend) &&
1375 (res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1376 {
1377 my_toupper_utf16(uni_plane, &wc);
1378 if (res != wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
1379 break;
1380 src+= res;
1381 }
1382 return srclen;
1383}
1384
1385
1386static void
1387my_hash_sort_utf16_nopad(CHARSET_INFO *cs,
1388 const uchar *s, size_t slen,
1389 ulong *nr1, ulong *nr2)
1390{
1391 my_wc_t wc;
1392 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1393 int res;
1394 const uchar *e= s + slen;
1395 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1396 register ulong m1= *nr1, m2= *nr2;
1397
1398 while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
1399 {
1400 my_tosort_utf16(uni_plane, &wc);
1401 MY_HASH_ADD_16(m1, m2, wc);
1402 s+= res;
1403 }
1404 *nr1= m1;
1405 *nr2= m2;
1406}
1407
1408
1409static void
1410my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
1411 ulong *nr1, ulong *nr2)
1412{
1413 size_t lengthsp= cs->cset->lengthsp(cs, (const char *) s, slen);
1414 my_hash_sort_utf16_nopad(cs, s, lengthsp, nr1, nr2);
1415}
1416
1417
1418static size_t
1419my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
1420 char *dst __attribute__((unused)),
1421 size_t dstlen __attribute__((unused)))
1422{
1423 my_wc_t wc;
1424 my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1425 my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
1426 int res;
1427 char *srcend= src + srclen;
1428 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1429 DBUG_ASSERT(src == dst && srclen == dstlen);
1430
1431 while ((src < srcend) &&
1432 (res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1433 {
1434 my_tolower_utf16(uni_plane, &wc);
1435 if (res != wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
1436 break;
1437 src+= res;
1438 }
1439 return srclen;
1440}
1441
1442
1443static int
1444my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
1445{
1446 my_wc_t wc;
1447 return cs->cset->mb_wc(cs, &wc, str, end);
1448}
1449
1450
1451#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16
1452#define CHARLEN(cs,str,end) my_charlen_utf16(cs,str,end)
1453#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
1454#include "ctype-mb.ic"
1455#undef MY_FUNCTION_NAME
1456#undef CHARLEN
1457#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
1458/* Defines my_well_formed_char_length_utf16 */
1459
1460
1461static size_t
1462my_numchars_utf16(CHARSET_INFO *cs,
1463 const char *b, const char *e)
1464{
1465 size_t nchars= 0;
1466 for ( ; ; nchars++)
1467 {
1468 size_t charlen= my_ismbchar(cs, b, e);
1469 if (!charlen)
1470 break;
1471 b+= charlen;
1472 }
1473 return nchars;
1474}
1475
1476
1477static size_t
1478my_charpos_utf16(CHARSET_INFO *cs,
1479 const char *b, const char *e, size_t pos)
1480{
1481 const char *b0= b;
1482 uint charlen;
1483
1484 for ( ; pos; b+= charlen, pos--)
1485 {
1486 if (!(charlen= my_ismbchar(cs, b, e)))
1487 return (e + 2 - b0); /* Error, return pos outside the string */
1488 }
1489 return (size_t) (pos ? (e + 2 - b0) : (b - b0));
1490}
1491
1492
1493static int
1494my_wildcmp_utf16_ci(CHARSET_INFO *cs,
1495 const char *str,const char *str_end,
1496 const char *wildstr,const char *wildend,
1497 int escape, int w_one, int w_many)
1498{
1499 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1500 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1501 escape, w_one, w_many, uni_plane);
1502}
1503
1504
1505static int
1506my_wildcmp_utf16_bin(CHARSET_INFO *cs,
1507 const char *str,const char *str_end,
1508 const char *wildstr,const char *wildend,
1509 int escape, int w_one, int w_many)
1510{
1511 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1512 escape, w_one, w_many, NULL);
1513}
1514
1515
1516static void
1517my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
1518 const uchar *pos, size_t len,
1519 ulong *nr1, ulong *nr2)
1520{
1521 const uchar *end= pos + len;
1522 register ulong m1= *nr1, m2= *nr2;
1523
1524 for ( ; pos < end ; pos++)
1525 {
1526 MY_HASH_ADD(m1, m2, (uint)*pos);
1527 }
1528 *nr1= m1;
1529 *nr2= m2;
1530}
1531
1532
1533static void
1534my_hash_sort_utf16_bin(CHARSET_INFO *cs,
1535 const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
1536{
1537 size_t lengthsp= cs->cset->lengthsp(cs, (const char *) pos, len);
1538 my_hash_sort_utf16_nopad_bin(cs, pos, lengthsp, nr1, nr2);
1539}
1540
1541
1542static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
1543{
1544 NULL, /* init */
1545 my_strnncoll_utf16_general_ci,
1546 my_strnncollsp_utf16_general_ci,
1547 my_strnxfrm_unicode,
1548 my_strnxfrmlen_unicode,
1549 my_like_range_generic,
1550 my_wildcmp_utf16_ci,
1551 my_strcasecmp_mb2_or_mb4,
1552 my_instr_mb,
1553 my_hash_sort_utf16,
1554 my_propagate_simple
1555};
1556
1557
1558static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
1559{
1560 NULL, /* init */
1561 my_strnncoll_utf16_bin,
1562 my_strnncollsp_utf16_bin,
1563 my_strnxfrm_unicode_full_bin,
1564 my_strnxfrmlen_unicode_full_bin,
1565 my_like_range_generic,
1566 my_wildcmp_utf16_bin,
1567 my_strcasecmp_mb2_or_mb4,
1568 my_instr_mb,
1569 my_hash_sort_utf16_bin,
1570 my_propagate_simple
1571};
1572
1573
1574static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
1575{
1576 NULL, /* init */
1577 my_strnncoll_utf16_general_ci,
1578 my_strnncollsp_utf16_general_nopad_ci,
1579 my_strnxfrm_unicode_nopad,
1580 my_strnxfrmlen_unicode,
1581 my_like_range_generic,
1582 my_wildcmp_utf16_ci,
1583 my_strcasecmp_mb2_or_mb4,
1584 my_instr_mb,
1585 my_hash_sort_utf16_nopad,
1586 my_propagate_simple
1587};
1588
1589
1590static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler =
1591{
1592 NULL, /* init */
1593 my_strnncoll_utf16_bin,
1594 my_strnncollsp_utf16_nopad_bin,
1595 my_strnxfrm_unicode_full_nopad_bin,
1596 my_strnxfrmlen_unicode_full_bin,
1597 my_like_range_generic,
1598 my_wildcmp_utf16_bin,
1599 my_strcasecmp_mb2_or_mb4,
1600 my_instr_mb,
1601 my_hash_sort_utf16_nopad_bin,
1602 my_propagate_simple
1603};
1604
1605
1606MY_CHARSET_HANDLER my_charset_utf16_handler=
1607{
1608 NULL, /* init */
1609 my_numchars_utf16,
1610 my_charpos_utf16,
1611 my_lengthsp_mb2,
1612 my_numcells_mb,
1613 my_utf16_uni, /* mb_wc */
1614 my_uni_utf16, /* wc_mb */
1615 my_mb_ctype_mb,
1616 my_caseup_str_mb2_or_mb4,
1617 my_casedn_str_mb2_or_mb4,
1618 my_caseup_utf16,
1619 my_casedn_utf16,
1620 my_snprintf_mb2,
1621 my_l10tostr_mb2_or_mb4,
1622 my_ll10tostr_mb2_or_mb4,
1623 my_fill_mb2,
1624 my_strntol_mb2_or_mb4,
1625 my_strntoul_mb2_or_mb4,
1626 my_strntoll_mb2_or_mb4,
1627 my_strntoull_mb2_or_mb4,
1628 my_strntod_mb2_or_mb4,
1629 my_strtoll10_mb2,
1630 my_strntoull10rnd_mb2_or_mb4,
1631 my_scan_mb2,
1632 my_charlen_utf16,
1633 my_well_formed_char_length_utf16,
1634 my_copy_fix_mb2_or_mb4,
1635 my_uni_utf16,
1636};
1637
1638
1639struct charset_info_st my_charset_utf16_general_ci=
1640{
1641 54,0,0, /* number */
1642 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1643 "utf16", /* cs name */
1644 "utf16_general_ci", /* name */
1645 "UTF-16 Unicode", /* comment */
1646 NULL, /* tailoring */
1647 NULL, /* ctype */
1648 NULL, /* to_lower */
1649 NULL, /* to_upper */
1650 NULL, /* sort_order */
1651 NULL, /* uca */
1652 NULL, /* tab_to_uni */
1653 NULL, /* tab_from_uni */
1654 &my_unicase_default, /* caseinfo */
1655 NULL, /* state_map */
1656 NULL, /* ident_map */
1657 1, /* strxfrm_multiply */
1658 1, /* caseup_multiply */
1659 1, /* casedn_multiply */
1660 2, /* mbminlen */
1661 4, /* mbmaxlen */
1662 0, /* min_sort_char */
1663 0xFFFF, /* max_sort_char */
1664 ' ', /* pad char */
1665 0, /* escape_with_backslash_is_dangerous */
1666 1, /* levels_for_order */
1667 &my_charset_utf16_handler,
1668 &my_collation_utf16_general_ci_handler
1669};
1670
1671
1672struct charset_info_st my_charset_utf16_bin=
1673{
1674 55,0,0, /* number */
1675 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1676 "utf16", /* cs name */
1677 "utf16_bin", /* name */
1678 "UTF-16 Unicode", /* comment */
1679 NULL, /* tailoring */
1680 NULL, /* ctype */
1681 NULL, /* to_lower */
1682 NULL, /* to_upper */
1683 NULL, /* sort_order */
1684 NULL, /* uca */
1685 NULL, /* tab_to_uni */
1686 NULL, /* tab_from_uni */
1687 &my_unicase_default, /* caseinfo */
1688 NULL, /* state_map */
1689 NULL, /* ident_map */
1690 1, /* strxfrm_multiply */
1691 1, /* caseup_multiply */
1692 1, /* casedn_multiply */
1693 2, /* mbminlen */
1694 4, /* mbmaxlen */
1695 0, /* min_sort_char */
1696 0xFFFF, /* max_sort_char */
1697 ' ', /* pad char */
1698 0, /* escape_with_backslash_is_dangerous */
1699 1, /* levels_for_order */
1700 &my_charset_utf16_handler,
1701 &my_collation_utf16_bin_handler
1702};
1703
1704
1705struct charset_info_st my_charset_utf16_general_nopad_ci=
1706{
1707 MY_NOPAD_ID(54),0,0, /* number */
1708 MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
1709 "utf16", /* cs name */
1710 "utf16_general_nopad_ci", /* name */
1711 "UTF-16 Unicode", /* comment */
1712 NULL, /* tailoring */
1713 NULL, /* ctype */
1714 NULL, /* to_lower */
1715 NULL, /* to_upper */
1716 NULL, /* sort_order */
1717 NULL, /* uca */
1718 NULL, /* tab_to_uni */
1719 NULL, /* tab_from_uni */
1720 &my_unicase_default, /* caseinfo */
1721 NULL, /* state_map */
1722 NULL, /* ident_map */
1723 1, /* strxfrm_multiply */
1724 1, /* caseup_multiply */
1725 1, /* casedn_multiply */
1726 2, /* mbminlen */
1727 4, /* mbmaxlen */
1728 0, /* min_sort_char */
1729 0xFFFF, /* max_sort_char */
1730 ' ', /* pad char */
1731 0, /* escape_with_backslash_is_dangerous */
1732 1, /* levels_for_order */
1733 &my_charset_utf16_handler,
1734 &my_collation_utf16_general_nopad_ci_handler
1735};
1736
1737
1738struct charset_info_st my_charset_utf16_nopad_bin=
1739{
1740 MY_NOPAD_ID(55),0,0, /* number */
1741 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
1742 MY_CS_NOPAD,
1743 "utf16", /* cs name */
1744 "utf16_nopad_bin", /* name */
1745 "UTF-16 Unicode", /* comment */
1746 NULL, /* tailoring */
1747 NULL, /* ctype */
1748 NULL, /* to_lower */
1749 NULL, /* to_upper */
1750 NULL, /* sort_order */
1751 NULL, /* uca */
1752 NULL, /* tab_to_uni */
1753 NULL, /* tab_from_uni */
1754 &my_unicase_default, /* caseinfo */
1755 NULL, /* state_map */
1756 NULL, /* ident_map */
1757 1, /* strxfrm_multiply */
1758 1, /* caseup_multiply */
1759 1, /* casedn_multiply */
1760 2, /* mbminlen */
1761 4, /* mbmaxlen */
1762 0, /* min_sort_char */
1763 0xFFFF, /* max_sort_char */
1764 ' ', /* pad char */
1765 0, /* escape_with_backslash_is_dangerous */
1766 1, /* levels_for_order */
1767 &my_charset_utf16_handler,
1768 &my_collation_utf16_nopad_bin_handler
1769};
1770
1771
1772#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b1))
1773#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3))
1774
1775#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci
1776#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
1777#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
1778#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
1779#include "strcoll.ic"
1780
1781#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin
1782#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
1783#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0))
1784#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2))
1785#include "strcoll.ic"
1786
1787#define DEFINE_STRNNCOLLSP_NOPAD
1788#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_nopad_ci
1789#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
1790#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
1791#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
1792#include "strcoll.ic"
1793
1794#define DEFINE_STRNNCOLLSP_NOPAD
1795#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_nopad_bin
1796#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
1797#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0))
1798#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2))
1799#include "strcoll.ic"
1800
1801#undef IS_MB2_CHAR
1802#undef IS_MB4_CHAR
1803
1804static int
1805my_utf16le_uni(CHARSET_INFO *cs __attribute__((unused)),
1806 my_wc_t *pwc, const uchar *s, const uchar *e)
1807{
1808 my_wc_t lo;
1809
1810 if (s + 2 > e)
1811 return MY_CS_TOOSMALL2;
1812
1813 if ((*pwc= uint2korr(s)) < MY_UTF16_SURROGATE_HIGH_FIRST ||
1814 (*pwc > MY_UTF16_SURROGATE_LOW_LAST))
1815 return 2; /* [0000-D7FF,E000-FFFF] */
1816
1817 if (*pwc >= MY_UTF16_SURROGATE_LOW_FIRST)
1818 return MY_CS_ILSEQ; /* [DC00-DFFF] Low surrogate part without high part */
1819
1820 if (s + 4 > e)
1821 return MY_CS_TOOSMALL4;
1822
1823 s+= 2;
1824
1825 if ((lo= uint2korr(s)) < MY_UTF16_SURROGATE_LOW_FIRST ||
1826 lo > MY_UTF16_SURROGATE_LOW_LAST)
1827 return MY_CS_ILSEQ; /* Expected low surrogate part, got something else */
1828
1829 *pwc= 0x10000 + (((*pwc & 0x3FF) << 10) | (lo & 0x3FF));
1830 return 4;
1831}
1832
1833
1834static int
1835my_uni_utf16le(CHARSET_INFO *cs __attribute__((unused)),
1836 my_wc_t wc, uchar *s, uchar *e)
1837{
1838 uint32 first, second, total;
1839 if (wc < MY_UTF16_SURROGATE_HIGH_FIRST ||
1840 (wc > MY_UTF16_SURROGATE_LOW_LAST &&
1841 wc <= 0xFFFF))
1842 {
1843 if (s + 2 > e)
1844 return MY_CS_TOOSMALL2;
1845 int2store(s, wc);
1846 return 2; /* [0000-D7FF,E000-FFFF] */
1847 }
1848
1849 if (wc < 0xFFFF || wc > 0x10FFFF)
1850 return MY_CS_ILUNI; /* [D800-DFFF,10FFFF+] */
1851
1852 if (s + 4 > e)
1853 return MY_CS_TOOSMALL4;
1854
1855 wc-= 0x10000;
1856 first= (0xD800 | ((wc >> 10) & 0x3FF));
1857 second= (0xDC00 | (wc & 0x3FF));
1858 total= first | (second << 16);
1859 int4store(s, total);
1860 return 4; /* [010000-10FFFF] */
1861}
1862
1863
1864static size_t
1865my_lengthsp_utf16le(CHARSET_INFO *cs __attribute__((unused)),
1866 const char *ptr, size_t length)
1867{
1868 const char *end= ptr + length;
1869 while (end > ptr + 1 && uint2korr(end - 2) == ' ')
1870 end-= 2;
1871 return (size_t) (end - ptr);
1872}
1873
1874
1875static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
1876{
1877 NULL, /* init */
1878 my_strnncoll_utf16le_general_ci,
1879 my_strnncollsp_utf16le_general_ci,
1880 my_strnxfrm_unicode,
1881 my_strnxfrmlen_unicode,
1882 my_like_range_generic,
1883 my_wildcmp_utf16_ci,
1884 my_strcasecmp_mb2_or_mb4,
1885 my_instr_mb,
1886 my_hash_sort_utf16,
1887 my_propagate_simple
1888};
1889
1890
1891static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
1892{
1893 NULL, /* init */
1894 my_strnncoll_utf16le_bin,
1895 my_strnncollsp_utf16le_bin,
1896 my_strnxfrm_unicode_full_bin,
1897 my_strnxfrmlen_unicode_full_bin,
1898 my_like_range_generic,
1899 my_wildcmp_utf16_bin,
1900 my_strcasecmp_mb2_or_mb4,
1901 my_instr_mb,
1902 my_hash_sort_utf16_bin,
1903 my_propagate_simple
1904};
1905
1906
1907static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
1908{
1909 NULL, /* init */
1910 my_strnncoll_utf16le_general_ci,
1911 my_strnncollsp_utf16le_general_nopad_ci,
1912 my_strnxfrm_unicode_nopad,
1913 my_strnxfrmlen_unicode,
1914 my_like_range_generic,
1915 my_wildcmp_utf16_ci,
1916 my_strcasecmp_mb2_or_mb4,
1917 my_instr_mb,
1918 my_hash_sort_utf16_nopad,
1919 my_propagate_simple
1920};
1921
1922
1923static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler =
1924{
1925 NULL, /* init */
1926 my_strnncoll_utf16le_bin,
1927 my_strnncollsp_utf16le_nopad_bin,
1928 my_strnxfrm_unicode_full_nopad_bin,
1929 my_strnxfrmlen_unicode_full_bin,
1930 my_like_range_generic,
1931 my_wildcmp_utf16_bin,
1932 my_strcasecmp_mb2_or_mb4,
1933 my_instr_mb,
1934 my_hash_sort_utf16_nopad_bin,
1935 my_propagate_simple
1936};
1937
1938
1939static MY_CHARSET_HANDLER my_charset_utf16le_handler=
1940{
1941 NULL, /* init */
1942 my_numchars_utf16,
1943 my_charpos_utf16,
1944 my_lengthsp_utf16le,
1945 my_numcells_mb,
1946 my_utf16le_uni, /* mb_wc */
1947 my_uni_utf16le, /* wc_mb */
1948 my_mb_ctype_mb,
1949 my_caseup_str_mb2_or_mb4,
1950 my_casedn_str_mb2_or_mb4,
1951 my_caseup_utf16,
1952 my_casedn_utf16,
1953 my_snprintf_mb2,
1954 my_l10tostr_mb2_or_mb4,
1955 my_ll10tostr_mb2_or_mb4,
1956 my_fill_mb2,
1957 my_strntol_mb2_or_mb4,
1958 my_strntoul_mb2_or_mb4,
1959 my_strntoll_mb2_or_mb4,
1960 my_strntoull_mb2_or_mb4,
1961 my_strntod_mb2_or_mb4,
1962 my_strtoll10_mb2,
1963 my_strntoull10rnd_mb2_or_mb4,
1964 my_scan_mb2,
1965 my_charlen_utf16,
1966 my_well_formed_char_length_utf16,
1967 my_copy_fix_mb2_or_mb4,
1968 my_uni_utf16le,
1969};
1970
1971
1972struct charset_info_st my_charset_utf16le_general_ci=
1973{
1974 56,0,0, /* number */
1975 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1976 "utf16le", /* cs name */
1977 "utf16le_general_ci",/* name */
1978 "UTF-16LE Unicode", /* comment */
1979 NULL, /* tailoring */
1980 NULL, /* ctype */
1981 NULL, /* to_lower */
1982 NULL, /* to_upper */
1983 NULL, /* sort_order */
1984 NULL, /* uca */
1985 NULL, /* tab_to_uni */
1986 NULL, /* tab_from_uni */
1987 &my_unicase_default, /* caseinfo */
1988 NULL, /* state_map */
1989 NULL, /* ident_map */
1990 1, /* strxfrm_multiply */
1991 1, /* caseup_multiply */
1992 1, /* casedn_multiply */
1993 2, /* mbminlen */
1994 4, /* mbmaxlen */
1995 0, /* min_sort_char */
1996 0xFFFF, /* max_sort_char */
1997 ' ', /* pad char */
1998 0, /* escape_with_backslash_is_dangerous */
1999 1, /* levels_for_order */
2000 &my_charset_utf16le_handler,
2001 &my_collation_utf16le_general_ci_handler
2002};
2003
2004
2005struct charset_info_st my_charset_utf16le_bin=
2006{
2007 62,0,0, /* number */
2008 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
2009 "utf16le", /* cs name */
2010 "utf16le_bin", /* name */
2011 "UTF-16LE Unicode", /* comment */
2012 NULL, /* tailoring */
2013 NULL, /* ctype */
2014 NULL, /* to_lower */
2015 NULL, /* to_upper */
2016 NULL, /* sort_order */
2017 NULL, /* uca */
2018 NULL, /* tab_to_uni */
2019 NULL, /* tab_from_uni */
2020 &my_unicase_default, /* caseinfo */
2021 NULL, /* state_map */
2022 NULL, /* ident_map */
2023 1, /* strxfrm_multiply */
2024 1, /* caseup_multiply */
2025 1, /* casedn_multiply */
2026 2, /* mbminlen */
2027 4, /* mbmaxlen */
2028 0, /* min_sort_char */
2029 0xFFFF, /* max_sort_char */
2030 ' ', /* pad char */
2031 0, /* escape_with_backslash_is_dangerous */
2032 1, /* levels_for_order */
2033 &my_charset_utf16le_handler,
2034 &my_collation_utf16le_bin_handler
2035};
2036
2037
2038struct charset_info_st my_charset_utf16le_general_nopad_ci=
2039{
2040 MY_NOPAD_ID(56),0,0, /* number */
2041 MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
2042 "utf16le", /* cs name */
2043 "utf16le_general_nopad_ci",/* name */
2044 "UTF-16LE Unicode", /* comment */
2045 NULL, /* tailoring */
2046 NULL, /* ctype */
2047 NULL, /* to_lower */
2048 NULL, /* to_upper */
2049 NULL, /* sort_order */
2050 NULL, /* uca */
2051 NULL, /* tab_to_uni */
2052 NULL, /* tab_from_uni */
2053 &my_unicase_default, /* caseinfo */
2054 NULL, /* state_map */
2055 NULL, /* ident_map */
2056 1, /* strxfrm_multiply */
2057 1, /* caseup_multiply */
2058 1, /* casedn_multiply */
2059 2, /* mbminlen */
2060 4, /* mbmaxlen */
2061 0, /* min_sort_char */
2062 0xFFFF, /* max_sort_char */
2063 ' ', /* pad char */
2064 0, /* escape_with_backslash_is_dangerous */
2065 1, /* levels_for_order */
2066 &my_charset_utf16le_handler,
2067 &my_collation_utf16le_general_nopad_ci_handler
2068};
2069
2070
2071struct charset_info_st my_charset_utf16le_nopad_bin=
2072{
2073 MY_NOPAD_ID(62),0,0, /* number */
2074 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
2075 MY_CS_NOPAD,
2076 "utf16le", /* cs name */
2077 "utf16le_nopad_bin", /* name */
2078 "UTF-16LE Unicode", /* comment */
2079 NULL, /* tailoring */
2080 NULL, /* ctype */
2081 NULL, /* to_lower */
2082 NULL, /* to_upper */
2083 NULL, /* sort_order */
2084 NULL, /* uca */
2085 NULL, /* tab_to_uni */
2086 NULL, /* tab_from_uni */
2087 &my_unicase_default, /* caseinfo */
2088 NULL, /* state_map */
2089 NULL, /* ident_map */
2090 1, /* strxfrm_multiply */
2091 1, /* caseup_multiply */
2092 1, /* casedn_multiply */
2093 2, /* mbminlen */
2094 4, /* mbmaxlen */
2095 0, /* min_sort_char */
2096 0xFFFF, /* max_sort_char */
2097 ' ', /* pad char */
2098 0, /* escape_with_backslash_is_dangerous */
2099 1, /* levels_for_order */
2100 &my_charset_utf16le_handler,
2101 &my_collation_utf16le_nopad_bin_handler
2102};
2103
2104
2105#endif /* HAVE_CHARSET_utf16 */
2106
2107
2108#ifdef HAVE_CHARSET_utf32
2109
/*
  Check whether b0 and b1 start a valid UTF32 four-byte sequence.
  Characters greater than U+10FFFF are not accepted.
*/
#define IS_UTF32_MBHEAD4(b0,b1) (!(b0) && ((uchar) (b1) <= 0x10))

/* Only the first two bytes decide validity; b2 and b3 are ignored */
#define IS_MB4_CHAR(b0,b1,b2,b3) (IS_UTF32_MBHEAD4(b0,b1))

/*
  Combine four bytes, most significant first, into one code point.
  Every argument is parenthesized so that an expression argument
  (e.g. "x | y") is shifted as a whole.
*/
#define MY_UTF32_WC4(b0,b1,b2,b3) ((((my_wc_t) (b0)) << 24) + ((b1) << 16) + \
                                   ((b2) << 8) + (b3))
2120
2121static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
2122 uchar b2, uchar b3)
2123{
2124 my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3);
2125 if (wc <= 0xFFFF)
2126 {
2127 MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
2128 return (int) (page ? page[wc & 0xFF].sort : wc);
2129 }
2130 return MY_CS_REPLACEMENT_CHARACTER;
2131}
2132#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_ci
2133#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
2134#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3)
2135#include "strcoll.ic"
2136
2137#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_bin
2138#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
2139#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF32_WC4(b0, b1, b2, b3))
2140#include "strcoll.ic"
2141
2142#define DEFINE_STRNNCOLLSP_NOPAD
2143#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_nopad_ci
2144#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
2145#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3)
2146#include "strcoll.ic"
2147
2148#define DEFINE_STRNNCOLLSP_NOPAD
2149#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_nopad_bin
2150#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
2151#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF32_WC4(b0, b1, b2, b3))
2152#include "strcoll.ic"
2153
2154#undef IS_MB2_CHAR
2155#undef IS_MB4_CHAR
2156
2157
2158static int
2159my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
2160 my_wc_t *pwc, const uchar *s, const uchar *e)
2161{
2162 if (s + 4 > e)
2163 return MY_CS_TOOSMALL4;
2164 *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]);
2165 return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
2166}
2167
2168
2169static int
2170my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
2171 my_wc_t wc, uchar *s, uchar *e)
2172{
2173 if (s + 4 > e)
2174 return MY_CS_TOOSMALL4;
2175
2176 if (wc > 0x10FFFF)
2177 return MY_CS_ILUNI;
2178
2179 s[0]= (uchar) (wc >> 24);
2180 s[1]= (uchar) (wc >> 16) & 0xFF;
2181 s[2]= (uchar) (wc >> 8) & 0xFF;
2182 s[3]= (uchar) wc & 0xFF;
2183 return 4;
2184}
2185
2186
2187static inline void
2188my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2189{
2190 MY_UNICASE_CHARACTER *page;
2191 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
2192 *wc= page[*wc & 0xFF].tolower;
2193}
2194
2195
2196static inline void
2197my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2198{
2199 MY_UNICASE_CHARACTER *page;
2200 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
2201 *wc= page[*wc & 0xFF].toupper;
2202}
2203
2204
2205static inline void
2206my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2207{
2208 if (*wc <= uni_plane->maxchar)
2209 {
2210 MY_UNICASE_CHARACTER *page;
2211 if ((page= uni_plane->page[*wc >> 8]))
2212 *wc= page[*wc & 0xFF].sort;
2213 }
2214 else
2215 {
2216 *wc= MY_CS_REPLACEMENT_CHARACTER;
2217 }
2218}
2219
2220
2221static size_t
2222my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)),
2223 const char *ptr, size_t length)
2224{
2225 const char *end= ptr + length;
2226 DBUG_ASSERT((length % 4) == 0);
2227 while (end > ptr + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4])
2228 end-= 4;
2229 return (size_t) (end - ptr);
2230}
2231
2232
2233static size_t
2234my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
2235 char *dst __attribute__((unused)),
2236 size_t dstlen __attribute__((unused)))
2237{
2238 my_wc_t wc;
2239 int res;
2240 char *srcend= src + srclen;
2241 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2242 DBUG_ASSERT(src == dst && srclen == dstlen);
2243
2244 while ((src < srcend) &&
2245 (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
2246 {
2247 my_toupper_utf32(uni_plane, &wc);
2248 if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
2249 break;
2250 src+= res;
2251 }
2252 return srclen;
2253}
2254
2255
2256static void
2257my_hash_sort_utf32_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
2258 ulong *nr1, ulong *nr2)
2259{
2260 my_wc_t wc;
2261 int res;
2262 const uchar *e= s + slen;
2263 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2264 register ulong m1= *nr1, m2= *nr2;
2265
2266 while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
2267 {
2268 my_tosort_utf32(uni_plane, &wc);
2269 MY_HASH_ADD(m1, m2, (uint) (wc >> 24));
2270 MY_HASH_ADD(m1, m2, (uint) (wc >> 16) & 0xFF);
2271 MY_HASH_ADD(m1, m2, (uint) (wc >> 8) & 0xFF);
2272 MY_HASH_ADD(m1, m2, (uint) (wc & 0xFF));
2273 s+= res;
2274 }
2275 *nr1= m1;
2276 *nr2= m2;
2277}
2278
2279
2280static void
2281my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
2282 ulong *nr1, ulong *nr2)
2283{
2284 size_t lengthsp= my_lengthsp_utf32(cs, (const char *) s, slen);
2285 my_hash_sort_utf32_nopad(cs, s, lengthsp, nr1, nr2);
2286}
2287
2288
2289static size_t
2290my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
2291 char *dst __attribute__((unused)),
2292 size_t dstlen __attribute__((unused)))
2293{
2294 my_wc_t wc;
2295 int res;
2296 char *srcend= src + srclen;
2297 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2298 DBUG_ASSERT(src == dst && srclen == dstlen);
2299
2300 while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
2301 {
2302 my_tolower_utf32(uni_plane,&wc);
2303 if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
2304 break;
2305 src+= res;
2306 }
2307 return srclen;
2308}
2309
2310
2311static int
2312my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
2313 const uchar *b, const uchar *e)
2314{
2315 return b + 4 > e ? MY_CS_TOOSMALL4 :
2316 IS_UTF32_MBHEAD4(b[0], b[1]) ? 4 : MY_CS_ILSEQ;
2317}
2318
2319
2320#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32
2321#define CHARLEN(cs,str,end) my_charlen_utf32(cs,str,end)
2322#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
2323#include "ctype-mb.ic"
2324#undef MY_FUNCTION_NAME
2325#undef CHARLEN
2326#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
2327/* Defines my_well_formed_char_length_utf32 */
2328
2329
2330static size_t
2331my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
2332{
2333 char *start= dst, *end= dst + n;
2334 DBUG_ASSERT((n % 4) == 0);
2335 for (; *fmt ; fmt++)
2336 {
2337 if (fmt[0] != '%')
2338 {
2339 if (dst >= end) /* End of buffer */
2340 break;
2341
2342 *dst++= '\0';
2343 *dst++= '\0';
2344 *dst++= '\0';
2345 *dst++= *fmt; /* Copy ordinary char */
2346 continue;
2347 }
2348
2349 fmt++;
2350
2351 /* Skip if max size is used (to be compatible with printf) */
2352 while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
2353 fmt++;
2354
2355 if (*fmt == 'l')
2356 fmt++;
2357
2358 if (*fmt == 's') /* String parameter */
2359 {
2360 reg2 char *par= va_arg(ap, char *);
2361 size_t plen;
2362 size_t left_len= (size_t)(end - dst);
2363 if (!par) par= (char*)"(null)";
2364 plen= strlen(par);
2365 if (left_len <= plen*4)
2366 plen= left_len / 4 - 1;
2367
2368 for ( ; plen ; plen--, dst+= 4, par++)
2369 {
2370 dst[0]= '\0';
2371 dst[1]= '\0';
2372 dst[2]= '\0';
2373 dst[3]= par[0];
2374 }
2375 continue;
2376 }
2377 else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
2378 {
2379 register int iarg;
2380 char nbuf[16];
2381 char *pbuf= nbuf;
2382
2383 if ((size_t) (end - dst) < 64)
2384 break;
2385 iarg= va_arg(ap, int);
2386 if (*fmt == 'd')
2387 int10_to_str((long) iarg, nbuf, -10);
2388 else
2389 int10_to_str((long) (uint) iarg,nbuf,10);
2390
2391 for (; pbuf[0]; pbuf++)
2392 {
2393 *dst++= '\0';
2394 *dst++= '\0';
2395 *dst++= '\0';
2396 *dst++= *pbuf;
2397 }
2398 continue;
2399 }
2400
2401 /* We come here on '%%', unknown code or too long parameter */
2402 if (dst == end)
2403 break;
2404 *dst++= '\0';
2405 *dst++= '\0';
2406 *dst++= '\0';
2407 *dst++= '%'; /* % used as % or unknown code */
2408 }
2409
2410 DBUG_ASSERT(dst < end);
2411 *dst++= '\0';
2412 *dst++= '\0';
2413 *dst++= '\0';
2414 *dst++= '\0'; /* End of errmessage */
2415 return (size_t) (dst - start - 4);
2416}
2417
2418
2419static size_t
2420my_snprintf_utf32(CHARSET_INFO *cs __attribute__((unused)),
2421 char* to, size_t n, const char* fmt, ...)
2422{
2423 va_list args;
2424 va_start(args,fmt);
2425 return my_vsnprintf_utf32(to, n, fmt, args);
2426}
2427
2428
2429static longlong
2430my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)),
2431 const char *nptr, char **endptr, int *error)
2432{
2433 const char *s, *end, *start, *n_end, *true_end;
2434 uchar c;
2435 unsigned long i, j, k;
2436 ulonglong li;
2437 int negative;
2438 ulong cutoff, cutoff2, cutoff3;
2439
2440 s= nptr;
2441 /* If fixed length string */
2442 if (endptr)
2443 {
2444 /* Make sure string length is even */
2445 end= s + ((*endptr - s) / 4) * 4;
2446 while (s < end && !s[0] && !s[1] && !s[2] &&
2447 (s[3] == ' ' || s[3] == '\t'))
2448 s+= 4;
2449 if (s == end)
2450 goto no_conv;
2451 }
2452 else
2453 {
2454 /* We don't support null terminated strings in UCS2 */
2455 goto no_conv;
2456 }
2457
2458 /* Check for a sign. */
2459 negative= 0;
2460 if (!s[0] && !s[1] && !s[2] && s[3] == '-')
2461 {
2462 *error= -1; /* Mark as negative number */
2463 negative= 1;
2464 s+= 4;
2465 if (s == end)
2466 goto no_conv;
2467 cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
2468 cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
2469 cutoff3= MAX_NEGATIVE_NUMBER % 100;
2470 }
2471 else
2472 {
2473 *error= 0;
2474 if (!s[0] && !s[1] && !s[2] && s[3] == '+')
2475 {
2476 s+= 4;
2477 if (s == end)
2478 goto no_conv;
2479 }
2480 cutoff= ULONGLONG_MAX / LFACTOR2;
2481 cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
2482 cutoff3= ULONGLONG_MAX % 100;
2483 }
2484
2485 /* Handle case where we have a lot of pre-zero */
2486 if (!s[0] && !s[1] && !s[2] && s[3] == '0')
2487 {
2488 i= 0;
2489 do
2490 {
2491 s+= 4;
2492 if (s == end)
2493 goto end_i; /* Return 0 */
2494 }
2495 while (!s[0] && !s[1] && !s[2] && s[3] == '0');
2496 n_end= s + 4 * INIT_CNT;
2497 }
2498 else
2499 {
2500 /* Read first digit to check that it's a valid number */
2501 if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2502 goto no_conv;
2503 i= c;
2504 s+= 4;
2505 n_end= s + 4 * (INIT_CNT-1);
2506 }
2507
2508 /* Handle first 9 digits and store them in i */
2509 if (n_end > end)
2510 n_end= end;
2511 for (; s != n_end ; s+= 4)
2512 {
2513 if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2514 goto end_i;
2515 i= i * 10 + c;
2516 }
2517 if (s == end)
2518 goto end_i;
2519
2520 /* Handle next 9 digits and store them in j */
2521 j= 0;
2522 start= s; /* Used to know how much to shift i */
2523 n_end= true_end= s + 4 * INIT_CNT;
2524 if (n_end > end)
2525 n_end= end;
2526 do
2527 {
2528 if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2529 goto end_i_and_j;
2530 j= j * 10 + c;
2531 s+= 4;
2532 } while (s != n_end);
2533 if (s == end)
2534 {
2535 if (s != true_end)
2536 goto end_i_and_j;
2537 goto end3;
2538 }
2539 if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2540 goto end3;
2541
2542 /* Handle the next 1 or 2 digits and store them in k */
2543 k=c;
2544 s+= 4;
2545 if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2546 goto end4;
2547 k= k * 10 + c;
2548 s+= 4;
2549 *endptr= (char*) s;
2550
2551 /* number string should have ended here */
2552 if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9)
2553 goto overflow;
2554
2555 /* Check that we didn't get an overflow with the last digit */
2556 if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
2557 k > cutoff3)))
2558 goto overflow;
2559 li= i * LFACTOR2+ (ulonglong) j * 100 + k;
2560 return (longlong) li;
2561
2562overflow: /* *endptr is set here */
2563 *error= MY_ERRNO_ERANGE;
2564 return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
2565
2566end_i:
2567 *endptr= (char*) s;
2568 return (negative ? ((longlong) -(long) i) : (longlong) i);
2569
2570end_i_and_j:
2571 li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j;
2572 *endptr= (char*) s;
2573 return (negative ? -((longlong) li) : (longlong) li);
2574
2575end3:
2576 li= (ulonglong) i*LFACTOR+ (ulonglong) j;
2577 *endptr= (char*) s;
2578 return (negative ? -((longlong) li) : (longlong) li);
2579
2580end4:
2581 li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
2582 *endptr= (char*) s;
2583 if (negative)
2584 {
2585 if (li > MAX_NEGATIVE_NUMBER)
2586 goto overflow;
2587 return -((longlong) li);
2588 }
2589 return (longlong) li;
2590
2591no_conv:
2592 /* There was no number to convert. */
2593 *error= MY_ERRNO_EDOM;
2594 *endptr= (char *) nptr;
2595 return 0;
2596}
2597
2598
2599static size_t
2600my_numchars_utf32(CHARSET_INFO *cs __attribute__((unused)),
2601 const char *b, const char *e)
2602{
2603 return (size_t) (e - b) / 4;
2604}
2605
2606
2607static size_t
2608my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)),
2609 const char *b, const char *e, size_t pos)
2610{
2611 size_t string_length= (size_t) (e - b);
2612 return pos * 4 > string_length ? string_length + 4 : pos * 4;
2613}
2614
2615
2616static
2617void my_fill_utf32(CHARSET_INFO *cs,
2618 char *s, size_t slen, int fill)
2619{
2620 char buf[10];
2621#ifdef DBUG_ASSERT_EXISTS
2622 uint buflen;
2623#endif
2624 char *e= s + slen;
2625
2626 DBUG_ASSERT((slen % 4) == 0);
2627
2628#ifdef DBUG_ASSERT_EXISTS
2629 buflen=
2630#endif
2631 cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
2632 (uchar*) buf + sizeof(buf));
2633 DBUG_ASSERT(buflen == 4);
2634 while (s < e)
2635 {
2636 memcpy(s, buf, 4);
2637 s+= 4;
2638 }
2639}
2640
2641
2642static int
2643my_wildcmp_utf32_ci(CHARSET_INFO *cs,
2644 const char *str, const char *str_end,
2645 const char *wildstr, const char *wildend,
2646 int escape, int w_one, int w_many)
2647{
2648 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2649 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2650 escape, w_one, w_many, uni_plane);
2651}
2652
2653
2654static int
2655my_wildcmp_utf32_bin(CHARSET_INFO *cs,
2656 const char *str,const char *str_end,
2657 const char *wildstr,const char *wildend,
2658 int escape, int w_one, int w_many)
2659{
2660 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2661 escape, w_one, w_many, NULL);
2662}
2663
2664
2665static size_t
2666my_scan_utf32(CHARSET_INFO *cs,
2667 const char *str, const char *end, int sequence_type)
2668{
2669 const char *str0= str;
2670
2671 switch (sequence_type)
2672 {
2673 case MY_SEQ_SPACES:
2674 for ( ; str < end; )
2675 {
2676 my_wc_t wc;
2677 int res= my_utf32_uni(cs, &wc, (uchar*) str, (uchar*) end);
2678 if (res < 0 || wc != ' ')
2679 break;
2680 str+= res;
2681 }
2682 return (size_t) (str - str0);
2683 case MY_SEQ_NONSPACES:
2684 DBUG_ASSERT(0); /* Not implemented */
2685 /* pass through */
2686 default:
2687 return 0;
2688 }
2689}
2690
2691
/*
  Collation function table referenced by my_charset_utf32_general_ci.
  The entries are positional, so their order must match the member
  order of MY_COLLATION_HANDLER.
*/
static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
{
 NULL, /* init */
 my_strnncoll_utf32_general_ci,
 my_strnncollsp_utf32_general_ci,
 my_strnxfrm_unicode,
 my_strnxfrmlen_unicode,
 my_like_range_generic,
 my_wildcmp_utf32_ci,
 my_strcasecmp_mb2_or_mb4,
 my_instr_mb,
 my_hash_sort_utf32,
 my_propagate_simple
};
2706
2707
/*
  Collation function table referenced by my_charset_utf32_bin.
  Compared with the general_ci table it plugs in the "_bin" comparison
  and wildcard routines and the "full_bin" strnxfrm routines.
*/
static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
{
 NULL, /* init */
 my_strnncoll_utf32_bin,
 my_strnncollsp_utf32_bin,
 my_strnxfrm_unicode_full_bin,
 my_strnxfrmlen_unicode_full_bin,
 my_like_range_generic,
 my_wildcmp_utf32_bin,
 my_strcasecmp_mb2_or_mb4,
 my_instr_mb,
 my_hash_sort_utf32,
 my_propagate_simple
};
2722
2723
/*
  Collation function table referenced by my_charset_utf32_general_nopad_ci.
  Differs from my_collation_utf32_general_ci_handler in three entries,
  which are the "nopad" variants: strnncollsp, strnxfrm and hash_sort.
*/
static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
{
 NULL, /* init */
 my_strnncoll_utf32_general_ci,
 my_strnncollsp_utf32_general_nopad_ci,
 my_strnxfrm_unicode_nopad,
 my_strnxfrmlen_unicode,
 my_like_range_generic,
 my_wildcmp_utf32_ci,
 my_strcasecmp_mb2_or_mb4,
 my_instr_mb,
 my_hash_sort_utf32_nopad,
 my_propagate_simple
};
2738
2739
/*
  Collation function table referenced by my_charset_utf32_nopad_bin.
  Differs from my_collation_utf32_bin_handler in three entries, which
  are the "nopad" variants: strnncollsp, strnxfrm and hash_sort.
*/
static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler =
{
 NULL, /* init */
 my_strnncoll_utf32_bin,
 my_strnncollsp_utf32_nopad_bin,
 my_strnxfrm_unicode_full_nopad_bin,
 my_strnxfrmlen_unicode_full_bin,
 my_like_range_generic,
 my_wildcmp_utf32_bin,
 my_strcasecmp_mb2_or_mb4,
 my_instr_mb,
 my_hash_sort_utf32_nopad,
 my_propagate_simple
};
2754
2755
/*
  Character set function table shared by the four utf32 charset
  definitions that follow. The entries are positional, so their order
  must match the member order of MY_CHARSET_HANDLER.
*/
MY_CHARSET_HANDLER my_charset_utf32_handler=
{
 NULL, /* init */
 my_numchars_utf32,
 my_charpos_utf32,
 my_lengthsp_utf32,
 my_numcells_mb,
 my_utf32_uni, /* mb_wc */
 my_uni_utf32, /* wc_mb */
 my_mb_ctype_mb,
 my_caseup_str_mb2_or_mb4,
 my_casedn_str_mb2_or_mb4,
 my_caseup_utf32,
 my_casedn_utf32,
 my_snprintf_utf32,
 my_l10tostr_mb2_or_mb4,
 my_ll10tostr_mb2_or_mb4,
 my_fill_utf32,
 my_strntol_mb2_or_mb4,
 my_strntoul_mb2_or_mb4,
 my_strntoll_mb2_or_mb4,
 my_strntoull_mb2_or_mb4,
 my_strntod_mb2_or_mb4,
 my_strtoll10_utf32,
 my_strntoull10rnd_mb2_or_mb4,
 my_scan_utf32,
 my_charlen_utf32,
 my_well_formed_char_length_utf32,
 my_copy_fix_mb2_or_mb4,
 my_uni_utf32,
};
2787
2788
/*
  Compiled-in definition of the utf32_general_ci collation (number 60),
  the entry flagged MY_CS_PRIMARY for the "utf32" character set.
  Characters are fixed-width (mbminlen == mbmaxlen == 4), the pad
  character is a space and case information is my_unicase_default.
*/
struct charset_info_st my_charset_utf32_general_ci=
{
 60,0,0, /* number */
 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
 "utf32", /* cs name */
 "utf32_general_ci", /* name */
 "UTF-32 Unicode", /* comment */
 NULL, /* tailoring */
 NULL, /* ctype */
 NULL, /* to_lower */
 NULL, /* to_upper */
 NULL, /* sort_order */
 NULL, /* uca */
 NULL, /* tab_to_uni */
 NULL, /* tab_from_uni */
 &my_unicase_default, /* caseinfo */
 NULL, /* state_map */
 NULL, /* ident_map */
 1, /* strxfrm_multiply */
 1, /* caseup_multiply */
 1, /* casedn_multiply */
 4, /* mbminlen */
 4, /* mbmaxlen */
 0, /* min_sort_char */
 0xFFFF, /* max_sort_char */
 ' ', /* pad char */
 0, /* escape_with_backslash_is_dangerous */
 1, /* levels_for_order */
 &my_charset_utf32_handler,
 &my_collation_utf32_general_ci_handler
};
2820
2821
/*
  Compiled-in definition of the utf32_bin collation (number 61),
  flagged MY_CS_BINSORT. Shares the utf32 charset handler and uses
  my_collation_utf32_bin_handler for collation functions.
*/
struct charset_info_st my_charset_utf32_bin=
{
 61,0,0, /* number */
 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
 "utf32", /* cs name */
 "utf32_bin", /* name */
 "UTF-32 Unicode", /* comment */
 NULL, /* tailoring */
 NULL, /* ctype */
 NULL, /* to_lower */
 NULL, /* to_upper */
 NULL, /* sort_order */
 NULL, /* uca */
 NULL, /* tab_to_uni */
 NULL, /* tab_from_uni */
 &my_unicase_default, /* caseinfo */
 NULL, /* state_map */
 NULL, /* ident_map */
 1, /* strxfrm_multiply */
 1, /* caseup_multiply */
 1, /* casedn_multiply */
 4, /* mbminlen */
 4, /* mbmaxlen */
 0, /* min_sort_char */
 0xFFFF, /* max_sort_char */
 ' ', /* pad char */
 0, /* escape_with_backslash_is_dangerous */
 1, /* levels_for_order */
 &my_charset_utf32_handler,
 &my_collation_utf32_bin_handler
};
2853
2854
/*
  Compiled-in definition of the utf32_general_nopad_ci collation.
  Its number is derived from 60 (utf32_general_ci) via MY_NOPAD_ID(),
  it carries the MY_CS_NOPAD flag and it uses the nopad variant of the
  general_ci collation handler.
*/
struct charset_info_st my_charset_utf32_general_nopad_ci=
{
 MY_NOPAD_ID(60),0,0, /* number */
 MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
 "utf32", /* cs name */
 "utf32_general_nopad_ci", /* name */
 "UTF-32 Unicode", /* comment */
 NULL, /* tailoring */
 NULL, /* ctype */
 NULL, /* to_lower */
 NULL, /* to_upper */
 NULL, /* sort_order */
 NULL, /* uca */
 NULL, /* tab_to_uni */
 NULL, /* tab_from_uni */
 &my_unicase_default, /* caseinfo */
 NULL, /* state_map */
 NULL, /* ident_map */
 1, /* strxfrm_multiply */
 1, /* caseup_multiply */
 1, /* casedn_multiply */
 4, /* mbminlen */
 4, /* mbmaxlen */
 0, /* min_sort_char */
 0xFFFF, /* max_sort_char */
 ' ', /* pad char */
 0, /* escape_with_backslash_is_dangerous */
 1, /* levels_for_order */
 &my_charset_utf32_handler,
 &my_collation_utf32_general_nopad_ci_handler
};
2886
2887
/*
  Compiled-in definition of the utf32_nopad_bin collation.
  Its number is derived from 61 (utf32_bin) via MY_NOPAD_ID(), it
  carries both MY_CS_BINSORT and MY_CS_NOPAD and it uses the nopad
  variant of the binary collation handler.
*/
struct charset_info_st my_charset_utf32_nopad_bin=
{
 MY_NOPAD_ID(61),0,0, /* number */
 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
 MY_CS_NOPAD,
 "utf32", /* cs name */
 "utf32_nopad_bin", /* name */
 "UTF-32 Unicode", /* comment */
 NULL, /* tailoring */
 NULL, /* ctype */
 NULL, /* to_lower */
 NULL, /* to_upper */
 NULL, /* sort_order */
 NULL, /* uca */
 NULL, /* tab_to_uni */
 NULL, /* tab_from_uni */
 &my_unicase_default, /* caseinfo */
 NULL, /* state_map */
 NULL, /* ident_map */
 1, /* strxfrm_multiply */
 1, /* caseup_multiply */
 1, /* casedn_multiply */
 4, /* mbminlen */
 4, /* mbmaxlen */
 0, /* min_sort_char */
 0xFFFF, /* max_sort_char */
 ' ', /* pad char */
 0, /* escape_with_backslash_is_dangerous */
 1, /* levels_for_order */
 &my_charset_utf32_handler,
 &my_collation_utf32_nopad_bin_handler
};
2920
2921
2922#endif /* HAVE_CHARSET_utf32 */
2923
2924
2925#ifdef HAVE_CHARSET_ucs2
2926
/*
  Character-class table installed as the "ctype" member of the ucs2
  charset definitions. It holds one leading entry followed by 256
  entries (16 rows of 16); all entries of the upper half are 0.
  NOTE(review): presumably indexed by (byte value + 1) and made of the
  character-class bit flags from m_ctype.h -- confirm against the users.
*/
static const uchar ctype_ucs2[] = {
 0,
 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
2946
/*
  256-entry byte mapping installed as the "to_lower" member of the ucs2
  charset definitions. Values 65..90 ('A'..'Z') map to 97..122
  ('a'..'z'); every other value maps to itself.
*/
static const uchar to_lower_ucs2[] = {
 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
2965
/*
  256-entry byte mapping installed as the "to_upper" member of the ucs2
  charset definitions, and also as "sort_order" in the general_ci ones.
  Values 97..122 ('a'..'z') map to 65..90 ('A'..'Z'); every other value
  maps to itself.
*/
static const uchar to_upper_ucs2[] = {
 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
2984
2985
/* Definitions for strcoll.ic */

/* Always evaluates to 1, whatever the two bytes are. */
#define IS_MB2_CHAR(x,y) (1)

/*
  Combine two bytes into one 16-bit code, b0 being the high byte.
  Both arguments are parenthesized so that an expression argument is
  truncated to a byte as a whole rather than only its first operand.
*/
#define UCS2_CODE(b0,b1) ((((uchar) (b0)) << 8) | ((uchar) (b1)))
2989
2990
2991static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
2992{
2993 my_wc_t wc= UCS2_CODE(b0, b1);
2994 MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
2995 return (int) (page ? page[wc & 0xFF].sort : wc);
2996}
2997
2998
/*
  Four instantiations of the "strcoll.ic" template, one per ucs2
  collation. Before each include, MY_FUNCTION_NAME(x) sets the suffix
  pasted onto generated function names, and WEIGHT_ILSEQ/WEIGHT_MB2
  give the weight of an ill-formed byte and of a two-byte character.
  NOTE(review): the template presumably generates the my_strnncoll_* and
  my_strnncollsp_* functions named in the ucs2 collation handler tables,
  and undefines these macros after use -- confirm in strcoll.ic.
*/

/* ucs2_general_ci: weights taken from my_weight_mb2_ucs2_general_ci() */
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
#include "strcoll.ic"


/* ucs2_bin: the weight of a character is its code */
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
#include "strcoll.ic"


/* ucs2_general_nopad_ci: as general_ci, with DEFINE_STRNNCOLLSP_NOPAD set */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_nopad_ci
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
#include "strcoll.ic"


/* ucs2_nopad_bin: as bin, with DEFINE_STRNNCOLLSP_NOPAD set */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_nopad_bin
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
#include "strcoll.ic"
3023
3024
3025static int
3026my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3027 const uchar *s, const uchar *e)
3028{
3029 return s + 2 > e ? MY_CS_TOOSMALLN(2) : 2;
3030}
3031
3032
3033static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
3034 my_wc_t * pwc, const uchar *s, const uchar *e)
3035{
3036 if (s+2 > e) /* Need 2 characters */
3037 return MY_CS_TOOSMALL2;
3038
3039 *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
3040 return 2;
3041}
3042
3043static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
3044 my_wc_t wc, uchar *r, uchar *e)
3045{
3046 if ( r+2 > e )
3047 return MY_CS_TOOSMALL2;
3048
3049 if (wc > 0xFFFF) /* UCS2 does not support characters outside BMP */
3050 return MY_CS_ILUNI;
3051
3052 r[0]= (uchar) (wc >> 8);
3053 r[1]= (uchar) (wc & 0xFF);
3054 return 2;
3055}
3056
3057
3058static inline void
3059my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
3060{
3061 MY_UNICASE_CHARACTER *page;
3062 if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
3063 *wc= page[*wc & 0xFF].tolower;
3064}
3065
3066
3067static inline void
3068my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
3069{
3070 MY_UNICASE_CHARACTER *page;
3071 if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
3072 *wc= page[*wc & 0xFF].toupper;
3073}
3074
3075
3076static inline void
3077my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
3078{
3079 MY_UNICASE_CHARACTER *page;
3080 if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
3081 *wc= page[*wc & 0xFF].sort;
3082}
3083
3084static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
3085 char *dst __attribute__((unused)),
3086 size_t dstlen __attribute__((unused)))
3087{
3088 my_wc_t wc;
3089 int res;
3090 char *srcend= src + srclen;
3091 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3092 DBUG_ASSERT(src == dst && srclen == dstlen);
3093
3094 while ((src < srcend) &&
3095 (res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
3096 {
3097 my_toupper_ucs2(uni_plane, &wc);
3098 if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
3099 break;
3100 src+= res;
3101 }
3102 return srclen;
3103}
3104
3105
3106static void
3107my_hash_sort_ucs2_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
3108 ulong *nr1, ulong *nr2)
3109{
3110 my_wc_t wc;
3111 int res;
3112 const uchar *e=s+slen;
3113 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3114 register ulong m1= *nr1, m2= *nr2;
3115
3116 while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
3117 {
3118 my_tosort_ucs2(uni_plane, &wc);
3119 MY_HASH_ADD_16(m1, m2, wc);
3120 s+=res;
3121 }
3122 *nr1= m1;
3123 *nr2= m2;
3124}
3125
3126
3127static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
3128 ulong *nr1, ulong *nr2)
3129{
3130 size_t lengthsp= my_lengthsp_mb2(cs, (const char *) s, slen);
3131 my_hash_sort_ucs2_nopad(cs, s, lengthsp, nr1, nr2);
3132}
3133
3134static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
3135 char *dst __attribute__((unused)),
3136 size_t dstlen __attribute__((unused)))
3137{
3138 my_wc_t wc;
3139 int res;
3140 char *srcend= src + srclen;
3141 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3142 DBUG_ASSERT(src == dst && srclen == dstlen);
3143
3144 while ((src < srcend) &&
3145 (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
3146 {
3147 my_tolower_ucs2(uni_plane, &wc);
3148 if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
3149 break;
3150 src+= res;
3151 }
3152 return srclen;
3153}
3154
3155
3156static void
3157my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3158 char *s, size_t l, int fill)
3159{
3160 DBUG_ASSERT(fill <= 0xFFFF);
3161#ifdef WAITING_FOR_GCC_VECTORIZATION_BUG_TO_BE_FIXED
3162 /*
3163 This code with int2store() is known to be faster on some processors,
3164 but crashes on other processors due to a possible bug in GCC's
3165 -ftree-vectorization (which is enabled in -O3) in case of
3166 a non-aligned memory. See here for details:
3167 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58039
3168 */
3169 char *last= s + l - 2;
3170 uint16 tmp= (fill >> 8) + ((fill & 0xFF) << 8); /* swap bytes */
3171 DBUG_ASSERT(fill <= 0xFFFF);
3172 for ( ; s <= last; s+= 2)
3173 int2store(s, tmp); /* store little-endian */
3174#else
3175 for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
3176#endif
3177}
3178
3179
3180static
3181size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3182 const char *b, const char *e)
3183{
3184 return (size_t) (e-b)/2;
3185}
3186
3187
3188static
3189size_t my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3190 const char *b __attribute__((unused)),
3191 const char *e __attribute__((unused)),
3192 size_t pos)
3193{
3194 size_t string_length= (size_t) (e - b);
3195 return pos > string_length ? string_length + 2 : pos * 2;
3196}
3197
3198
3199static size_t
3200my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3201 const char *b, const char *e,
3202 size_t nchars, MY_STRCOPY_STATUS *status)
3203{
3204 size_t length= e - b;
3205 if (nchars * 2 <= length)
3206 {
3207 status->m_well_formed_error_pos= NULL;
3208 status->m_source_end_pos= b + (nchars * 2);
3209 return nchars;
3210 }
3211 if (length % 2)
3212 {
3213 status->m_well_formed_error_pos= status->m_source_end_pos= e - 1;
3214 }
3215 else
3216 {
3217 status->m_well_formed_error_pos= NULL;
3218 status->m_source_end_pos= e;
3219 }
3220 return length / 2;
3221}
3222
3223
3224static
3225int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
3226 const char *str,const char *str_end,
3227 const char *wildstr,const char *wildend,
3228 int escape, int w_one, int w_many)
3229{
3230 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3231 return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3232 escape,w_one,w_many,uni_plane);
3233}
3234
3235
3236static
3237int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
3238 const char *str,const char *str_end,
3239 const char *wildstr,const char *wildend,
3240 int escape, int w_one, int w_many)
3241{
3242 return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3243 escape,w_one,w_many,NULL);
3244}
3245
3246
3247static void
3248my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
3249 const uchar *key, size_t len,
3250 ulong *nr1, ulong *nr2)
3251{
3252 const uchar *end= key + len;
3253 register ulong m1= *nr1, m2= *nr2;
3254 for ( ; key < end ; key++)
3255 {
3256 MY_HASH_ADD(m1, m2, (uint)*key);
3257 }
3258 *nr1= m1;
3259 *nr2= m2;
3260}
3261
3262
3263static void
3264my_hash_sort_ucs2_bin(CHARSET_INFO *cs,
3265 const uchar *key, size_t len, ulong *nr1, ulong *nr2)
3266{
3267 size_t lengthsp= my_lengthsp_mb2(cs, (const char *) key, len);
3268 my_hash_sort_ucs2_nopad_bin(cs, key, lengthsp, nr1, nr2);
3269}
3270
3271
/*
  Collation function table referenced by my_charset_ucs2_general_ci and
  my_charset_ucs2_general_mysql500_ci. The entries are positional, so
  their order must match the member order of MY_COLLATION_HANDLER.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
{
 NULL, /* init */
 my_strnncoll_ucs2_general_ci,
 my_strnncollsp_ucs2_general_ci,
 my_strnxfrm_unicode,
 my_strnxfrmlen_unicode,
 my_like_range_generic,
 my_wildcmp_ucs2_ci,
 my_strcasecmp_mb2_or_mb4,
 my_instr_mb,
 my_hash_sort_ucs2,
 my_propagate_simple
};
3286
3287
/*
  Collation function table referenced by my_charset_ucs2_bin.
  Compared with the general_ci table it plugs in the "_bin" comparison,
  wildcard and hash routines; the strnxfrm entries are the same.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
{
 NULL, /* init */
 my_strnncoll_ucs2_bin,
 my_strnncollsp_ucs2_bin,
 my_strnxfrm_unicode,
 my_strnxfrmlen_unicode,
 my_like_range_generic,
 my_wildcmp_ucs2_bin,
 my_strcasecmp_mb2_or_mb4,
 my_instr_mb,
 my_hash_sort_ucs2_bin,
 my_propagate_simple
};
3302
3303
/*
  Collation function table referenced by my_charset_ucs2_general_nopad_ci.
  Differs from my_collation_ucs2_general_ci_handler in three entries,
  which are the "nopad" variants: strnncollsp, strnxfrm and hash_sort.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
{
 NULL, /* init */
 my_strnncoll_ucs2_general_ci,
 my_strnncollsp_ucs2_general_nopad_ci,
 my_strnxfrm_unicode_nopad,
 my_strnxfrmlen_unicode,
 my_like_range_generic,
 my_wildcmp_ucs2_ci,
 my_strcasecmp_mb2_or_mb4,
 my_instr_mb,
 my_hash_sort_ucs2_nopad,
 my_propagate_simple
};
3318
3319
/*
  Collation function table referenced by my_charset_ucs2_nopad_bin.
  Differs from my_collation_ucs2_bin_handler in three entries, which
  are the "nopad" variants: strnncollsp, strnxfrm and hash_sort.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
{
 NULL, /* init */
 my_strnncoll_ucs2_bin,
 my_strnncollsp_ucs2_nopad_bin,
 my_strnxfrm_unicode_nopad,
 my_strnxfrmlen_unicode,
 my_like_range_generic,
 my_wildcmp_ucs2_bin,
 my_strcasecmp_mb2_or_mb4,
 my_instr_mb,
 my_hash_sort_ucs2_nopad_bin,
 my_propagate_simple
};
3334
3335
/*
  Character set function table shared by all ucs2 charset definitions
  that follow. The entries are positional, so their order must match
  the member order of MY_CHARSET_HANDLER.
*/
MY_CHARSET_HANDLER my_charset_ucs2_handler=
{
 NULL, /* init */
 my_numchars_ucs2,
 my_charpos_ucs2,
 my_lengthsp_mb2,
 my_numcells_mb,
 my_ucs2_uni, /* mb_wc */
 my_uni_ucs2, /* wc_mb */
 my_mb_ctype_mb,
 my_caseup_str_mb2_or_mb4,
 my_casedn_str_mb2_or_mb4,
 my_caseup_ucs2,
 my_casedn_ucs2,
 my_snprintf_mb2,
 my_l10tostr_mb2_or_mb4,
 my_ll10tostr_mb2_or_mb4,
 my_fill_ucs2,
 my_strntol_mb2_or_mb4,
 my_strntoul_mb2_or_mb4,
 my_strntoll_mb2_or_mb4,
 my_strntoull_mb2_or_mb4,
 my_strntod_mb2_or_mb4,
 my_strtoll10_mb2,
 my_strntoull10rnd_mb2_or_mb4,
 my_scan_mb2,
 my_charlen_ucs2,
 my_well_formed_char_length_ucs2,
 my_copy_fix_mb2_or_mb4,
 my_uni_ucs2,
};
3367
3368
/*
  Compiled-in definition of the ucs2_general_ci collation (number 35),
  the entry flagged MY_CS_PRIMARY for the "ucs2" character set.
  Characters are fixed-width (mbminlen == mbmaxlen == 2), the pad
  character is a space and case information is my_unicase_default.
  The byte tables ctype_ucs2, to_lower_ucs2 and to_upper_ucs2 are
  installed, the latter also as sort_order.
*/
struct charset_info_st my_charset_ucs2_general_ci=
{
 35,0,0, /* number */
 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
 "ucs2", /* cs name */
 "ucs2_general_ci", /* name */
 "", /* comment */
 NULL, /* tailoring */
 ctype_ucs2, /* ctype */
 to_lower_ucs2, /* to_lower */
 to_upper_ucs2, /* to_upper */
 to_upper_ucs2, /* sort_order */
 NULL, /* uca */
 NULL, /* tab_to_uni */
 NULL, /* tab_from_uni */
 &my_unicase_default,/* caseinfo */
 NULL, /* state_map */
 NULL, /* ident_map */
 1, /* strxfrm_multiply */
 1, /* caseup_multiply */
 1, /* casedn_multiply */
 2, /* mbminlen */
 2, /* mbmaxlen */
 0, /* min_sort_char */
 0xFFFF, /* max_sort_char */
 ' ', /* pad char */
 0, /* escape_with_backslash_is_dangerous */
 1, /* levels_for_order */
 &my_charset_ucs2_handler,
 &my_collation_ucs2_general_ci_handler
};
3400
3401
/*
  Compiled-in definition of the ucs2_general_mysql500_ci collation
  (number 159). It shares both handler tables with ucs2_general_ci and
  differs from it in the case information, which is
  my_unicase_mysql500, and in not carrying the MY_CS_PRIMARY flag.
*/
struct charset_info_st my_charset_ucs2_general_mysql500_ci=
{
 159, 0, 0, /* number */
 MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, /* state */
 "ucs2", /* cs name */
 "ucs2_general_mysql500_ci", /* name */
 "", /* comment */
 NULL, /* tailoring */
 ctype_ucs2, /* ctype */
 to_lower_ucs2, /* to_lower */
 to_upper_ucs2, /* to_upper */
 to_upper_ucs2, /* sort_order */
 NULL, /* uca */
 NULL, /* tab_to_uni */
 NULL, /* tab_from_uni */
 &my_unicase_mysql500, /* caseinfo */
 NULL, /* state_map */
 NULL, /* ident_map */
 1, /* strxfrm_multiply */
 1, /* caseup_multiply */
 1, /* casedn_multiply */
 2, /* mbminlen */
 2, /* mbmaxlen */
 0, /* min_sort_char */
 0xFFFF, /* max_sort_char */
 ' ', /* pad char */
 0, /* escape_with_backslash_is_dangerous */
 1, /* levels_for_order */
 &my_charset_ucs2_handler,
 &my_collation_ucs2_general_ci_handler
};
3433
3434
/*
  Compiled-in definition of the ucs2_bin collation (number 90), flagged
  MY_CS_BINSORT. Unlike the general_ci definitions it has no sort_order
  table and its flags do not include MY_CS_STRNXFRM.
*/
struct charset_info_st my_charset_ucs2_bin=
{
 90,0,0, /* number */
 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
 "ucs2", /* cs name */
 "ucs2_bin", /* name */
 "", /* comment */
 NULL, /* tailoring */
 ctype_ucs2, /* ctype */
 to_lower_ucs2, /* to_lower */
 to_upper_ucs2, /* to_upper */
 NULL, /* sort_order */
 NULL, /* uca */
 NULL, /* tab_to_uni */
 NULL, /* tab_from_uni */
 &my_unicase_default,/* caseinfo */
 NULL, /* state_map */
 NULL, /* ident_map */
 1, /* strxfrm_multiply */
 1, /* caseup_multiply */
 1, /* casedn_multiply */
 2, /* mbminlen */
 2, /* mbmaxlen */
 0, /* min_sort_char */
 0xFFFF, /* max_sort_char */
 ' ', /* pad char */
 0, /* escape_with_backslash_is_dangerous */
 1, /* levels_for_order */
 &my_charset_ucs2_handler,
 &my_collation_ucs2_bin_handler
};
3466
3467
/*
  Compiled-in definition of the ucs2_general_nopad_ci collation.
  Its number is derived from 35 (ucs2_general_ci) via MY_NOPAD_ID(), it
  carries the MY_CS_NOPAD flag and it uses the nopad variant of the
  general_ci collation handler.
*/
struct charset_info_st my_charset_ucs2_general_nopad_ci=
{
 MY_NOPAD_ID(35),0,0, /* number */
 MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
 "ucs2", /* cs name */
 "ucs2_general_nopad_ci", /* name */
 "", /* comment */
 NULL, /* tailoring */
 ctype_ucs2, /* ctype */
 to_lower_ucs2, /* to_lower */
 to_upper_ucs2, /* to_upper */
 to_upper_ucs2, /* sort_order */
 NULL, /* uca */
 NULL, /* tab_to_uni */
 NULL, /* tab_from_uni */
 &my_unicase_default, /* caseinfo */
 NULL, /* state_map */
 NULL, /* ident_map */
 1, /* strxfrm_multiply */
 1, /* caseup_multiply */
 1, /* casedn_multiply */
 2, /* mbminlen */
 2, /* mbmaxlen */
 0, /* min_sort_char */
 0xFFFF, /* max_sort_char */
 ' ', /* pad char */
 0, /* escape_with_backslash_is_dangerous */
 1, /* levels_for_order */
 &my_charset_ucs2_handler,
 &my_collation_ucs2_general_nopad_ci_handler
};
3499
3500
/*
  Compiled-in definition of the ucs2_nopad_bin collation.
  Its number is derived from 90 (ucs2_bin) via MY_NOPAD_ID(), it carries
  both MY_CS_BINSORT and MY_CS_NOPAD and it uses the nopad variant of
  the binary collation handler.
*/
struct charset_info_st my_charset_ucs2_nopad_bin=
{
 MY_NOPAD_ID(90),0,0, /* number */
 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
 "ucs2", /* cs name */
 "ucs2_nopad_bin", /* name */
 "", /* comment */
 NULL, /* tailoring */
 ctype_ucs2, /* ctype */
 to_lower_ucs2, /* to_lower */
 to_upper_ucs2, /* to_upper */
 NULL, /* sort_order */
 NULL, /* uca */
 NULL, /* tab_to_uni */
 NULL, /* tab_from_uni */
 &my_unicase_default, /* caseinfo */
 NULL, /* state_map */
 NULL, /* ident_map */
 1, /* strxfrm_multiply */
 1, /* caseup_multiply */
 1, /* casedn_multiply */
 2, /* mbminlen */
 2, /* mbmaxlen */
 0, /* min_sort_char */
 0xFFFF, /* max_sort_char */
 ' ', /* pad char */
 0, /* escape_with_backslash_is_dangerous */
 1, /* levels_for_order */
 &my_charset_ucs2_handler,
 &my_collation_ucs2_nopad_bin_handler
};
3532
3533#endif /* HAVE_CHARSET_ucs2 */
3534