1/* Copyright (c) 2000, 2014, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2014, SkySQL Ab.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17#include "strings_def.h"
18#include <m_ctype.h>
19
20#ifdef USE_MB
21
22
23size_t my_caseup_str_mb(CHARSET_INFO * cs, char *str)
24{
25 register uint32 l;
26 register const uchar *map= cs->to_upper;
27 char *str_orig= str;
28
29 while (*str)
30 {
31 /* Pointing after the '\0' is safe here. */
32 if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
33 str+= l;
34 else
35 {
36 *str= (char) map[(uchar)*str];
37 str++;
38 }
39 }
40 return (size_t) (str - str_orig);
41}
42
43
44size_t my_casedn_str_mb(CHARSET_INFO * cs, char *str)
45{
46 register uint32 l;
47 register const uchar *map= cs->to_lower;
48 char *str_orig= str;
49
50 while (*str)
51 {
52 /* Pointing after the '\0' is safe here. */
53 if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
54 str+= l;
55 else
56 {
57 *str= (char) map[(uchar)*str];
58 str++;
59 }
60 }
61 return (size_t) (str - str_orig);
62}
63
64
65static inline MY_UNICASE_CHARACTER*
66get_case_info_for_ch(CHARSET_INFO *cs, uint page, uint offs)
67{
68 MY_UNICASE_CHARACTER *p;
69 return cs->caseinfo && (p= cs->caseinfo->page[page]) ? &p[offs] : NULL;
70}
71
72
73/*
74 For character sets which don't change octet length in case conversion.
75*/
76size_t my_caseup_mb(CHARSET_INFO * cs, char *src, size_t srclen,
77 char *dst __attribute__((unused)),
78 size_t dstlen __attribute__((unused)))
79{
80 register uint32 l;
81 register char *srcend= src + srclen;
82 register const uchar *map= cs->to_upper;
83
84 DBUG_ASSERT(cs->caseup_multiply == 1);
85 DBUG_ASSERT(src == dst && srclen == dstlen);
86 DBUG_ASSERT(cs->mbmaxlen == 2);
87
88 while (src < srcend)
89 {
90 if ((l=my_ismbchar(cs, src, srcend)))
91 {
92 MY_UNICASE_CHARACTER *ch;
93 if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
94 {
95 *src++= ch->toupper >> 8;
96 *src++= ch->toupper & 0xFF;
97 }
98 else
99 src+= l;
100 }
101 else
102 {
103 *src=(char) map[(uchar) *src];
104 src++;
105 }
106 }
107 return srclen;
108}
109
110
111size_t my_casedn_mb(CHARSET_INFO * cs, char *src, size_t srclen,
112 char *dst __attribute__((unused)),
113 size_t dstlen __attribute__((unused)))
114{
115 register uint32 l;
116 register char *srcend= src + srclen;
117 register const uchar *map=cs->to_lower;
118
119 DBUG_ASSERT(cs->casedn_multiply == 1);
120 DBUG_ASSERT(src == dst && srclen == dstlen);
121 DBUG_ASSERT(cs->mbmaxlen == 2);
122
123 while (src < srcend)
124 {
125 if ((l= my_ismbchar(cs, src, srcend)))
126 {
127 MY_UNICASE_CHARACTER *ch;
128 if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
129 {
130 *src++= ch->tolower >> 8;
131 *src++= ch->tolower & 0xFF;
132 }
133 else
134 src+= l;
135 }
136 else
137 {
138 *src= (char) map[(uchar)*src];
139 src++;
140 }
141 }
142 return srclen;
143}
144
145
146/*
147 Case folding functions for character set
148 where case conversion can change string octet length.
149 For example, in EUCKR,
150 _euckr 0xA9A5 == "LATIN LETTER DOTLESS I" (Turkish letter)
151 is upper-cased to to
152 _euckr 0x49 "LATIN CAPITAL LETTER I" ('usual' letter I)
153 Length is reduced in this example from two bytes to one byte.
154*/
155static size_t
156my_casefold_mb_varlen(CHARSET_INFO *cs,
157 char *src, size_t srclen,
158 char *dst, size_t dstlen __attribute__((unused)),
159 const uchar *map,
160 size_t is_upper)
161{
162 char *srcend= src + srclen, *dst0= dst;
163
164 DBUG_ASSERT(cs->mbmaxlen == 2);
165
166 while (src < srcend)
167 {
168 size_t mblen= my_ismbchar(cs, src, srcend);
169 if (mblen)
170 {
171 MY_UNICASE_CHARACTER *ch;
172 if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
173 {
174 int code= is_upper ? ch->toupper : ch->tolower;
175 src+= 2;
176 if (code > 0xFF)
177 *dst++= code >> 8;
178 *dst++= code & 0xFF;
179 }
180 else
181 {
182 *dst++= *src++;
183 *dst++= *src++;
184 }
185 }
186 else
187 {
188 *dst++= (char) map[(uchar) *src++];
189 }
190 }
191 return (size_t) (dst - dst0);
192}
193
194
195size_t
196my_casedn_mb_varlen(CHARSET_INFO * cs, char *src, size_t srclen,
197 char *dst, size_t dstlen)
198{
199 DBUG_ASSERT(dstlen >= srclen * cs->casedn_multiply);
200 DBUG_ASSERT(src != dst || cs->casedn_multiply == 1);
201 return my_casefold_mb_varlen(cs, src, srclen, dst, dstlen, cs->to_lower, 0);
202}
203
204
205size_t
206my_caseup_mb_varlen(CHARSET_INFO * cs, char *src, size_t srclen,
207 char *dst, size_t dstlen)
208{
209 DBUG_ASSERT(dstlen >= srclen * cs->caseup_multiply);
210 DBUG_ASSERT(src != dst || cs->caseup_multiply == 1);
211 return my_casefold_mb_varlen(cs, src, srclen, dst, dstlen, cs->to_upper, 1);
212}
213
214
215/*
216 my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
217 */
218
219int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
220{
221 register uint32 l;
222 register const uchar *map=cs->to_upper;
223
224 while (*s && *t)
225 {
226 /* Pointing after the '\0' is safe here. */
227 if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
228 {
229 while (l--)
230 if (*s++ != *t++)
231 return 1;
232 }
233 else if (my_charlen(cs, t, t + cs->mbmaxlen) > 1)
234 return 1;
235 else if (map[(uchar) *s++] != map[(uchar) *t++])
236 return 1;
237 }
238 /* At least one of '*s' and '*t' is zero here. */
239 return (*t != *s);
240}
241
242
/*
** Compare string against string with wildcard
**	 0 if matched
**	 non-zero if not matched: the code uses 1 for an ordinary mismatch
**	 and -1 in most cases where the string runs out while wildcards
**	 are still being processed
*/
249
/*
  Advance pointer A by one character: by the value my_ismbchar() reports
  for the bytes at A (B is the end of the buffer), or by one byte when it
  reports zero. my_ismbchar() is expanded twice, so the arguments
  must not have side effects.
*/
#define INC_PTR(cs,A,B) \
  ((A)+= (my_ismbchar((cs),(A),(B)) ? my_ismbchar((cs),(A),(B)) : 1))

/* Map byte A through the sort_order[] table of collation "s" */
#define likeconv(s,A) ((uchar) (s)->sort_order[(uchar) (A)])
253
254static
255int my_wildcmp_mb_impl(CHARSET_INFO *cs,
256 const char *str,const char *str_end,
257 const char *wildstr,const char *wildend,
258 int escape, int w_one, int w_many, int recurse_level)
259{
260 int result= -1; /* Not found, using wildcards */
261
262 if (my_string_stack_guard && my_string_stack_guard(recurse_level))
263 return 1;
264 while (wildstr != wildend)
265 {
266 while (*wildstr != w_many && *wildstr != w_one)
267 {
268 int l;
269 if (*wildstr == escape && wildstr+1 != wildend)
270 wildstr++;
271 if ((l = my_ismbchar(cs, wildstr, wildend)))
272 {
273 if (str+l > str_end || memcmp(str, wildstr, l) != 0)
274 return 1;
275 str += l;
276 wildstr += l;
277 }
278 else
279 if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
280 return(1); /* No match */
281 if (wildstr == wildend)
282 return (str != str_end); /* Match if both are at end */
283 result=1; /* Found an anchor char */
284 }
285 if (*wildstr == w_one)
286 {
287 do
288 {
289 if (str == str_end) /* Skip one char if possible */
290 return (result);
291 INC_PTR(cs,str,str_end);
292 } while (++wildstr < wildend && *wildstr == w_one);
293 if (wildstr == wildend)
294 break;
295 }
296 if (*wildstr == w_many)
297 { /* Found w_many */
298 uchar cmp;
299 const char* mb = wildstr;
300 int mb_len=0;
301
302 wildstr++;
303 /* Remove any '%' and '_' from the wild search string */
304 for (; wildstr != wildend ; wildstr++)
305 {
306 if (*wildstr == w_many)
307 continue;
308 if (*wildstr == w_one)
309 {
310 if (str == str_end)
311 return (-1);
312 INC_PTR(cs,str,str_end);
313 continue;
314 }
315 break; /* Not a wild character */
316 }
317 if (wildstr == wildend)
318 return(0); /* Ok if w_many is last */
319 if (str == str_end)
320 return -1;
321
322 if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
323 cmp= *++wildstr;
324
325 mb=wildstr;
326 mb_len= my_ismbchar(cs, wildstr, wildend);
327 INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
328 cmp=likeconv(cs,cmp);
329 do
330 {
331 for (;;)
332 {
333 if (str >= str_end)
334 return -1;
335 if (mb_len)
336 {
337 if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
338 {
339 str += mb_len;
340 break;
341 }
342 }
343 else if (!my_ismbchar(cs, str, str_end) &&
344 likeconv(cs,*str) == cmp)
345 {
346 str++;
347 break;
348 }
349 INC_PTR(cs,str, str_end);
350 }
351 {
352 int tmp=my_wildcmp_mb_impl(cs,str,str_end,wildstr,wildend,escape,w_one,
353 w_many, recurse_level + 1);
354 if (tmp <= 0)
355 return (tmp);
356 }
357 } while (str != str_end);
358 return(-1);
359 }
360 }
361 return (str != str_end ? 1 : 0);
362}
363
364int my_wildcmp_mb(CHARSET_INFO *cs,
365 const char *str,const char *str_end,
366 const char *wildstr,const char *wildend,
367 int escape, int w_one, int w_many)
368{
369 return my_wildcmp_mb_impl(cs, str, str_end,
370 wildstr, wildend,
371 escape, w_one, w_many, 1);
372}
373
374
375size_t my_numchars_mb(CHARSET_INFO *cs __attribute__((unused)),
376 const char *pos, const char *end)
377{
378 register size_t count= 0;
379 while (pos < end)
380 {
381 uint mb_len;
382 pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
383 count++;
384 }
385 return count;
386}
387
388
389size_t my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
390 const char *pos, const char *end, size_t length)
391{
392 const char *start= pos;
393
394 while (length && pos < end)
395 {
396 uint mb_len;
397 pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
398 length--;
399 }
400 return (size_t) (length ? end+2-start : pos-start);
401}
402
403
404/*
405 Append a badly formed piece of string.
406 Bad bytes are fixed to '?'.
407
408 @param to The destination string
409 @param to_end The end of the destination string
410 @param from The source string
411 @param from_end The end of the source string
412 @param nchars Write not more than "nchars" characters.
413 @param status Copying status, must be previously initialized,
414 e.g. using well_formed_char_length() on the original
415 full source string.
416*/
417static size_t
418my_append_fix_badly_formed_tail(CHARSET_INFO *cs,
419 char *to, char *to_end,
420 const char *from, const char *from_end,
421 size_t nchars,
422 MY_STRCOPY_STATUS *status)
423{
424 char *to0= to;
425
426 for ( ; nchars; nchars--)
427 {
428 int chlen;
429 if ((chlen= cs->cset->charlen(cs, (const uchar*) from,
430 (const uchar *) from_end)) > 0)
431 {
432 /* Found a valid character */ /* chlen == 1..MBMAXLEN */
433 DBUG_ASSERT(chlen <= (int) cs->mbmaxlen);
434 if (to + chlen > to_end)
435 goto end; /* Does not fit to "to" */
436 memcpy(to, from, (size_t) chlen);
437 from+= chlen;
438 to+= chlen;
439 continue;
440 }
441 if (chlen == MY_CS_ILSEQ) /* chlen == 0 */
442 {
443 DBUG_ASSERT(from < from_end); /* Shouldn't get MY_CS_ILSEQ if empty */
444 goto bad;
445 }
446 /* Got an incomplete character */ /* chlen == MY_CS_TOOSMALLXXX */
447 DBUG_ASSERT(chlen >= MY_CS_TOOSMALL6);
448 DBUG_ASSERT(chlen <= MY_CS_TOOSMALL);
449 if (from >= from_end)
450 break; /* End of the source string */
451bad:
452 /* Bad byte sequence, or incomplete character found */
453 if (!status->m_well_formed_error_pos)
454 status->m_well_formed_error_pos= from;
455
456 if ((chlen= cs->cset->wc_mb(cs, '?', (uchar*) to, (uchar *) to_end)) <= 0)
457 break; /* Question mark does not fit into the destination */
458 to+= chlen;
459 from++;
460 }
461end:
462 status->m_source_end_pos= from;
463 return to - to0;
464}
465
466
467size_t
468my_copy_fix_mb(CHARSET_INFO *cs,
469 char *dst, size_t dst_length,
470 const char *src, size_t src_length,
471 size_t nchars, MY_STRCOPY_STATUS *status)
472{
473 size_t well_formed_nchars;
474 size_t well_formed_length;
475 size_t fixed_length;
476
477 set_if_smaller(src_length, dst_length);
478 well_formed_nchars= cs->cset->well_formed_char_length(cs,
479 src, src + src_length,
480 nchars, status);
481 DBUG_ASSERT(well_formed_nchars <= nchars);
482 memmove(dst, src, (well_formed_length= status->m_source_end_pos - src));
483 if (!status->m_well_formed_error_pos)
484 return well_formed_length;
485
486 fixed_length= my_append_fix_badly_formed_tail(cs,
487 dst + well_formed_length,
488 dst + dst_length,
489 src + well_formed_length,
490 src + src_length,
491 nchars - well_formed_nchars,
492 status);
493 return well_formed_length + fixed_length;
494}
495
496
497uint my_instr_mb(CHARSET_INFO *cs,
498 const char *b, size_t b_length,
499 const char *s, size_t s_length,
500 my_match_t *match, uint nmatch)
501{
502 register const char *end, *b0;
503 int res= 0;
504
505 if (s_length <= b_length)
506 {
507 if (!s_length)
508 {
509 if (nmatch)
510 {
511 match->beg= 0;
512 match->end= 0;
513 match->mb_len= 0;
514 }
515 return 1; /* Empty string is always found */
516 }
517
518 b0= b;
519 end= b+b_length-s_length+1;
520
521 while (b < end)
522 {
523 int mb_len;
524
525 if (!cs->coll->strnncoll(cs, (uchar*) b, s_length,
526 (uchar*) s, s_length, 0))
527 {
528 if (nmatch)
529 {
530 match[0].beg= 0;
531 match[0].end= (uint) (b-b0);
532 match[0].mb_len= res;
533 if (nmatch > 1)
534 {
535 match[1].beg= match[0].end;
536 match[1].end= (uint)(match[0].end+s_length);
537 match[1].mb_len= 0; /* Not computed */
538 }
539 }
540 return 2;
541 }
542 mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
543 b+= mb_len;
544 b_length-= mb_len;
545 res++;
546 }
547 }
548 return 0;
549}
550
551
552/*
553 Copy one non-ascii character.
554 "dst" must have enough room for the character.
555 Note, we don't use sort_order[] in this macros.
556 This is correct even for case insensitive collations:
557 - basic Latin letters are processed outside this macros;
558 - for other characters sort_order[x] is equal to x.
559*/
560#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se) \
561{ \
562 switch (my_ismbchar(cs, (const char *) src, (const char *) se)) { \
563 case 4: \
564 *dst++= *src++; \
565 /* fall through */ \
566 case 3: \
567 *dst++= *src++; \
568 /* fall through */ \
569 case 2: \
570 *dst++= *src++; \
571 /* fall through */ \
572 case 0: \
573 *dst++= *src++; /* byte in range 0x80..0xFF which is not MB head */ \
574 } \
575}
576
577
578/*
579 For character sets with two or three byte multi-byte
580 characters having multibyte weights *equal* to their codes:
581 cp932, euckr, gb2312, sjis, eucjpms, ujis.
582*/
583size_t my_strnxfrm_mb_internal(CHARSET_INFO *cs, uchar *dst, uchar *de,
584 uint *nweights, const uchar *src, size_t srclen)
585{
586 uchar *d0= dst;
587 const uchar *se= src + srclen;
588 const uchar *sort_order= cs->sort_order;
589
590 DBUG_ASSERT(cs->mbmaxlen <= 4);
591
592 /*
593 If "srclen" is smaller than both "dstlen" and "nweights"
594 then we can run a simplified loop -
595 without checking "nweights" and "de".
596 */
597 if (de >= d0 + srclen && *nweights >= srclen)
598 {
599 if (sort_order)
600 {
601 /* Optimized version for a case insensitive collation */
602 for (; src < se; (*nweights)--)
603 {
604 if (*src < 128) /* quickly catch ASCII characters */
605 *dst++= sort_order[*src++];
606 else
607 my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
608 }
609 }
610 else
611 {
612 /* Optimized version for a case sensitive collation (no sort_order) */
613 for (; src < se; (*nweights)--)
614 {
615 if (*src < 128) /* quickly catch ASCII characters */
616 *dst++= *src++;
617 else
618 my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
619 }
620 }
621 goto end;
622 }
623
624 /*
625 A thourough loop, checking all possible limits:
626 "se", "nweights" and "de".
627 */
628 for (; src < se && *nweights && dst < de; (*nweights)--)
629 {
630 int chlen;
631 if (*src < 128 || !(chlen= my_ismbchar(cs, (const char *) src,
632 (const char *) se)))
633 {
634 /* Single byte character */
635 *dst++= sort_order ? sort_order[*src++] : *src++;
636 }
637 else
638 {
639 /* Multi-byte character */
640 size_t len= (dst + chlen <= de) ? chlen : de - dst;
641 memcpy(dst, src, len);
642 dst+= len;
643 src+= len;
644 }
645 }
646
647end:
648 return dst - d0;
649}
650
651
652size_t
653my_strnxfrm_mb(CHARSET_INFO *cs,
654 uchar *dst, size_t dstlen, uint nweights,
655 const uchar *src, size_t srclen, uint flags)
656{
657 uchar *de= dst + dstlen;
658 uchar *d0= dst;
659 dst= d0 + my_strnxfrm_mb_internal(cs, dst, de, &nweights, src, srclen);
660 return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
661}
662
663
664size_t
665my_strnxfrm_mb_nopad(CHARSET_INFO *cs,
666 uchar *dst, size_t dstlen, uint nweights,
667 const uchar *src, size_t srclen, uint flags)
668{
669 uchar *de= dst + dstlen;
670 uchar *d0= dst;
671 dst= d0 + my_strnxfrm_mb_internal(cs, dst, de, &nweights, src, srclen);
672 return my_strxfrm_pad_desc_and_reverse_nopad(cs, d0, dst, de, nweights,
673 flags, 0);
674}
675
676
677int
678my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
679 const char *s, const char *t)
680{
681 return strcmp(s,t);
682}
683
684
685
686void
687my_hash_sort_mb_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
688 const uchar *key, size_t len,ulong *nr1, ulong *nr2)
689{
690 register ulong m1= *nr1, m2= *nr2;
691 const uchar *end= key + len;
692 for (; key < end ; key++)
693 {
694 MY_HASH_ADD(m1, m2, (uint)*key);
695 }
696 *nr1= m1;
697 *nr2= m2;
698}
699
700
701void
702my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
703 const uchar *key, size_t len,ulong *nr1, ulong *nr2)
704{
705 /*
706 Remove trailing spaces. We have to do this to be able to compare
707 'A ' and 'A' as identical
708 */
709 const uchar *end= skip_trailing_space(key, len);
710 my_hash_sort_mb_nopad_bin(cs, key, end - key, nr1, nr2);
711}
712
713
714/*
715 Fill the given buffer with 'maximum character' for given charset
716 SYNOPSIS
717 pad_max_char()
718 cs Character set
719 str Start of buffer to fill
720 end End of buffer to fill
721
722 DESCRIPTION
723 Write max key:
724 - for non-Unicode character sets:
725 just bfill using max_sort_char if max_sort_char is one byte.
726 In case when max_sort_char is two bytes, fill with double-byte pairs
727 and optionally pad with a single space character.
728 - for Unicode character set (utf-8):
729 create a buffer with multibyte representation of the max_sort_char
730 character, and copy it into max_str in a loop.
731*/
732static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
733{
734 char buf[10];
735 char buflen= cs->cset->native_to_mb(cs, cs->max_sort_char, (uchar*) buf,
736 (uchar*) buf + sizeof(buf));
737 DBUG_ASSERT(buflen > 0);
738 do
739 {
740 if ((str + buflen) <= end)
741 {
742 /* Enough space for the character */
743 memcpy(str, buf, buflen);
744 str+= buflen;
745 }
746 else
747 {
748 /*
749 There is no space for whole multibyte
750 character, then add trailing spaces.
751 */
752 *str++= ' ';
753 }
754 } while (str < end);
755}
756
757/*
758** Calculate min_str and max_str that ranges a LIKE string.
759** Arguments:
760** ptr Pointer to LIKE string.
761** ptr_length Length of LIKE string.
762** escape Escape character in LIKE. (Normally '\').
763** All escape characters should be removed from min_str and max_str
764** res_length Length of min_str and max_str.
765** min_str Smallest case sensitive string that ranges LIKE.
766** Should be space padded to res_length.
767** max_str Largest case sensitive string that ranges LIKE.
768** Normally padded with the biggest character sort value.
769**
770** The function should return 0 if ok and 1 if the LIKE string can't be
771** optimized !
772*/
773
774my_bool my_like_range_mb(CHARSET_INFO *cs,
775 const char *ptr,size_t ptr_length,
776 pbool escape, pbool w_one, pbool w_many,
777 size_t res_length,
778 char *min_str,char *max_str,
779 size_t *min_length,size_t *max_length)
780{
781 uint mb_len;
782 const char *end= ptr + ptr_length;
783 char *min_org= min_str;
784 char *min_end= min_str + res_length;
785 char *max_end= max_str + res_length;
786 size_t maxcharlen= res_length / cs->mbmaxlen;
787 const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
788
789 for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
790 {
791 /* We assume here that escape, w_any, w_namy are one-byte characters */
792 if (*ptr == escape && ptr+1 != end)
793 ptr++; /* Skip escape */
794 else if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */
795 {
796fill_max_and_min:
797 /*
798 Calculate length of keys:
799 'a\0\0... is the smallest possible string when we have space expand
800 a\ff\ff... is the biggest possible string
801 */
802 *min_length= (cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)) ?
803 (size_t) (min_str - min_org) :
804 res_length;
805 /* Create min key */
806 do
807 {
808 *min_str++= (char) cs->min_sort_char;
809 } while (min_str != min_end);
810
811 /*
812 Write max key: create a buffer with multibyte
813 representation of the max_sort_char character,
814 and copy it into max_str in a loop.
815 */
816 *max_length= res_length;
817 pad_max_char(cs, max_str, max_end);
818 return 0;
819 }
820 if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
821 {
822 if (ptr+mb_len > end || min_str+mb_len > min_end)
823 break;
824 while (mb_len--)
825 *min_str++= *max_str++= *ptr++;
826 }
827 else
828 {
829 /*
830 Special case for collations with contractions.
831 For example, in Chezh, 'ch' is a separate letter
832 which is sorted between 'h' and 'i'.
833 If the pattern 'abc%', 'c' at the end can mean:
834 - letter 'c' itself,
835 - beginning of the contraction 'ch'.
836
837 If we simply return this LIKE range:
838
839 'abc\min\min\min' and 'abc\max\max\max'
840
841 then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
842 will only find values starting from 'abc[^h]',
843 but won't find values starting from 'abch'.
844
845 We must ignore contraction heads followed by w_one or w_many.
846 ('Contraction head' means any letter which can be the first
847 letter in a contraction)
848
849 For example, for Czech 'abc%', we will return LIKE range,
850 which is equal to LIKE range for 'ab%':
851
852 'ab\min\min\min\min' and 'ab\max\max\max\max'.
853
854 */
855 if (contractions && ptr + 1 < end &&
856 my_uca_can_be_contraction_head(contractions, (uchar) *ptr))
857 {
858 /* Ptr[0] is a contraction head. */
859
860 if (ptr[1] == w_one || ptr[1] == w_many)
861 {
862 /* Contraction head followed by a wildcard, quit. */
863 goto fill_max_and_min;
864 }
865
866 /*
867 Some letters can be both contraction heads and contraction tails.
868 For example, in Danish 'aa' is a separate single letter which
869 is sorted after 'z'. So 'a' can be both head and tail.
870
871 If ptr[0]+ptr[1] is a contraction,
872 then put both letters together.
873
874 If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
875 is not a contraction, then we put only ptr[0],
876 and continue with ptr[1] on the next loop.
877 */
878 if (my_uca_can_be_contraction_tail(contractions, (uchar) ptr[1]) &&
879 my_uca_contraction2_weight(contractions, (uchar) ptr[0], ptr[1]))
880 {
881 /* Contraction found */
882 if (maxcharlen == 1 || min_str + 1 >= min_end)
883 {
884 /* Both contraction parts don't fit, quit */
885 goto fill_max_and_min;
886 }
887
888 /* Put contraction head */
889 *min_str++= *max_str++= *ptr++;
890 maxcharlen--;
891 }
892 }
893 /* Put contraction tail, or a single character */
894 *min_str++= *max_str++= *ptr++;
895 }
896 }
897
898 *min_length= *max_length = (size_t) (min_str - min_org);
899 while (min_str != min_end)
900 *min_str++= *max_str++= ' '; /* Because if key compression */
901 return 0;
902}
903
904
905/**
906 Calculate min_str and max_str that ranges a LIKE string.
907 Generic function, currently used for ucs2, utf16, utf32,
908 but should be suitable for any other character sets with
909 cs->min_sort_char and cs->max_sort_char represented in
910 Unicode code points.
911
912 @param cs Character set and collation pointer
913 @param ptr Pointer to LIKE pattern.
914 @param ptr_length Length of LIKE pattern.
915 @param escape Escape character pattern, typically '\'.
916 @param w_one 'One character' pattern, typically '_'.
917 @param w_many 'Many characters' pattern, typically '%'.
918 @param res_length Length of min_str and max_str.
919
920 @param[out] min_str Smallest string that ranges LIKE.
921 @param[out] max_str Largest string that ranges LIKE.
922 @param[out] min_len Length of min_str
923 @param[out] max_len Length of max_str
924
925 @return Optimization status.
926 @retval FALSE if LIKE pattern can be optimized
927 @rerval TRUE if LIKE can't be optimized.
928*/
929my_bool
930my_like_range_generic(CHARSET_INFO *cs,
931 const char *ptr, size_t ptr_length,
932 pbool escape, pbool w_one, pbool w_many,
933 size_t res_length,
934 char *min_str,char *max_str,
935 size_t *min_length,size_t *max_length)
936{
937 const char *end= ptr + ptr_length;
938 const char *min_org= min_str;
939 const char *max_org= max_str;
940 char *min_end= min_str + res_length;
941 char *max_end= max_str + res_length;
942 size_t charlen= res_length / cs->mbmaxlen;
943 size_t res_length_diff;
944 const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
945
946 for ( ; charlen > 0; charlen--)
947 {
948 my_wc_t wc, wc2;
949 int res;
950 if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
951 {
952 if (res == MY_CS_ILSEQ) /* Bad sequence */
953 return TRUE; /* min_length and max_length are not important */
954 break; /* End of the string */
955 }
956 ptr+= res;
957
958 if (wc == (my_wc_t) escape)
959 {
960 if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
961 {
962 if (res == MY_CS_ILSEQ)
963 return TRUE; /* min_length and max_length are not important */
964 /*
965 End of the string: Escape is the last character.
966 Put escape as a normal character.
967 We'll will leave the loop on the next iteration.
968 */
969 }
970 else
971 ptr+= res;
972
973 /* Put escape character to min_str and max_str */
974 if ((res= cs->cset->wc_mb(cs, wc,
975 (uchar*) min_str, (uchar*) min_end)) <= 0)
976 goto pad_set_lengths; /* No space */
977 min_str+= res;
978
979 if ((res= cs->cset->wc_mb(cs, wc,
980 (uchar*) max_str, (uchar*) max_end)) <= 0)
981 goto pad_set_lengths; /* No space */
982 max_str+= res;
983 continue;
984 }
985 else if (wc == (my_wc_t) w_one)
986 {
987 if ((res= cs->cset->wc_mb(cs, cs->min_sort_char,
988 (uchar*) min_str, (uchar*) min_end)) <= 0)
989 goto pad_set_lengths;
990 min_str+= res;
991
992 if ((res= cs->cset->wc_mb(cs, cs->max_sort_char,
993 (uchar*) max_str, (uchar*) max_end)) <= 0)
994 goto pad_set_lengths;
995 max_str+= res;
996 continue;
997 }
998 else if (wc == (my_wc_t) w_many)
999 {
1000 /*
1001 Calculate length of keys:
1002 a\min\min... is the smallest possible string
1003 a\max\max... is the biggest possible string
1004 */
1005 *min_length= (cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)) ?
1006 (size_t) (min_str - min_org) :
1007 res_length;
1008 *max_length= res_length;
1009 goto pad_min_max;
1010 }
1011
1012 if (contractions &&
1013 my_uca_can_be_contraction_head(contractions, wc) &&
1014 (res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
1015 {
1016 const uint16 *weight;
1017 if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
1018 {
1019 /* Contraction head followed by a wildcard */
1020 *min_length= *max_length= res_length;
1021 goto pad_min_max;
1022 }
1023
1024 if (my_uca_can_be_contraction_tail(contractions, wc2) &&
1025 (weight= my_uca_contraction2_weight(contractions, wc, wc2)) && weight[0])
1026 {
1027 /* Contraction found */
1028 if (charlen == 1)
1029 {
1030 /* contraction does not fit to result */
1031 *min_length= *max_length= res_length;
1032 goto pad_min_max;
1033 }
1034
1035 ptr+= res;
1036 charlen--;
1037
1038 /* Put contraction head */
1039 if ((res= cs->cset->wc_mb(cs, wc,
1040 (uchar*) min_str, (uchar*) min_end)) <= 0)
1041 goto pad_set_lengths;
1042 min_str+= res;
1043
1044 if ((res= cs->cset->wc_mb(cs, wc,
1045 (uchar*) max_str, (uchar*) max_end)) <= 0)
1046 goto pad_set_lengths;
1047 max_str+= res;
1048 wc= wc2; /* Prepare to put contraction tail */
1049 }
1050 }
1051
1052 /* Normal character, or contraction tail */
1053 if ((res= cs->cset->wc_mb(cs, wc,
1054 (uchar*) min_str, (uchar*) min_end)) <= 0)
1055 goto pad_set_lengths;
1056 min_str+= res;
1057 if ((res= cs->cset->wc_mb(cs, wc,
1058 (uchar*) max_str, (uchar*) max_end)) <= 0)
1059 goto pad_set_lengths;
1060 max_str+= res;
1061 }
1062
1063pad_set_lengths:
1064 *min_length= (size_t) (min_str - min_org);
1065 *max_length= (size_t) (max_str - max_org);
1066
1067pad_min_max:
1068 /*
1069 Fill up max_str and min_str to res_length.
1070 fill() cannot set incomplete characters and
1071 requires that "length" argument is divisible to mbminlen.
1072 Make sure to call fill() with proper "length" argument.
1073 */
1074 res_length_diff= res_length % cs->mbminlen;
1075 cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff,
1076 cs->min_sort_char);
1077 cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff,
1078 cs->max_sort_char);
1079
1080 /* In case of incomplete characters set the remainder to 0x00's */
1081 if (res_length_diff)
1082 {
1083 /* Example: odd res_length for ucs2 */
1084 memset(min_end - res_length_diff, 0, res_length_diff);
1085 memset(max_end - res_length_diff, 0, res_length_diff);
1086 }
1087 return FALSE;
1088}
1089
1090
1091static int my_wildcmp_mb_bin_impl(CHARSET_INFO *cs,
1092 const char *str,const char *str_end,
1093 const char *wildstr,const char *wildend,
1094 int escape, int w_one, int w_many, int recurse_level)
1095{
1096 int result= -1; /* Not found, using wildcards */
1097
1098 if (my_string_stack_guard && my_string_stack_guard(recurse_level))
1099 return 1;
1100 while (wildstr != wildend)
1101 {
1102 while (*wildstr != w_many && *wildstr != w_one)
1103 {
1104 int l;
1105 if (*wildstr == escape && wildstr+1 != wildend)
1106 wildstr++;
1107 if ((l = my_ismbchar(cs, wildstr, wildend)))
1108 {
1109 if (str+l > str_end || memcmp(str, wildstr, l) != 0)
1110 return 1;
1111 str += l;
1112 wildstr += l;
1113 }
1114 else
1115 if (str == str_end || *wildstr++ != *str++)
1116 return(1); /* No match */
1117 if (wildstr == wildend)
1118 return (str != str_end); /* Match if both are at end */
1119 result=1; /* Found an anchor char */
1120 }
1121 if (*wildstr == w_one)
1122 {
1123 do
1124 {
1125 if (str == str_end) /* Skip one char if possible */
1126 return (result);
1127 INC_PTR(cs,str,str_end);
1128 } while (++wildstr < wildend && *wildstr == w_one);
1129 if (wildstr == wildend)
1130 break;
1131 }
1132 if (*wildstr == w_many)
1133 { /* Found w_many */
1134 int cmp;
1135 const char* mb = wildstr;
1136 int mb_len=0;
1137
1138 wildstr++;
1139 /* Remove any '%' and '_' from the wild search string */
1140 for (; wildstr != wildend ; wildstr++)
1141 {
1142 if (*wildstr == w_many)
1143 continue;
1144 if (*wildstr == w_one)
1145 {
1146 if (str == str_end)
1147 return (-1);
1148 INC_PTR(cs,str,str_end);
1149 continue;
1150 }
1151 break; /* Not a wild character */
1152 }
1153 if (wildstr == wildend)
1154 return(0); /* Ok if w_many is last */
1155 if (str == str_end)
1156 return -1;
1157
1158 if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
1159 cmp= *++wildstr;
1160
1161 mb=wildstr;
1162 mb_len= my_ismbchar(cs, wildstr, wildend);
1163 INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
1164 do
1165 {
1166 for (;;)
1167 {
1168 if (str >= str_end)
1169 return -1;
1170 if (mb_len)
1171 {
1172 if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
1173 {
1174 str += mb_len;
1175 break;
1176 }
1177 }
1178 else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
1179 {
1180 str++;
1181 break;
1182 }
1183 INC_PTR(cs,str, str_end);
1184 }
1185 {
1186 int tmp=my_wildcmp_mb_bin_impl(cs,str,str_end,
1187 wildstr,wildend,escape,
1188 w_one,w_many, recurse_level+1);
1189 if (tmp <= 0)
1190 return (tmp);
1191 }
1192 } while (str != str_end);
1193 return(-1);
1194 }
1195 }
1196 return (str != str_end ? 1 : 0);
1197}
1198
1199int
1200my_wildcmp_mb_bin(CHARSET_INFO *cs,
1201 const char *str,const char *str_end,
1202 const char *wildstr,const char *wildend,
1203 int escape, int w_one, int w_many)
1204{
1205 return my_wildcmp_mb_bin_impl(cs, str, str_end,
1206 wildstr, wildend,
1207 escape, w_one, w_many, 1);
1208}
1209
1210
/*
  Data was produced from EastAsianWidth.txt
  using utr11-dump utility.
*/
/*
  Width flags for code points U+1100..U+11FF (slot 0x11 of utr11_data),
  indexed by the low byte of the code point.
  my_numcells_mb() adds the entry to the one cell it counts for every
  character, so 1 means "two display cells" and 0 means "one".
*/
static const char pg11[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1226
/*
  Width flags for code points U+2300..U+23FF (slot 0x23 of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pg23[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1238
/*
  Width flags for code points U+2E00..U+2EFF (slot 0x2E of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pg2E[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
};
1250
/*
  Width flags for code points U+2F00..U+2FFF (slot 0x2F of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pg2F[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
};
1262
/*
  Width flags for code points U+3000..U+30FF (slot 0x30 of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pg30[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};
1274
/*
  Width flags for code points U+3100..U+31FF (slot 0x31 of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pg31[256]=
{
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};
1286
/*
  Width flags for code points U+3200..U+32FF (slot 0x32 of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pg32[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
};
1298
/*
  Width flags for code points U+4D00..U+4DFF (slot 0x4D of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pg4D[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1310
/*
  Width flags for code points U+9F00..U+9FFF (slot 0x9F of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pg9F[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1322
/*
  Width flags for code points U+A400..U+A4FF (slot 0xA4 of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pgA4[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1334
/*
  Width flags for code points U+D700..U+D7FF (slot 0xD7 of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pgD7[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1346
/*
  Width flags for code points U+FA00..U+FAFF (slot 0xFA of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pgFA[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1358
/*
  Width flags for code points U+FE00..U+FEFF (slot 0xFE of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pgFE[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1370
/*
  Width flags for code points U+FF00..U+FFFF (slot 0xFF of utr11_data),
  indexed by the low byte of the code point.
  1 = my_numcells_mb() counts one extra display cell, 0 = no extra cell.
*/
static const char pgFF[256]=
{
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1382
/*
  Width lookup for code points up to U+FFFF, indexed by the high byte
  of the code point (256 slots, one per block of 256 code points).

  For each slot:
    p    - when not NULL, a 256-entry table giving the value for every
           code point of the block, indexed by the low byte;
    page - the value used for the whole block when p is NULL.

  my_numcells_mb() adds the selected value to the one cell it counts
  for each character.
*/
static const struct {int page; const char *p;} utr11_data[256]=
{
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
};
1418
1419
1420size_t my_numcells_mb(CHARSET_INFO *cs, const char *b, const char *e)
1421{
1422 my_wc_t wc;
1423 size_t clen= 0;
1424
1425 while (b < e)
1426 {
1427 int mb_len;
1428 uint pg;
1429 if ((mb_len= cs->cset->mb_wc(cs, &wc, (uchar*) b, (uchar*) e)) <= 0)
1430 {
1431 mb_len= 1; /* Let's think a wrong sequence takes 1 dysplay cell */
1432 b++;
1433 continue;
1434 }
1435 b+= mb_len;
1436 if (wc > 0xFFFF)
1437 {
1438 if (wc >= 0x20000 && wc <= 0x3FFFD) /* CJK Ideograph Extension B, C */
1439 clen+= 1;
1440 }
1441 else
1442 {
1443 pg= (wc >> 8) & 0xFF;
1444 clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
1445 }
1446 clen++;
1447 }
1448 return clen;
1449}
1450
1451
1452int my_mb_ctype_mb(CHARSET_INFO *cs, int *ctype,
1453 const uchar *s, const uchar *e)
1454{
1455 my_wc_t wc;
1456 int res= cs->cset->mb_wc(cs, &wc, s, e);
1457 if (res <= 0 || wc > 0xFFFF)
1458 *ctype= 0;
1459 else
1460 *ctype= my_uni_ctype[wc>>8].ctype ?
1461 my_uni_ctype[wc>>8].ctype[wc&0xFF] :
1462 my_uni_ctype[wc>>8].pctype;
1463 return res;
1464}
1465
1466
1467#endif
1468