1 | /* Copyright (c) 2002-2007 MySQL AB & tommy@valley.ne.jp |
2 | Copyright (c) 2002, 2014, Oracle and/or its affiliates. |
3 | Copyright (c) 2009, 2014, SkySQL Ab. |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Library General Public |
7 | License as published by the Free Software Foundation; version 2 |
8 | of the License. |
9 | |
10 | This library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Library General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Library General Public |
16 | License along with this library; if not, write to the Free |
17 | Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
18 | MA 02110-1301, USA */ |
19 | |
20 | /* This file is for binary pseudo charset, created by bar@mysql.com */ |
21 | |
22 | |
23 | #include "strings_def.h" |
24 | #include <m_ctype.h> |
25 | |
26 | static const uchar ctype_bin[]= |
27 | { |
28 | 0, |
29 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, |
30 | 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, |
31 | 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
32 | 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, |
33 | 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
34 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, |
35 | 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
36 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, |
37 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
38 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
39 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
40 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
41 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
42 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
43 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
44 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
45 | }; |
46 | |
47 | |
48 | /* Dummy array for toupper / tolower / sortorder */ |
49 | |
50 | static const uchar bin_char_array[] = |
51 | { |
52 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
53 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
54 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, |
55 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, |
56 | 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, |
57 | 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, |
58 | 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, |
59 | 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, |
60 | 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, |
61 | 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, |
62 | 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, |
63 | 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, |
64 | 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, |
65 | 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, |
66 | 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, |
67 | 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 |
68 | }; |
69 | |
70 | |
71 | static my_bool |
72 | my_coll_init_8bit_bin(struct charset_info_st *cs, |
73 | MY_CHARSET_LOADER *loader __attribute__((unused))) |
74 | { |
75 | cs->max_sort_char=255; |
76 | return FALSE; |
77 | } |
78 | |
79 | static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)), |
80 | const uchar *s, size_t slen, |
81 | const uchar *t, size_t tlen, |
82 | my_bool t_is_prefix) |
83 | { |
84 | size_t len=MY_MIN(slen,tlen); |
85 | int cmp= memcmp(s,t,len); |
86 | return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen); |
87 | } |
88 | |
89 | |
90 | size_t my_lengthsp_binary(CHARSET_INFO *cs __attribute__((unused)), |
91 | const char *ptr __attribute__((unused)), |
92 | size_t length) |
93 | { |
94 | return length; |
95 | } |
96 | |
97 | |
98 | /* |
99 | Compare two strings. Result is sign(first_argument - second_argument) |
100 | |
101 | SYNOPSIS |
102 | my_strnncollsp_binary() |
103 | cs Chararacter set |
104 | s String to compare |
105 | slen Length of 's' |
106 | t String to compare |
107 | tlen Length of 't' |
108 | |
109 | NOTE |
110 | This function is used for real binary strings, i.e. for |
111 | BLOB, BINARY(N) and VARBINARY(N). |
112 | It compares trailing spaces as spaces. |
113 | |
114 | RETURN |
115 | < 0 s < t |
116 | 0 s == t |
117 | > 0 s > t |
118 | */ |
119 | |
120 | static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)), |
121 | const uchar *s, size_t slen, |
122 | const uchar *t, size_t tlen) |
123 | { |
124 | return my_strnncoll_binary(cs,s,slen,t,tlen,0); |
125 | } |
126 | |
127 | |
128 | static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), |
129 | const uchar *s, size_t slen, |
130 | const uchar *t, size_t tlen, |
131 | my_bool t_is_prefix) |
132 | { |
133 | size_t len=MY_MIN(slen,tlen); |
134 | int cmp= memcmp(s,t,len); |
135 | return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen); |
136 | } |
137 | |
138 | |
139 | /* |
140 | Compare a string to an array of spaces, for PAD SPACE behaviour. |
141 | @param str - the string |
142 | @param length - the length of the string |
143 | @return <0 - if a byte less than SPACE was found |
144 | @return >0 - if a byte greater than SPACE was found |
145 | @return 0 - if the string entirely consists of SPACE characters |
146 | */ |
147 | int my_strnncollsp_padspace_bin(const uchar *str, size_t length) |
148 | { |
149 | for ( ; length ; str++, length--) |
150 | { |
151 | if (*str < ' ') |
152 | return -1; |
153 | else if (*str > ' ') |
154 | return 1; |
155 | } |
156 | return 0; |
157 | } |
158 | |
159 | |
160 | /* |
161 | Compare two strings. Result is sign(first_argument - second_argument) |
162 | |
163 | SYNOPSIS |
164 | my_strnncollsp_8bit_bin() |
165 | cs Chararacter set |
166 | s String to compare |
167 | slen Length of 's' |
168 | t String to compare |
169 | tlen Length of 't' |
170 | |
171 | NOTE |
172 | This function is used for character strings with binary collations. |
173 | The shorter string is extended with end space to be as long as the longer |
174 | one. |
175 | |
176 | RETURN |
177 | < 0 s < t |
178 | 0 s == t |
179 | > 0 s > t |
180 | */ |
181 | |
182 | static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), |
183 | const uchar *a, size_t a_length, |
184 | const uchar *b, size_t b_length) |
185 | { |
186 | const uchar *end; |
187 | size_t length; |
188 | |
189 | end= a + (length= MY_MIN(a_length, b_length)); |
190 | while (a < end) |
191 | { |
192 | if (*a++ != *b++) |
193 | return ((int) a[-1] - (int) b[-1]); |
194 | } |
195 | return a_length == b_length ? 0 : |
196 | a_length < b_length ? |
197 | -my_strnncollsp_padspace_bin(b, b_length - length) : |
198 | my_strnncollsp_padspace_bin(a, a_length - length); |
199 | } |
200 | |
201 | |
202 | static int my_strnncollsp_8bit_nopad_bin(CHARSET_INFO * cs |
203 | __attribute__((unused)), |
204 | const uchar *a, size_t a_length, |
205 | const uchar *b, size_t b_length) |
206 | { |
207 | return my_strnncoll_8bit_bin(cs, a, a_length, b, b_length, FALSE); |
208 | } |
209 | |
210 | |
211 | /* This function is used for all conversion functions */ |
212 | |
213 | static size_t my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)), |
214 | char *str __attribute__((unused))) |
215 | { |
216 | return 0; |
217 | } |
218 | |
219 | |
220 | static size_t my_case_bin(CHARSET_INFO *cs __attribute__((unused)), |
221 | char *src __attribute__((unused)), |
222 | size_t srclen, |
223 | char *dst __attribute__((unused)), |
224 | size_t dstlen __attribute__((unused))) |
225 | { |
226 | return srclen; |
227 | } |
228 | |
229 | |
230 | static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)), |
231 | const char *s, const char *t) |
232 | { |
233 | return strcmp(s,t); |
234 | } |
235 | |
236 | |
237 | static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)), |
238 | my_wc_t *wc, |
239 | const uchar *str, |
240 | const uchar *end __attribute__((unused))) |
241 | { |
242 | if (str >= end) |
243 | return MY_CS_TOOSMALL; |
244 | |
245 | *wc=str[0]; |
246 | return 1; |
247 | } |
248 | |
249 | |
250 | int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)), |
251 | my_wc_t wc, uchar *s, uchar *e) |
252 | { |
253 | if (s >= e) |
254 | return MY_CS_TOOSMALL; |
255 | |
256 | if (wc < 256) |
257 | { |
258 | s[0]= (char) wc; |
259 | return 1; |
260 | } |
261 | return MY_CS_ILUNI; |
262 | } |
263 | |
264 | |
265 | void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)), |
266 | const uchar *key, size_t len,ulong *nr1, ulong *nr2) |
267 | { |
268 | const uchar *end = key + len; |
269 | ulong tmp1= *nr1; |
270 | ulong tmp2= *nr2; |
271 | |
272 | for (; key < end ; key++) |
273 | { |
274 | MY_HASH_ADD(tmp1, tmp2, (uint) *key); |
275 | } |
276 | |
277 | *nr1= tmp1; |
278 | *nr2= tmp2; |
279 | } |
280 | |
281 | |
282 | void my_hash_sort_8bit_bin(CHARSET_INFO *cs __attribute__((unused)), |
283 | const uchar *key, size_t len, |
284 | ulong *nr1, ulong *nr2) |
285 | { |
286 | /* |
287 | Remove trailing spaces. We have to do this to be able to compare |
288 | 'A ' and 'A' as identical |
289 | */ |
290 | const uchar *end= skip_trailing_space(key, len); |
291 | my_hash_sort_bin(cs, key, end - key, nr1, nr2); |
292 | } |
293 | |
294 | |
/*
  The following defines are here to keep the wildcard-matching code below
  identical to the one in ctype-simple.c.

  likeconv(s,A)    yields A unchanged (no per-character conversion here).
  INC_PTR(cs,A,B)  advances pointer A by one; cs and B are ignored.
                   The expansion is fully parenthesized so that it stays a
                   single expression wherever it is used.
*/

#define likeconv(s,A) (A)
#define INC_PTR(cs,A,B) ((A)++)
302 | |
303 | |
304 | static |
305 | int my_wildcmp_bin_impl(CHARSET_INFO *cs, |
306 | const char *str,const char *str_end, |
307 | const char *wildstr,const char *wildend, |
308 | int escape, int w_one, int w_many, int recurse_level) |
309 | { |
310 | int result= -1; /* Not found, using wildcards */ |
311 | |
312 | if (my_string_stack_guard && my_string_stack_guard(recurse_level)) |
313 | return 1; |
314 | while (wildstr != wildend) |
315 | { |
316 | while (*wildstr != w_many && *wildstr != w_one) |
317 | { |
318 | if (*wildstr == escape && wildstr+1 != wildend) |
319 | wildstr++; |
320 | if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++)) |
321 | return(1); /* No match */ |
322 | if (wildstr == wildend) |
323 | return(str != str_end); /* Match if both are at end */ |
324 | result=1; /* Found an anchor char */ |
325 | } |
326 | if (*wildstr == w_one) |
327 | { |
328 | do |
329 | { |
330 | if (str == str_end) /* Skip one char if possible */ |
331 | return(result); |
332 | INC_PTR(cs,str,str_end); |
333 | } while (++wildstr < wildend && *wildstr == w_one); |
334 | if (wildstr == wildend) |
335 | break; |
336 | } |
337 | if (*wildstr == w_many) |
338 | { /* Found w_many */ |
339 | uchar cmp; |
340 | wildstr++; |
341 | /* Remove any '%' and '_' from the wild search string */ |
342 | for (; wildstr != wildend ; wildstr++) |
343 | { |
344 | if (*wildstr == w_many) |
345 | continue; |
346 | if (*wildstr == w_one) |
347 | { |
348 | if (str == str_end) |
349 | return(-1); |
350 | INC_PTR(cs,str,str_end); |
351 | continue; |
352 | } |
353 | break; /* Not a wild character */ |
354 | } |
355 | if (wildstr == wildend) |
356 | return(0); /* match if w_many is last */ |
357 | if (str == str_end) |
358 | return(-1); |
359 | |
360 | if ((cmp= *wildstr) == escape && wildstr+1 != wildend) |
361 | cmp= *++wildstr; |
362 | |
363 | INC_PTR(cs,wildstr,wildend); /* This is compared through cmp */ |
364 | cmp=likeconv(cs,cmp); |
365 | do |
366 | { |
367 | while (str != str_end && (uchar) likeconv(cs,*str) != cmp) |
368 | str++; |
369 | if (str++ == str_end) |
370 | return(-1); |
371 | { |
372 | int tmp=my_wildcmp_bin_impl(cs,str,str_end,wildstr,wildend,escape,w_one, |
373 | w_many, recurse_level + 1); |
374 | if (tmp <= 0) |
375 | return(tmp); |
376 | } |
377 | } while (str != str_end); |
378 | return(-1); |
379 | } |
380 | } |
381 | return(str != str_end ? 1 : 0); |
382 | } |
383 | |
384 | int my_wildcmp_bin(CHARSET_INFO *cs, |
385 | const char *str,const char *str_end, |
386 | const char *wildstr,const char *wildend, |
387 | int escape, int w_one, int w_many) |
388 | { |
389 | return my_wildcmp_bin_impl(cs, str, str_end, |
390 | wildstr, wildend, |
391 | escape, w_one, w_many, 1); |
392 | } |
393 | |
394 | |
395 | static size_t |
396 | my_strnxfrm_8bit_bin(CHARSET_INFO *cs, |
397 | uchar * dst, size_t dstlen, uint nweights, |
398 | const uchar *src, size_t srclen, uint flags) |
399 | { |
400 | set_if_smaller(srclen, dstlen); |
401 | set_if_smaller(srclen, nweights); |
402 | if (dst != src) |
403 | memcpy(dst, src, srclen); |
404 | return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + srclen, dst + dstlen, |
405 | (uint)(nweights - srclen), flags, 0); |
406 | } |
407 | |
408 | |
409 | static size_t |
410 | my_strnxfrm_8bit_nopad_bin(CHARSET_INFO *cs, |
411 | uchar * dst, size_t dstlen, uint nweights, |
412 | const uchar *src, size_t srclen, uint flags) |
413 | { |
414 | set_if_smaller(srclen, dstlen); |
415 | set_if_smaller(srclen, nweights); |
416 | if (dst != src) |
417 | memcpy(dst, src, srclen); |
418 | return my_strxfrm_pad_desc_and_reverse_nopad(cs, dst, dst + srclen, |
419 | dst + dstlen,(uint)(nweights - srclen), |
420 | flags, 0); |
421 | } |
422 | |
423 | |
424 | static |
425 | uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)), |
426 | const char *b, size_t b_length, |
427 | const char *s, size_t s_length, |
428 | my_match_t *match, uint nmatch) |
429 | { |
430 | register const uchar *str, *search, *end, *search_end; |
431 | |
432 | if (s_length <= b_length) |
433 | { |
434 | if (!s_length) |
435 | { |
436 | if (nmatch) |
437 | { |
438 | match->beg= 0; |
439 | match->end= 0; |
440 | match->mb_len= 0; |
441 | } |
442 | return 1; /* Empty string is always found */ |
443 | } |
444 | |
445 | str= (const uchar*) b; |
446 | search= (const uchar*) s; |
447 | end= (const uchar*) b+b_length-s_length+1; |
448 | search_end= (const uchar*) s + s_length; |
449 | |
450 | skip: |
451 | while (str != end) |
452 | { |
453 | if ( (*str++) == (*search)) |
454 | { |
455 | register const uchar *i,*j; |
456 | |
457 | i= str; |
458 | j= search+1; |
459 | |
460 | while (j != search_end) |
461 | if ((*i++) != (*j++)) |
462 | goto skip; |
463 | |
464 | if (nmatch > 0) |
465 | { |
466 | match[0].beg= 0; |
467 | match[0].end= (uint) (str- (const uchar*)b-1); |
468 | match[0].mb_len= match[0].end; |
469 | |
470 | if (nmatch > 1) |
471 | { |
472 | match[1].beg= match[0].end; |
473 | match[1].end= (uint)(match[0].end+s_length); |
474 | match[1].mb_len= match[1].end-match[1].beg; |
475 | } |
476 | } |
477 | return 2; |
478 | } |
479 | } |
480 | } |
481 | return 0; |
482 | } |
483 | |
484 | |
485 | MY_COLLATION_HANDLER my_collation_8bit_bin_handler = |
486 | { |
487 | my_coll_init_8bit_bin, |
488 | my_strnncoll_8bit_bin, |
489 | my_strnncollsp_8bit_bin, |
490 | my_strnxfrm_8bit_bin, |
491 | my_strnxfrmlen_simple, |
492 | my_like_range_simple, |
493 | my_wildcmp_bin, |
494 | my_strcasecmp_bin, |
495 | my_instr_bin, |
496 | my_hash_sort_8bit_bin, |
497 | my_propagate_simple |
498 | }; |
499 | |
500 | |
501 | MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler = |
502 | { |
503 | my_coll_init_8bit_bin, |
504 | my_strnncoll_8bit_bin, |
505 | my_strnncollsp_8bit_nopad_bin, |
506 | my_strnxfrm_8bit_nopad_bin, |
507 | my_strnxfrmlen_simple, |
508 | my_like_range_simple, |
509 | my_wildcmp_bin, |
510 | my_strcasecmp_bin, |
511 | my_instr_bin, |
512 | my_hash_sort_bin, |
513 | my_propagate_simple |
514 | }; |
515 | |
516 | |
517 | static MY_COLLATION_HANDLER my_collation_binary_handler = |
518 | { |
519 | NULL, /* init */ |
520 | my_strnncoll_binary, |
521 | my_strnncollsp_binary, |
522 | my_strnxfrm_8bit_bin, |
523 | my_strnxfrmlen_simple, |
524 | my_like_range_simple, |
525 | my_wildcmp_bin, |
526 | my_strcasecmp_bin, |
527 | my_instr_bin, |
528 | my_hash_sort_bin, |
529 | my_propagate_simple |
530 | }; |
531 | |
532 | |
533 | static MY_CHARSET_HANDLER my_charset_handler= |
534 | { |
535 | NULL, /* init */ |
536 | my_numchars_8bit, |
537 | my_charpos_8bit, |
538 | my_lengthsp_binary, |
539 | my_numcells_8bit, |
540 | my_mb_wc_bin, |
541 | my_wc_mb_bin, |
542 | my_mb_ctype_8bit, |
543 | my_case_str_bin, |
544 | my_case_str_bin, |
545 | my_case_bin, |
546 | my_case_bin, |
547 | my_snprintf_8bit, |
548 | my_long10_to_str_8bit, |
549 | my_longlong10_to_str_8bit, |
550 | my_fill_8bit, |
551 | my_strntol_8bit, |
552 | my_strntoul_8bit, |
553 | my_strntoll_8bit, |
554 | my_strntoull_8bit, |
555 | my_strntod_8bit, |
556 | my_strtoll10_8bit, |
557 | my_strntoull10rnd_8bit, |
558 | my_scan_8bit, |
559 | my_charlen_8bit, |
560 | my_well_formed_char_length_8bit, |
561 | my_copy_8bit, |
562 | my_wc_mb_bin, |
563 | }; |
564 | |
565 | |
566 | struct charset_info_st my_charset_bin = |
567 | { |
568 | 63,0,0, /* number */ |
569 | MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY|MY_CS_NOPAD,/* state */ |
570 | "binary" , /* cs name */ |
571 | "binary" , /* name */ |
572 | "" , /* comment */ |
573 | NULL, /* tailoring */ |
574 | ctype_bin, /* ctype */ |
575 | bin_char_array, /* to_lower */ |
576 | bin_char_array, /* to_upper */ |
577 | NULL, /* sort_order */ |
578 | NULL, /* uca */ |
579 | NULL, /* tab_to_uni */ |
580 | NULL, /* tab_from_uni */ |
581 | &my_unicase_default, /* caseinfo */ |
582 | NULL, /* state_map */ |
583 | NULL, /* ident_map */ |
584 | 1, /* strxfrm_multiply */ |
585 | 1, /* caseup_multiply */ |
586 | 1, /* casedn_multiply */ |
587 | 1, /* mbminlen */ |
588 | 1, /* mbmaxlen */ |
589 | 0, /* min_sort_char */ |
590 | 255, /* max_sort_char */ |
591 | 0, /* pad char */ |
592 | 0, /* escape_with_backslash_is_dangerous */ |
593 | 1, /* levels_for_order */ |
594 | &my_charset_handler, |
595 | &my_collation_binary_handler |
596 | }; |
597 | |