| 1 | /* Copyright (c) 2002, 2013, Oracle and/or its affiliates. |
| 2 | Copyright (c) 2009, 2014, SkySQL Ab. |
| 3 | |
| 4 | This program is free software; you can redistribute it and/or modify |
| 5 | it under the terms of the GNU General Public License as published by |
| 6 | the Free Software Foundation; version 2 of the License. |
| 7 | |
| 8 | This program is distributed in the hope that it will be useful, |
| 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License |
| 14 | along with this program; if not, write to the Free Software |
| 15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
| 16 | |
| 17 | #include "strings_def.h" |
| 18 | #include <m_ctype.h> |
| 19 | #include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */ |
| 20 | #include <errno.h> |
| 21 | |
| 22 | #include "stdarg.h" |
| 23 | |
| 24 | /* |
| 25 | Returns the number of bytes required for strnxfrm(). |
| 26 | */ |
| 27 | |
| 28 | size_t my_strnxfrmlen_simple(CHARSET_INFO *cs, size_t len) |
| 29 | { |
| 30 | return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1); |
| 31 | } |
| 32 | |
| 33 | |
| 34 | /* |
| 35 | Converts a string into its sort key. |
| 36 | |
| 37 | SYNOPSIS |
| 38 | my_strnxfrm_xxx() |
| 39 | |
| 40 | IMPLEMENTATION |
| 41 | |
| 42 | The my_strxfrm_xxx() function transforms a string pointed to by |
| 43 | 'src' with length 'srclen' according to the charset+collation |
| 44 | pair 'cs' and copies the result key into 'dest'. |
| 45 | |
| 46 | Comparing two strings using memcmp() after my_strnxfrm_xxx() |
| 47 | is equal to comparing two original strings with my_strnncollsp_xxx(). |
| 48 | |
| 49 | Not more than 'dstlen' bytes are written into 'dst'. |
| 50 | To guarantee that the whole string is transformed, 'dstlen' must be |
| 51 | at least srclen*cs->strnxfrm_multiply bytes long. Otherwise, |
| 52 | consequent memcmp() may return a non-accurate result. |
| 53 | |
| 54 | If the source string is too short to fill whole 'dstlen' bytes, |
| 55 | then the 'dest' string is padded up to 'dstlen', ensuring that: |
| 56 | |
| 57 | "a" == "a " |
| 58 | "a\0" < "a" |
| 59 | "a\0" < "a " |
| 60 | |
| 61 | my_strnxfrm_simple() is implemented for 8bit charsets and |
| 62 | simple collations with one-to-one string->key transformation. |
| 63 | |
| 64 | See also implementations for various charsets/collations in |
| 65 | other ctype-xxx.c files. |
| 66 | |
| 67 | RETURN |
| 68 | |
| 69 | Target len 'dstlen'. |
| 70 | |
| 71 | */ |
| 72 | |
| 73 | |
| 74 | size_t my_strnxfrm_simple_internal(CHARSET_INFO * cs, |
| 75 | uchar *dst, size_t dstlen, uint *nweights, |
| 76 | const uchar *src, size_t srclen) |
| 77 | { |
| 78 | const uchar *map= cs->sort_order; |
| 79 | uchar *d0= dst; |
| 80 | uint frmlen; |
| 81 | if ((frmlen= (uint)MY_MIN(dstlen, *nweights)) > srclen) |
| 82 | frmlen= (uint)srclen; |
| 83 | if (dst != src) |
| 84 | { |
| 85 | const uchar *end; |
| 86 | for (end= src + frmlen; src < end;) |
| 87 | *dst++= map[*src++]; |
| 88 | } |
| 89 | else |
| 90 | { |
| 91 | const uchar *end; |
| 92 | for (end= dst + frmlen; dst < end; dst++) |
| 93 | *dst= map[(uchar) *dst]; |
| 94 | } |
| 95 | *nweights-= frmlen; |
| 96 | return dst - d0; |
| 97 | } |
| 98 | |
| 99 | |
| 100 | size_t my_strnxfrm_simple(CHARSET_INFO * cs, |
| 101 | uchar *dst, size_t dstlen, uint nweights, |
| 102 | const uchar *src, size_t srclen, uint flags) |
| 103 | { |
| 104 | uchar *d0= dst; |
| 105 | dst= d0 + my_strnxfrm_simple_internal(cs, dst, dstlen, &nweights, |
| 106 | src, srclen); |
| 107 | return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen, |
| 108 | nweights, flags, 0); |
| 109 | } |
| 110 | |
| 111 | |
| 112 | size_t my_strnxfrm_simple_nopad(CHARSET_INFO * cs, |
| 113 | uchar *dst, size_t dstlen, uint nweights, |
| 114 | const uchar *src, size_t srclen, uint flags) |
| 115 | { |
| 116 | uchar *d0= dst; |
| 117 | dst= d0 + my_strnxfrm_simple_internal(cs, dst, dstlen, &nweights, |
| 118 | src, srclen); |
| 119 | return my_strxfrm_pad_desc_and_reverse_nopad(cs, d0, dst, d0 + dstlen, |
| 120 | nweights, flags, 0); |
| 121 | } |
| 122 | |
| 123 | |
| 124 | int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen, |
| 125 | const uchar *t, size_t tlen, |
| 126 | my_bool t_is_prefix) |
| 127 | { |
| 128 | size_t len = ( slen > tlen ) ? tlen : slen; |
| 129 | const uchar *map= cs->sort_order; |
| 130 | if (t_is_prefix && slen > tlen) |
| 131 | slen=tlen; |
| 132 | while (len--) |
| 133 | { |
| 134 | if (map[*s++] != map[*t++]) |
| 135 | return ((int) map[s[-1]] - (int) map[t[-1]]); |
| 136 | } |
| 137 | /* |
| 138 | We can't use (slen - tlen) here as the result may be outside of the |
| 139 | precision of a signed int |
| 140 | */ |
| 141 | return slen > tlen ? 1 : slen < tlen ? -1 : 0 ; |
| 142 | } |
| 143 | |
| 144 | |
| 145 | /* |
| 146 | Compare strings, discarding end space |
| 147 | |
| 148 | SYNOPSIS |
| 149 | my_strnncollsp_simple() |
| 150 | cs character set handler |
| 151 | a First string to compare |
| 152 | a_length Length of 'a' |
| 153 | b Second string to compare |
| 154 | b_length Length of 'b' |
| 155 | |
| 156 | IMPLEMENTATION |
| 157 | If one string is shorter as the other, then we space extend the other |
| 158 | so that the strings have equal length. |
| 159 | |
| 160 | This will ensure that the following things hold: |
| 161 | |
| 162 | "a" == "a " |
| 163 | "a\0" < "a" |
| 164 | "a\0" < "a " |
| 165 | |
| 166 | RETURN |
| 167 | < 0 a < b |
| 168 | = 0 a == b |
| 169 | > 0 a > b |
| 170 | */ |
| 171 | |
| 172 | int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length, |
| 173 | const uchar *b, size_t b_length) |
| 174 | { |
| 175 | const uchar *map= cs->sort_order, *end; |
| 176 | size_t length; |
| 177 | int res; |
| 178 | |
| 179 | end= a + (length= MY_MIN(a_length, b_length)); |
| 180 | while (a < end) |
| 181 | { |
| 182 | if (map[*a++] != map[*b++]) |
| 183 | return ((int) map[a[-1]] - (int) map[b[-1]]); |
| 184 | } |
| 185 | res= 0; |
| 186 | if (a_length != b_length) |
| 187 | { |
| 188 | int swap= 1; |
| 189 | /* |
| 190 | Check the next not space character of the longer key. If it's < ' ', |
| 191 | then it's smaller than the other key. |
| 192 | */ |
| 193 | if (a_length < b_length) |
| 194 | { |
| 195 | /* put shorter key in s */ |
| 196 | a_length= b_length; |
| 197 | a= b; |
| 198 | swap= -1; /* swap sign of result */ |
| 199 | res= -res; |
| 200 | } |
| 201 | for (end= a + a_length-length; a < end ; a++) |
| 202 | { |
| 203 | if (map[*a] != map[' ']) |
| 204 | return (map[*a] < map[' ']) ? -swap : swap; |
| 205 | } |
| 206 | } |
| 207 | return res; |
| 208 | } |
| 209 | |
| 210 | |
| 211 | int my_strnncollsp_simple_nopad(CHARSET_INFO * cs, |
| 212 | const uchar *a, size_t a_length, |
| 213 | const uchar *b, size_t b_length) |
| 214 | { |
| 215 | return my_strnncoll_simple(cs, a, a_length, b, b_length, FALSE); |
| 216 | } |
| 217 | |
| 218 | |
| 219 | size_t my_caseup_str_8bit(CHARSET_INFO * cs,char *str) |
| 220 | { |
| 221 | register const uchar *map= cs->to_upper; |
| 222 | char *str_orig= str; |
| 223 | while ((*str= (char) map[(uchar) *str]) != 0) |
| 224 | str++; |
| 225 | return (size_t) (str - str_orig); |
| 226 | } |
| 227 | |
| 228 | |
| 229 | size_t my_casedn_str_8bit(CHARSET_INFO * cs,char *str) |
| 230 | { |
| 231 | register const uchar *map= cs->to_lower; |
| 232 | char *str_orig= str; |
| 233 | while ((*str= (char) map[(uchar) *str]) != 0) |
| 234 | str++; |
| 235 | return (size_t) (str - str_orig); |
| 236 | } |
| 237 | |
| 238 | |
| 239 | size_t my_caseup_8bit(CHARSET_INFO * cs, char *src, size_t srclen, |
| 240 | char *dst __attribute__((unused)), |
| 241 | size_t dstlen __attribute__((unused))) |
| 242 | { |
| 243 | char *end= src + srclen; |
| 244 | register const uchar *map= cs->to_upper; |
| 245 | DBUG_ASSERT(src == dst && srclen == dstlen); |
| 246 | for ( ; src != end ; src++) |
| 247 | *src= (char) map[(uchar) *src]; |
| 248 | return srclen; |
| 249 | } |
| 250 | |
| 251 | |
| 252 | size_t my_casedn_8bit(CHARSET_INFO * cs, char *src, size_t srclen, |
| 253 | char *dst __attribute__((unused)), |
| 254 | size_t dstlen __attribute__((unused))) |
| 255 | { |
| 256 | char *end= src + srclen; |
| 257 | register const uchar *map=cs->to_lower; |
| 258 | DBUG_ASSERT(src == dst && srclen == dstlen); |
| 259 | for ( ; src != end ; src++) |
| 260 | *src= (char) map[(uchar) *src]; |
| 261 | return srclen; |
| 262 | } |
| 263 | |
| 264 | int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t) |
| 265 | { |
| 266 | register const uchar *map=cs->to_upper; |
| 267 | while (map[(uchar) *s] == map[(uchar) *t++]) |
| 268 | if (!*s++) return 0; |
| 269 | return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]); |
| 270 | } |
| 271 | |
| 272 | |
| 273 | int my_charlen_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 274 | const uchar *str, const uchar *end) |
| 275 | { |
| 276 | return str >= end ? MY_CS_TOOSMALL : 1; |
| 277 | } |
| 278 | |
| 279 | |
| 280 | int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, |
| 281 | const uchar *str, |
| 282 | const uchar *end __attribute__((unused))) |
| 283 | { |
| 284 | if (str >= end) |
| 285 | return MY_CS_TOOSMALL; |
| 286 | |
| 287 | *wc=cs->tab_to_uni[*str]; |
| 288 | return (!wc[0] && str[0]) ? -1 : 1; |
| 289 | } |
| 290 | |
| 291 | int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, |
| 292 | uchar *str, |
| 293 | uchar *end) |
| 294 | { |
| 295 | MY_UNI_IDX *idx; |
| 296 | |
| 297 | if (str >= end) |
| 298 | return MY_CS_TOOSMALL; |
| 299 | |
| 300 | for (idx=cs->tab_from_uni; idx->tab ; idx++) |
| 301 | { |
| 302 | if (idx->from <= wc && idx->to >= wc) |
| 303 | { |
| 304 | str[0]= idx->tab[wc - idx->from]; |
| 305 | return (!str[0] && wc) ? MY_CS_ILUNI : 1; |
| 306 | } |
| 307 | } |
| 308 | return MY_CS_ILUNI; |
| 309 | } |
| 310 | |
| 311 | |
| 312 | /* |
| 313 | We can't use vsprintf here as it's not guaranteed to return |
| 314 | the length on all operating systems. |
| 315 | This function is also not called in a safe environment, so the |
| 316 | end buffer must be checked. |
| 317 | */ |
| 318 | |
| 319 | size_t my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 320 | char* to, size_t n __attribute__((unused)), |
| 321 | const char* fmt, ...) |
| 322 | { |
| 323 | va_list args; |
| 324 | size_t result; |
| 325 | va_start(args,fmt); |
| 326 | result= my_vsnprintf(to, n, fmt, args); |
| 327 | va_end(args); |
| 328 | return result; |
| 329 | } |
| 330 | |
| 331 | |
| 332 | void my_hash_sort_simple_nopad(CHARSET_INFO *cs, |
| 333 | const uchar *key, size_t len, |
| 334 | ulong *nr1, ulong *nr2) |
| 335 | { |
| 336 | register const uchar *sort_order=cs->sort_order; |
| 337 | const uchar *end= key + len; |
| 338 | register ulong m1= *nr1, m2= *nr2; |
| 339 | for (; key < (uchar*) end ; key++) |
| 340 | { |
| 341 | MY_HASH_ADD(m1, m2, (uint) sort_order[(uint) *key]); |
| 342 | } |
| 343 | *nr1= m1; |
| 344 | *nr2= m2; |
| 345 | } |
| 346 | |
| 347 | |
| 348 | void my_hash_sort_simple(CHARSET_INFO *cs, |
| 349 | const uchar *key, size_t len, |
| 350 | ulong *nr1, ulong *nr2) |
| 351 | { |
| 352 | register const uchar *sort_order=cs->sort_order; |
| 353 | const uchar *end; |
| 354 | uint16 space_weight= sort_order[' ']; |
| 355 | |
| 356 | /* |
| 357 | Remove all trailing characters that are equal to space. |
| 358 | We have to do this to be able to compare 'A ' and 'A' as identical. |
| 359 | |
| 360 | If the key is long enough, cut the trailing spaces (0x20) using an |
| 361 | optimized function implemented in skip_trailing_spaces(). |
| 362 | |
| 363 | "len > 16" is just some heuristic here. |
| 364 | Calling skip_triling_space() for short values is not desirable, |
| 365 | because its initialization block may be more expensive than the |
| 366 | performance gained. |
| 367 | */ |
| 368 | |
| 369 | end= len > 16 ? skip_trailing_space(key, len) : key + len; |
| 370 | |
| 371 | /* |
| 372 | We removed all trailing characters that are binary equal to space 0x20. |
| 373 | Now remove all trailing characters that have weights equal to space. |
| 374 | Some 8bit simple collations may have such characters: |
| 375 | - cp1250_general_ci 0xA0 NO-BREAK SPACE == 0x20 SPACE |
| 376 | - cp1251_ukrainian_ci 0x60 GRAVE ACCENT == 0x20 SPACE |
| 377 | - koi8u_general_ci 0x60 GRAVE ACCENT == 0x20 SPACE |
| 378 | */ |
| 379 | |
| 380 | for ( ; key < end ; ) |
| 381 | { |
| 382 | if (sort_order[*--end] != space_weight) |
| 383 | { |
| 384 | end++; |
| 385 | break; |
| 386 | } |
| 387 | } |
| 388 | my_hash_sort_simple_nopad(cs, key, end - key, nr1, nr2); |
| 389 | } |
| 390 | |
| 391 | |
| 392 | long my_strntol_8bit(CHARSET_INFO *cs, |
| 393 | const char *nptr, size_t l, int base, |
| 394 | char **endptr, int *err) |
| 395 | { |
| 396 | int negative; |
| 397 | register uint32 cutoff; |
| 398 | register uint cutlim; |
| 399 | register uint32 i; |
| 400 | register const char *s; |
| 401 | register uchar c; |
| 402 | const char *save, *e; |
| 403 | int overflow; |
| 404 | |
| 405 | *err= 0; /* Initialize error indicator */ |
| 406 | |
| 407 | s = nptr; |
| 408 | e = nptr+l; |
| 409 | |
| 410 | for ( ; s<e && my_isspace(cs, *s) ; s++); |
| 411 | |
| 412 | if (s == e) |
| 413 | { |
| 414 | goto noconv; |
| 415 | } |
| 416 | |
| 417 | /* Check for a sign. */ |
| 418 | if (*s == '-') |
| 419 | { |
| 420 | negative = 1; |
| 421 | ++s; |
| 422 | } |
| 423 | else if (*s == '+') |
| 424 | { |
| 425 | negative = 0; |
| 426 | ++s; |
| 427 | } |
| 428 | else |
| 429 | negative = 0; |
| 430 | |
| 431 | save = s; |
| 432 | cutoff = ((uint32)~0L) / (uint32) base; |
| 433 | cutlim = (uint) (((uint32)~0L) % (uint32) base); |
| 434 | |
| 435 | overflow = 0; |
| 436 | i = 0; |
| 437 | for (c = *s; s != e; c = *++s) |
| 438 | { |
| 439 | if (c>='0' && c<='9') |
| 440 | c -= '0'; |
| 441 | else if (c>='A' && c<='Z') |
| 442 | c = c - 'A' + 10; |
| 443 | else if (c>='a' && c<='z') |
| 444 | c = c - 'a' + 10; |
| 445 | else |
| 446 | break; |
| 447 | if (c >= base) |
| 448 | break; |
| 449 | if (i > cutoff || (i == cutoff && c > cutlim)) |
| 450 | overflow = 1; |
| 451 | else |
| 452 | { |
| 453 | i *= (uint32) base; |
| 454 | i += c; |
| 455 | } |
| 456 | } |
| 457 | |
| 458 | if (s == save) |
| 459 | goto noconv; |
| 460 | |
| 461 | if (endptr != NULL) |
| 462 | *endptr = (char *) s; |
| 463 | |
| 464 | if (negative) |
| 465 | { |
| 466 | if (i > (uint32) INT_MIN32) |
| 467 | overflow = 1; |
| 468 | } |
| 469 | else if (i > INT_MAX32) |
| 470 | overflow = 1; |
| 471 | |
| 472 | if (overflow) |
| 473 | { |
| 474 | err[0]= ERANGE; |
| 475 | return negative ? INT_MIN32 : INT_MAX32; |
| 476 | } |
| 477 | |
| 478 | return (negative ? -((long) i) : (long) i); |
| 479 | |
| 480 | noconv: |
| 481 | err[0]= EDOM; |
| 482 | if (endptr != NULL) |
| 483 | *endptr = (char *) nptr; |
| 484 | return 0L; |
| 485 | } |
| 486 | |
| 487 | |
| 488 | ulong my_strntoul_8bit(CHARSET_INFO *cs, |
| 489 | const char *nptr, size_t l, int base, |
| 490 | char **endptr, int *err) |
| 491 | { |
| 492 | int negative; |
| 493 | register uint32 cutoff; |
| 494 | register uint cutlim; |
| 495 | register uint32 i; |
| 496 | register const char *s; |
| 497 | register uchar c; |
| 498 | const char *save, *e; |
| 499 | int overflow; |
| 500 | |
| 501 | *err= 0; /* Initialize error indicator */ |
| 502 | |
| 503 | s = nptr; |
| 504 | e = nptr+l; |
| 505 | |
| 506 | for( ; s<e && my_isspace(cs, *s); s++); |
| 507 | |
| 508 | if (s==e) |
| 509 | { |
| 510 | goto noconv; |
| 511 | } |
| 512 | |
| 513 | if (*s == '-') |
| 514 | { |
| 515 | negative = 1; |
| 516 | ++s; |
| 517 | } |
| 518 | else if (*s == '+') |
| 519 | { |
| 520 | negative = 0; |
| 521 | ++s; |
| 522 | } |
| 523 | else |
| 524 | negative = 0; |
| 525 | |
| 526 | save = s; |
| 527 | cutoff = ((uint32)~0L) / (uint32) base; |
| 528 | cutlim = (uint) (((uint32)~0L) % (uint32) base); |
| 529 | overflow = 0; |
| 530 | i = 0; |
| 531 | |
| 532 | for (c = *s; s != e; c = *++s) |
| 533 | { |
| 534 | if (c>='0' && c<='9') |
| 535 | c -= '0'; |
| 536 | else if (c>='A' && c<='Z') |
| 537 | c = c - 'A' + 10; |
| 538 | else if (c>='a' && c<='z') |
| 539 | c = c - 'a' + 10; |
| 540 | else |
| 541 | break; |
| 542 | if (c >= base) |
| 543 | break; |
| 544 | if (i > cutoff || (i == cutoff && c > cutlim)) |
| 545 | overflow = 1; |
| 546 | else |
| 547 | { |
| 548 | i *= (uint32) base; |
| 549 | i += c; |
| 550 | } |
| 551 | } |
| 552 | |
| 553 | if (s == save) |
| 554 | goto noconv; |
| 555 | |
| 556 | if (endptr != NULL) |
| 557 | *endptr = (char *) s; |
| 558 | |
| 559 | if (overflow) |
| 560 | { |
| 561 | err[0]= ERANGE; |
| 562 | return (~(uint32) 0); |
| 563 | } |
| 564 | |
| 565 | return (negative ? -((long) i) : (long) i); |
| 566 | |
| 567 | noconv: |
| 568 | err[0]= EDOM; |
| 569 | if (endptr != NULL) |
| 570 | *endptr = (char *) nptr; |
| 571 | return 0L; |
| 572 | } |
| 573 | |
| 574 | |
| 575 | longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 576 | const char *nptr, size_t l, int base, |
| 577 | char **endptr,int *err) |
| 578 | { |
| 579 | int negative; |
| 580 | register ulonglong cutoff; |
| 581 | register uint cutlim; |
| 582 | register ulonglong i; |
| 583 | register const char *s, *e; |
| 584 | const char *save; |
| 585 | int overflow; |
| 586 | |
| 587 | *err= 0; /* Initialize error indicator */ |
| 588 | |
| 589 | s = nptr; |
| 590 | e = nptr+l; |
| 591 | |
| 592 | for(; s<e && my_isspace(cs,*s); s++); |
| 593 | |
| 594 | if (s == e) |
| 595 | { |
| 596 | goto noconv; |
| 597 | } |
| 598 | |
| 599 | if (*s == '-') |
| 600 | { |
| 601 | negative = 1; |
| 602 | ++s; |
| 603 | } |
| 604 | else if (*s == '+') |
| 605 | { |
| 606 | negative = 0; |
| 607 | ++s; |
| 608 | } |
| 609 | else |
| 610 | negative = 0; |
| 611 | |
| 612 | save = s; |
| 613 | |
| 614 | cutoff = (~(ulonglong) 0) / (unsigned long int) base; |
| 615 | cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base); |
| 616 | |
| 617 | overflow = 0; |
| 618 | i = 0; |
| 619 | for ( ; s != e; s++) |
| 620 | { |
| 621 | register uchar c= *s; |
| 622 | if (c>='0' && c<='9') |
| 623 | c -= '0'; |
| 624 | else if (c>='A' && c<='Z') |
| 625 | c = c - 'A' + 10; |
| 626 | else if (c>='a' && c<='z') |
| 627 | c = c - 'a' + 10; |
| 628 | else |
| 629 | break; |
| 630 | if (c >= base) |
| 631 | break; |
| 632 | if (i > cutoff || (i == cutoff && c > cutlim)) |
| 633 | overflow = 1; |
| 634 | else |
| 635 | { |
| 636 | i *= (ulonglong) base; |
| 637 | i += c; |
| 638 | } |
| 639 | } |
| 640 | |
| 641 | if (s == save) |
| 642 | goto noconv; |
| 643 | |
| 644 | if (endptr != NULL) |
| 645 | *endptr = (char *) s; |
| 646 | |
| 647 | if (negative) |
| 648 | { |
| 649 | if (i > (ulonglong) LONGLONG_MIN) |
| 650 | overflow = 1; |
| 651 | } |
| 652 | else if (i > (ulonglong) LONGLONG_MAX) |
| 653 | overflow = 1; |
| 654 | |
| 655 | if (overflow) |
| 656 | { |
| 657 | err[0]= ERANGE; |
| 658 | return negative ? LONGLONG_MIN : LONGLONG_MAX; |
| 659 | } |
| 660 | |
| 661 | return (negative ? -((longlong) i) : (longlong) i); |
| 662 | |
| 663 | noconv: |
| 664 | err[0]= EDOM; |
| 665 | if (endptr != NULL) |
| 666 | *endptr = (char *) nptr; |
| 667 | return 0L; |
| 668 | } |
| 669 | |
| 670 | |
| 671 | ulonglong my_strntoull_8bit(CHARSET_INFO *cs, |
| 672 | const char *nptr, size_t l, int base, |
| 673 | char **endptr, int *err) |
| 674 | { |
| 675 | int negative; |
| 676 | register ulonglong cutoff; |
| 677 | register uint cutlim; |
| 678 | register ulonglong i; |
| 679 | register const char *s, *e; |
| 680 | const char *save; |
| 681 | int overflow; |
| 682 | |
| 683 | *err= 0; /* Initialize error indicator */ |
| 684 | |
| 685 | s = nptr; |
| 686 | e = nptr+l; |
| 687 | |
| 688 | for(; s<e && my_isspace(cs,*s); s++); |
| 689 | |
| 690 | if (s == e) |
| 691 | { |
| 692 | goto noconv; |
| 693 | } |
| 694 | |
| 695 | if (*s == '-') |
| 696 | { |
| 697 | negative = 1; |
| 698 | ++s; |
| 699 | } |
| 700 | else if (*s == '+') |
| 701 | { |
| 702 | negative = 0; |
| 703 | ++s; |
| 704 | } |
| 705 | else |
| 706 | negative = 0; |
| 707 | |
| 708 | save = s; |
| 709 | |
| 710 | cutoff = (~(ulonglong) 0) / (unsigned long int) base; |
| 711 | cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base); |
| 712 | |
| 713 | overflow = 0; |
| 714 | i = 0; |
| 715 | for ( ; s != e; s++) |
| 716 | { |
| 717 | register uchar c= *s; |
| 718 | |
| 719 | if (c>='0' && c<='9') |
| 720 | c -= '0'; |
| 721 | else if (c>='A' && c<='Z') |
| 722 | c = c - 'A' + 10; |
| 723 | else if (c>='a' && c<='z') |
| 724 | c = c - 'a' + 10; |
| 725 | else |
| 726 | break; |
| 727 | if (c >= base) |
| 728 | break; |
| 729 | if (i > cutoff || (i == cutoff && c > cutlim)) |
| 730 | overflow = 1; |
| 731 | else |
| 732 | { |
| 733 | i *= (ulonglong) base; |
| 734 | i += c; |
| 735 | } |
| 736 | } |
| 737 | |
| 738 | if (s == save) |
| 739 | goto noconv; |
| 740 | |
| 741 | if (endptr != NULL) |
| 742 | *endptr = (char *) s; |
| 743 | |
| 744 | if (overflow) |
| 745 | { |
| 746 | err[0]= ERANGE; |
| 747 | return (~(ulonglong) 0); |
| 748 | } |
| 749 | |
| 750 | return (negative ? -((longlong) i) : (longlong) i); |
| 751 | |
| 752 | noconv: |
| 753 | err[0]= EDOM; |
| 754 | if (endptr != NULL) |
| 755 | *endptr = (char *) nptr; |
| 756 | return 0L; |
| 757 | } |
| 758 | |
| 759 | |
| 760 | /* |
| 761 | Read double from string |
| 762 | |
| 763 | SYNOPSIS: |
| 764 | my_strntod_8bit() |
| 765 | cs Character set information |
| 766 | str String to convert to double |
| 767 | length Optional length for string. |
| 768 | end result pointer to end of converted string |
| 769 | err Error number if failed conversion |
| 770 | |
| 771 | NOTES: |
| 772 | If length is not INT_MAX32 or str[length] != 0 then the given str must |
| 773 | be writeable |
| 774 | If length == INT_MAX32 the str must be \0 terminated. |
| 775 | |
| 776 | It's implemented this way to save a buffer allocation and a memory copy. |
| 777 | |
| 778 | RETURN |
| 779 | Value of number in string |
| 780 | */ |
| 781 | |
| 782 | |
| 783 | double my_strntod_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 784 | char *str, size_t length, |
| 785 | char **end, int *err) |
| 786 | { |
| 787 | if (length == INT_MAX32) |
| 788 | length= 65535; /* Should be big enough */ |
| 789 | *end= str + length; |
| 790 | return my_strtod(str, end, err); |
| 791 | } |
| 792 | |
| 793 | |
| 794 | /* |
| 795 | This is a fast version optimized for the case of radix 10 / -10 |
| 796 | |
| 797 | Assume len >= 1 |
| 798 | */ |
| 799 | |
| 800 | size_t my_long10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 801 | char *dst, size_t len, int radix, long int val) |
| 802 | { |
| 803 | char buffer[66]; |
| 804 | register char *p, *e; |
| 805 | long int new_val; |
| 806 | uint sign=0; |
| 807 | unsigned long int uval = (unsigned long int) val; |
| 808 | |
| 809 | e = p = &buffer[sizeof(buffer)-1]; |
| 810 | *p= 0; |
| 811 | |
| 812 | if (radix < 0) |
| 813 | { |
| 814 | if (val < 0) |
| 815 | { |
| 816 | /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */ |
| 817 | uval= (unsigned long int)0 - uval; |
| 818 | *dst++= '-'; |
| 819 | len--; |
| 820 | sign= 1; |
| 821 | } |
| 822 | } |
| 823 | |
| 824 | new_val = (long) (uval / 10); |
| 825 | *--p = '0'+ (char) (uval - (unsigned long) new_val * 10); |
| 826 | val = new_val; |
| 827 | |
| 828 | while (val != 0) |
| 829 | { |
| 830 | new_val=val/10; |
| 831 | *--p = '0' + (char) (val-new_val*10); |
| 832 | val= new_val; |
| 833 | } |
| 834 | |
| 835 | len= MY_MIN(len, (size_t) (e-p)); |
| 836 | memcpy(dst, p, len); |
| 837 | return len+sign; |
| 838 | } |
| 839 | |
| 840 | |
| 841 | size_t my_longlong10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 842 | char *dst, size_t len, int radix, |
| 843 | longlong val) |
| 844 | { |
| 845 | char buffer[65]; |
| 846 | register char *p, *e; |
| 847 | long long_val; |
| 848 | uint sign= 0; |
| 849 | ulonglong uval = (ulonglong)val; |
| 850 | |
| 851 | if (radix < 0) |
| 852 | { |
| 853 | if (val < 0) |
| 854 | { |
| 855 | /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */ |
| 856 | uval = (ulonglong)0 - uval; |
| 857 | *dst++= '-'; |
| 858 | len--; |
| 859 | sign= 1; |
| 860 | } |
| 861 | } |
| 862 | |
| 863 | e = p = &buffer[sizeof(buffer)-1]; |
| 864 | *p= 0; |
| 865 | |
| 866 | if (uval == 0) |
| 867 | { |
| 868 | *--p= '0'; |
| 869 | len= 1; |
| 870 | goto cnv; |
| 871 | } |
| 872 | |
| 873 | while (uval > (ulonglong) LONG_MAX) |
| 874 | { |
| 875 | ulonglong quo= uval/(uint) 10; |
| 876 | uint rem= (uint) (uval- quo* (uint) 10); |
| 877 | *--p = '0' + rem; |
| 878 | uval= quo; |
| 879 | } |
| 880 | |
| 881 | long_val= (long) uval; |
| 882 | while (long_val != 0) |
| 883 | { |
| 884 | long quo= long_val/10; |
| 885 | *--p = (char) ('0' + (long_val - quo*10)); |
| 886 | long_val= quo; |
| 887 | } |
| 888 | |
| 889 | len= MY_MIN(len, (size_t) (e-p)); |
| 890 | cnv: |
| 891 | memcpy(dst, p, len); |
| 892 | return len+sign; |
| 893 | } |
| 894 | |
| 895 | |
| 896 | /* |
| 897 | ** Compare string against string with wildcard |
| 898 | ** 0 if matched |
| 899 | ** -1 if not matched with wildcard |
| 900 | ** 1 if matched with wildcard |
| 901 | */ |
| 902 | |
| 903 | #ifdef LIKE_CMP_TOUPPER |
| 904 | #define likeconv(s,A) (uchar) my_toupper(s,A) |
| 905 | #else |
| 906 | #define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)] |
| 907 | #endif |
| 908 | |
| 909 | #define INC_PTR(cs,A,B) (A)++ |
| 910 | |
| 911 | |
| 912 | static |
| 913 | int my_wildcmp_8bit_impl(CHARSET_INFO *cs, |
| 914 | const char *str,const char *str_end, |
| 915 | const char *wildstr,const char *wildend, |
| 916 | int escape, int w_one, int w_many, int recurse_level) |
| 917 | { |
| 918 | int result= -1; /* Not found, using wildcards */ |
| 919 | |
| 920 | if (my_string_stack_guard && my_string_stack_guard(recurse_level)) |
| 921 | return 1; |
| 922 | while (wildstr != wildend) |
| 923 | { |
| 924 | while (*wildstr != w_many && *wildstr != w_one) |
| 925 | { |
| 926 | if (*wildstr == escape && wildstr+1 != wildend) |
| 927 | wildstr++; |
| 928 | |
| 929 | if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++)) |
| 930 | return(1); /* No match */ |
| 931 | if (wildstr == wildend) |
| 932 | return(str != str_end); /* Match if both are at end */ |
| 933 | result=1; /* Found an anchor char */ |
| 934 | } |
| 935 | if (*wildstr == w_one) |
| 936 | { |
| 937 | do |
| 938 | { |
| 939 | if (str == str_end) /* Skip one char if possible */ |
| 940 | return(result); |
| 941 | INC_PTR(cs,str,str_end); |
| 942 | } while (++wildstr < wildend && *wildstr == w_one); |
| 943 | if (wildstr == wildend) |
| 944 | break; |
| 945 | } |
| 946 | if (*wildstr == w_many) |
| 947 | { /* Found w_many */ |
| 948 | uchar cmp; |
| 949 | |
| 950 | wildstr++; |
| 951 | /* Remove any '%' and '_' from the wild search string */ |
| 952 | for (; wildstr != wildend ; wildstr++) |
| 953 | { |
| 954 | if (*wildstr == w_many) |
| 955 | continue; |
| 956 | if (*wildstr == w_one) |
| 957 | { |
| 958 | if (str == str_end) |
| 959 | return(-1); |
| 960 | INC_PTR(cs,str,str_end); |
| 961 | continue; |
| 962 | } |
| 963 | break; /* Not a wild character */ |
| 964 | } |
| 965 | if (wildstr == wildend) |
| 966 | return(0); /* Ok if w_many is last */ |
| 967 | if (str == str_end) |
| 968 | return(-1); |
| 969 | |
| 970 | if ((cmp= *wildstr) == escape && wildstr+1 != wildend) |
| 971 | cmp= *++wildstr; |
| 972 | |
| 973 | INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */ |
| 974 | cmp=likeconv(cs,cmp); |
| 975 | do |
| 976 | { |
| 977 | /* |
| 978 | Find the next character in the subject string equal to 'cmp', then |
| 979 | check recursively my_wildcmp_8bit_impl() for the pattern remainder. |
| 980 | */ |
| 981 | while (str != str_end && (uchar) likeconv(cs,*str) != cmp) |
| 982 | str++; |
| 983 | if (str++ == str_end) |
| 984 | return(-1); /* 'cmp' was not found in the subject string */ |
| 985 | { |
| 986 | int tmp=my_wildcmp_8bit_impl(cs,str,str_end, |
| 987 | wildstr,wildend,escape,w_one, |
| 988 | w_many, recurse_level+1); |
| 989 | if (tmp <= 0) |
| 990 | return(tmp); |
| 991 | } |
| 992 | /* |
| 993 | The recursion call did not match. But it returned 1, which means |
| 994 | the pattern remainder has some non-special characters. |
| 995 | Continue, there is a chance that we'll find another 'cmp' |
| 996 | at a different position in the subject string. |
| 997 | */ |
| 998 | } while (str != str_end); |
| 999 | return(-1); |
| 1000 | } |
| 1001 | } |
| 1002 | return(str != str_end ? 1 : 0); |
| 1003 | } |
| 1004 | |
| 1005 | int my_wildcmp_8bit(CHARSET_INFO *cs, |
| 1006 | const char *str,const char *str_end, |
| 1007 | const char *wildstr,const char *wildend, |
| 1008 | int escape, int w_one, int w_many) |
| 1009 | { |
| 1010 | return my_wildcmp_8bit_impl(cs, str, str_end, |
| 1011 | wildstr, wildend, |
| 1012 | escape, w_one, w_many, 1); |
| 1013 | } |
| 1014 | |
| 1015 | |
| 1016 | /* |
| 1017 | ** Calculate min_str and max_str that ranges a LIKE string. |
| 1018 | ** Arguments: |
| 1019 | ** ptr Pointer to LIKE string. |
| 1020 | ** ptr_length Length of LIKE string. |
| 1021 | ** escape Escape character in LIKE. (Normally '\'). |
| 1022 | ** All escape characters should be removed from min_str and max_str |
| 1023 | ** res_length Length of min_str and max_str. |
| 1024 | ** min_str Smallest case sensitive string that ranges LIKE. |
| 1025 | ** Should be space padded to res_length. |
| 1026 | ** max_str Largest case sensitive string that ranges LIKE. |
| 1027 | ** Normally padded with the biggest character sort value. |
| 1028 | ** |
| 1029 | ** The function should return 0 if ok and 1 if the LIKE string can't be |
| 1030 | ** optimized ! |
| 1031 | */ |
| 1032 | |
| 1033 | my_bool my_like_range_simple(CHARSET_INFO *cs, |
| 1034 | const char *ptr, size_t ptr_length, |
| 1035 | pbool escape, pbool w_one, pbool w_many, |
| 1036 | size_t res_length, |
| 1037 | char *min_str,char *max_str, |
| 1038 | size_t *min_length, size_t *max_length) |
| 1039 | { |
| 1040 | const char *end= ptr + ptr_length; |
| 1041 | char *min_org=min_str; |
| 1042 | char *min_end=min_str+res_length; |
| 1043 | size_t charlen= res_length / cs->mbmaxlen; |
| 1044 | |
| 1045 | for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--) |
| 1046 | { |
| 1047 | if (*ptr == escape && ptr+1 != end) |
| 1048 | { |
| 1049 | ptr++; /* Skip escape */ |
| 1050 | *min_str++= *max_str++ = *ptr; |
| 1051 | continue; |
| 1052 | } |
| 1053 | if (*ptr == w_one) /* '_' in SQL */ |
| 1054 | { |
| 1055 | *min_str++='\0'; /* This should be min char */ |
| 1056 | *max_str++= (char) cs->max_sort_char; |
| 1057 | continue; |
| 1058 | } |
| 1059 | if (*ptr == w_many) /* '%' in SQL */ |
| 1060 | { |
| 1061 | /* Calculate length of keys */ |
| 1062 | *min_length= (cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)) ? |
| 1063 | (size_t) (min_str - min_org) : |
| 1064 | res_length; |
| 1065 | *max_length= res_length; |
| 1066 | do |
| 1067 | { |
| 1068 | *min_str++= 0; |
| 1069 | *max_str++= (char) cs->max_sort_char; |
| 1070 | } while (min_str != min_end); |
| 1071 | return 0; |
| 1072 | } |
| 1073 | *min_str++= *max_str++ = *ptr; |
| 1074 | } |
| 1075 | |
| 1076 | *min_length= *max_length = (size_t) (min_str - min_org); |
| 1077 | while (min_str != min_end) |
| 1078 | *min_str++= *max_str++ = ' '; /* Because if key compression */ |
| 1079 | return 0; |
| 1080 | } |
| 1081 | |
| 1082 | |
| 1083 | size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq) |
| 1084 | { |
| 1085 | const char *str0= str; |
| 1086 | switch (sq) |
| 1087 | { |
| 1088 | case MY_SEQ_INTTAIL: |
| 1089 | if (*str == '.') |
| 1090 | { |
| 1091 | for(str++ ; str != end && *str == '0' ; str++); |
| 1092 | return (size_t) (str - str0); |
| 1093 | } |
| 1094 | return 0; |
| 1095 | |
| 1096 | case MY_SEQ_SPACES: |
| 1097 | for ( ; str < end ; str++) |
| 1098 | { |
| 1099 | if (!my_isspace(cs,*str)) |
| 1100 | break; |
| 1101 | } |
| 1102 | return (size_t) (str - str0); |
| 1103 | case MY_SEQ_NONSPACES: |
| 1104 | for ( ; str < end ; str++) |
| 1105 | { |
| 1106 | if (my_isspace(cs, *str)) |
| 1107 | break; |
| 1108 | } |
| 1109 | return (size_t) (str - str0); |
| 1110 | default: |
| 1111 | return 0; |
| 1112 | } |
| 1113 | } |
| 1114 | |
| 1115 | |
/*
  Fill a buffer with a repeated byte.

  Passes the buffer 's', its length 'l' and the byte value 'fill'
  to bfill(). The character set argument is not used.
*/
void my_fill_8bit(CHARSET_INFO *cs __attribute__((unused)),
                  char *s, size_t l, int fill)
{
  bfill((uchar*) s,l,fill);
}
| 1121 | |
| 1122 | |
/*
  Number of characters in the range [b, e).

  Returns the distance between the two pointers; the character set
  argument is not used.
*/
size_t my_numchars_8bit(CHARSET_INFO *cs __attribute__((unused)),
                        const char *b, const char *e)
{
  return (size_t) (e - b);
}
| 1128 | |
| 1129 | |
/*
  Number of cells for the range [b, e).

  Returns the distance between the two pointers, exactly like
  my_numchars_8bit(); the character set argument is not used.
*/
size_t my_numcells_8bit(CHARSET_INFO *cs __attribute__((unused)),
                        const char *b, const char *e)
{
  return (size_t) (e - b);
}
| 1135 | |
| 1136 | |
/*
  Position of character number 'pos'.

  Returns 'pos' unchanged. The string bounds 'b' and 'e' and the
  character set are not used, so no range check is done here.
*/
size_t my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)),
                       const char *b __attribute__((unused)),
                       const char *e __attribute__((unused)),
                       size_t pos)
{
  return pos;
}
| 1144 | |
| 1145 | |
| 1146 | size_t |
| 1147 | my_well_formed_char_length_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 1148 | const char *start, const char *end, |
| 1149 | size_t nchars, MY_STRCOPY_STATUS *status) |
| 1150 | { |
| 1151 | size_t nbytes= (size_t) (end - start); |
| 1152 | size_t res= MY_MIN(nbytes, nchars); |
| 1153 | status->m_well_formed_error_pos= NULL; |
| 1154 | status->m_source_end_pos= start + res; |
| 1155 | return res; |
| 1156 | } |
| 1157 | |
| 1158 | |
| 1159 | /* |
| 1160 | Copy a 8-bit string. Not more than "nchars" character are copied. |
| 1161 | */ |
| 1162 | size_t |
| 1163 | my_copy_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 1164 | char *dst, size_t dst_length, |
| 1165 | const char *src, size_t src_length, |
| 1166 | size_t nchars, MY_STRCOPY_STATUS *status) |
| 1167 | { |
| 1168 | set_if_smaller(src_length, dst_length); |
| 1169 | set_if_smaller(src_length, nchars); |
| 1170 | if (src_length) |
| 1171 | memmove(dst, src, src_length); |
| 1172 | status->m_source_end_pos= src + src_length; |
| 1173 | status->m_well_formed_error_pos= NULL; |
| 1174 | return src_length; |
| 1175 | } |
| 1176 | |
| 1177 | |
| 1178 | size_t my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 1179 | const char *ptr, size_t length) |
| 1180 | { |
| 1181 | const char *end; |
| 1182 | end= (const char *) skip_trailing_space((const uchar *)ptr, length); |
| 1183 | return (size_t) (end-ptr); |
| 1184 | } |
| 1185 | |
| 1186 | |
| 1187 | uint my_instr_simple(CHARSET_INFO *cs, |
| 1188 | const char *b, size_t b_length, |
| 1189 | const char *s, size_t s_length, |
| 1190 | my_match_t *match, uint nmatch) |
| 1191 | { |
| 1192 | register const uchar *str, *search, *end, *search_end; |
| 1193 | |
| 1194 | if (s_length <= b_length) |
| 1195 | { |
| 1196 | if (!s_length) |
| 1197 | { |
| 1198 | if (nmatch) |
| 1199 | { |
| 1200 | match->beg= 0; |
| 1201 | match->end= 0; |
| 1202 | match->mb_len= 0; |
| 1203 | } |
| 1204 | return 1; /* Empty string is always found */ |
| 1205 | } |
| 1206 | |
| 1207 | str= (const uchar*) b; |
| 1208 | search= (const uchar*) s; |
| 1209 | end= (const uchar*) b+b_length-s_length+1; |
| 1210 | search_end= (const uchar*) s + s_length; |
| 1211 | |
| 1212 | skip: |
| 1213 | while (str != end) |
| 1214 | { |
| 1215 | if (cs->sort_order[*str++] == cs->sort_order[*search]) |
| 1216 | { |
| 1217 | register const uchar *i,*j; |
| 1218 | |
| 1219 | i= str; |
| 1220 | j= search+1; |
| 1221 | |
| 1222 | while (j != search_end) |
| 1223 | if (cs->sort_order[*i++] != cs->sort_order[*j++]) |
| 1224 | goto skip; |
| 1225 | |
| 1226 | if (nmatch > 0) |
| 1227 | { |
| 1228 | match[0].beg= 0; |
| 1229 | match[0].end= (uint) (str- (const uchar*)b-1); |
| 1230 | match[0].mb_len= match[0].end; |
| 1231 | |
| 1232 | if (nmatch > 1) |
| 1233 | { |
| 1234 | match[1].beg= match[0].end; |
| 1235 | match[1].end= (uint)(match[0].end+s_length); |
| 1236 | match[1].mb_len= match[1].end-match[1].beg; |
| 1237 | } |
| 1238 | } |
| 1239 | return 2; |
| 1240 | } |
| 1241 | } |
| 1242 | } |
| 1243 | return 0; |
| 1244 | } |
| 1245 | |
| 1246 | |
/*
  Per-plane statistics used by create_fromuni() while it builds
  the Unicode-to-charset tables.
*/
typedef struct
{
  int nchars;                    /* Number of characters counted in the plane */
  struct my_uni_idx_st uidx;     /* Code range and reverse table of the plane */
} uni_idx;

/*
  PLANE_NUMBER(x) takes the value of x shifted right by 8 bits,
  modulo PLANE_NUM, as the plane index of code x.
*/
#define PLANE_SIZE 0x100
#define PLANE_NUM 0x100
#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM)
| 1256 | |
| 1257 | static int pcmp(const void * f, const void * s) |
| 1258 | { |
| 1259 | const uni_idx *F= (const uni_idx*) f; |
| 1260 | const uni_idx *S= (const uni_idx*) s; |
| 1261 | int res; |
| 1262 | |
| 1263 | if (!(res=((S->nchars)-(F->nchars)))) |
| 1264 | res=((F->uidx.from)-(S->uidx.to)); |
| 1265 | return res; |
| 1266 | } |
| 1267 | |
| 1268 | static my_bool |
| 1269 | create_fromuni(struct charset_info_st *cs, |
| 1270 | MY_CHARSET_LOADER *loader) |
| 1271 | { |
| 1272 | uni_idx idx[PLANE_NUM]; |
| 1273 | int i,n; |
| 1274 | |
| 1275 | /* |
| 1276 | Check that Unicode map is loaded. |
| 1277 | It can be not loaded when the collation is |
| 1278 | listed in Index.xml but not specified |
| 1279 | in the character set specific XML file. |
| 1280 | */ |
| 1281 | if (!cs->tab_to_uni) |
| 1282 | return TRUE; |
| 1283 | |
| 1284 | /* Clear plane statistics */ |
| 1285 | bzero(idx,sizeof(idx)); |
| 1286 | |
| 1287 | /* Count number of characters in each plane */ |
| 1288 | for (i=0; i< 0x100; i++) |
| 1289 | { |
| 1290 | uint16 wc=cs->tab_to_uni[i]; |
| 1291 | int pl= PLANE_NUMBER(wc); |
| 1292 | |
| 1293 | if (wc || !i) |
| 1294 | { |
| 1295 | if (!idx[pl].nchars) |
| 1296 | { |
| 1297 | idx[pl].uidx.from=wc; |
| 1298 | idx[pl].uidx.to=wc; |
| 1299 | }else |
| 1300 | { |
| 1301 | idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from; |
| 1302 | idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to; |
| 1303 | } |
| 1304 | idx[pl].nchars++; |
| 1305 | } |
| 1306 | } |
| 1307 | |
| 1308 | /* Sort planes in descending order */ |
| 1309 | qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp); |
| 1310 | |
| 1311 | for (i=0; i < PLANE_NUM; i++) |
| 1312 | { |
| 1313 | int ch,numchars; |
| 1314 | uchar *tab; |
| 1315 | |
| 1316 | /* Skip empty plane */ |
| 1317 | if (!idx[i].nchars) |
| 1318 | break; |
| 1319 | |
| 1320 | numchars=idx[i].uidx.to-idx[i].uidx.from+1; |
| 1321 | if (!(idx[i].uidx.tab= tab= (uchar*) |
| 1322 | (loader->once_alloc) (numchars * |
| 1323 | sizeof(*idx[i].uidx.tab)))) |
| 1324 | return TRUE; |
| 1325 | |
| 1326 | bzero(tab,numchars*sizeof(*tab)); |
| 1327 | |
| 1328 | for (ch=1; ch < PLANE_SIZE; ch++) |
| 1329 | { |
| 1330 | uint16 wc=cs->tab_to_uni[ch]; |
| 1331 | if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc) |
| 1332 | { |
| 1333 | int ofs= wc - idx[i].uidx.from; |
| 1334 | if (!tab[ofs] || tab[ofs] > 0x7F) /* Prefer ASCII*/ |
| 1335 | { |
| 1336 | /* |
| 1337 | Some character sets can have double encoding. For example, |
| 1338 | in ARMSCII8, the following characters are encoded twice: |
| 1339 | |
| 1340 | Encoding#1 Encoding#2 Unicode Character Name |
| 1341 | ---------- ---------- ------- -------------- |
| 1342 | 0x27 0xFF U+0027 APOSTROPHE |
| 1343 | 0x28 0xA5 U+0028 LEFT PARENTHESIS |
| 1344 | 0x29 0xA4 U+0029 RIGHT PARENTHESIS |
| 1345 | 0x2C 0xAB U+002C COMMA |
| 1346 | 0x2D 0xAC U+002D HYPHEN-MINUS |
| 1347 | 0x2E 0xA9 U+002E FULL STOP |
| 1348 | |
| 1349 | That is, both 0x27 and 0xFF convert to Unicode U+0027. |
| 1350 | When converting back from Unicode to ARMSCII, |
| 1351 | we prefer the ASCII range, that is we want U+0027 |
| 1352 | to convert to 0x27 rather than to 0xFF. |
| 1353 | */ |
| 1354 | tab[ofs]= ch; |
| 1355 | } |
| 1356 | } |
| 1357 | } |
| 1358 | } |
| 1359 | |
| 1360 | /* Allocate and fill reverse table for each plane */ |
| 1361 | n=i; |
| 1362 | if (!(cs->tab_from_uni= (MY_UNI_IDX *) |
| 1363 | (loader->once_alloc)(sizeof(MY_UNI_IDX) * (n + 1)))) |
| 1364 | return TRUE; |
| 1365 | |
| 1366 | for (i=0; i< n; i++) |
| 1367 | ((struct my_uni_idx_st*)cs->tab_from_uni)[i]= idx[i].uidx; |
| 1368 | |
| 1369 | /* Set end-of-list marker */ |
| 1370 | bzero((char*) &cs->tab_from_uni[i],sizeof(MY_UNI_IDX)); |
| 1371 | return FALSE; |
| 1372 | } |
| 1373 | |
| 1374 | |
| 1375 | /* |
| 1376 | Detect if a character set is 8bit, |
| 1377 | and it is pure ascii, i.e. doesn't have |
| 1378 | characters outside U+0000..U+007F |
| 1379 | This functions is shared between "conf_to_src" |
| 1380 | and dynamic charsets loader in "mysqld". |
| 1381 | */ |
| 1382 | static my_bool |
| 1383 | my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs) |
| 1384 | { |
| 1385 | size_t code; |
| 1386 | if (!cs->tab_to_uni) |
| 1387 | return 0; |
| 1388 | for (code= 0; code < 256; code++) |
| 1389 | { |
| 1390 | if (cs->tab_to_uni[code] > 0x7F) |
| 1391 | return 0; |
| 1392 | } |
| 1393 | return 1; |
| 1394 | } |
| 1395 | |
| 1396 | |
| 1397 | /* |
| 1398 | Shared function between conf_to_src and mysys. |
| 1399 | Check if a 8bit character set is compatible with |
| 1400 | ascii on the range 0x00..0x7F. |
| 1401 | */ |
| 1402 | static my_bool |
| 1403 | my_charset_is_ascii_compatible(CHARSET_INFO *cs) |
| 1404 | { |
| 1405 | uint i; |
| 1406 | if (!cs->tab_to_uni) |
| 1407 | return 1; |
| 1408 | for (i= 0; i < 128; i++) |
| 1409 | { |
| 1410 | if (cs->tab_to_uni[i] != i) |
| 1411 | return 0; |
| 1412 | } |
| 1413 | return 1; |
| 1414 | } |
| 1415 | |
| 1416 | |
| 1417 | uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs) |
| 1418 | { |
| 1419 | uint flags= 0; |
| 1420 | if (my_charset_is_8bit_pure_ascii(cs)) |
| 1421 | flags|= MY_CS_PUREASCII; |
| 1422 | if (!my_charset_is_ascii_compatible(cs)) |
| 1423 | flags|= MY_CS_NONASCII; |
| 1424 | return flags; |
| 1425 | } |
| 1426 | |
| 1427 | |
| 1428 | /* |
| 1429 | Check if case sensitive sort order: A < a < B. |
| 1430 | We need MY_CS_FLAG for regex library, and for |
| 1431 | case sensitivity flag for 5.0 client protocol, |
| 1432 | to support isCaseSensitive() method in JDBC driver |
| 1433 | */ |
| 1434 | uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs) |
| 1435 | { |
| 1436 | uint flags= 0; |
| 1437 | if (cs->sort_order && cs->sort_order['A'] < cs->sort_order['a'] && |
| 1438 | cs->sort_order['a'] < cs->sort_order['B']) |
| 1439 | flags|= MY_CS_CSSORT; |
| 1440 | return flags; |
| 1441 | } |
| 1442 | |
| 1443 | |
| 1444 | static my_bool |
| 1445 | my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader) |
| 1446 | { |
| 1447 | cs->state|= my_8bit_charset_flags_from_data(cs); |
| 1448 | cs->caseup_multiply= 1; |
| 1449 | cs->casedn_multiply= 1; |
| 1450 | cs->pad_char= ' '; |
| 1451 | if (!cs->to_lower || !cs->to_upper || !cs->ctype || !cs->tab_to_uni) |
| 1452 | return TRUE; |
| 1453 | return create_fromuni(cs, loader); |
| 1454 | } |
| 1455 | |
| 1456 | static void set_max_sort_char(struct charset_info_st *cs) |
| 1457 | { |
| 1458 | uchar max_char; |
| 1459 | uint i; |
| 1460 | |
| 1461 | if (!cs->sort_order) |
| 1462 | return; |
| 1463 | |
| 1464 | max_char=cs->sort_order[(uchar) cs->max_sort_char]; |
| 1465 | for (i= 0; i < 256; i++) |
| 1466 | { |
| 1467 | if ((uchar) cs->sort_order[i] > max_char) |
| 1468 | { |
| 1469 | max_char=(uchar) cs->sort_order[i]; |
| 1470 | cs->max_sort_char= i; |
| 1471 | } |
| 1472 | } |
| 1473 | } |
| 1474 | |
| 1475 | static my_bool my_coll_init_simple(struct charset_info_st *cs, |
| 1476 | MY_CHARSET_LOADER *loader __attribute__((unused))) |
| 1477 | { |
| 1478 | if (!cs->sort_order) |
| 1479 | return TRUE; |
| 1480 | cs->state|= my_8bit_collation_flags_from_data(cs); |
| 1481 | set_max_sort_char(cs); |
| 1482 | return FALSE; |
| 1483 | } |
| 1484 | |
| 1485 | |
/*
  Character set handler entry for my_strtoll10().

  Passes 'nptr', 'endptr' and 'error' to my_strtoll10() and returns
  its result; the character set argument is not used.
*/
longlong my_strtoll10_8bit(CHARSET_INFO *cs __attribute__((unused)),
                           const char *nptr, char **endptr, int *error)
{
  return my_strtoll10(nptr, endptr, error);
}
| 1491 | |
| 1492 | |
| 1493 | int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype, |
| 1494 | const uchar *s, const uchar *e) |
| 1495 | { |
| 1496 | if (s >= e) |
| 1497 | { |
| 1498 | *ctype= 0; |
| 1499 | return MY_CS_TOOSMALL; |
| 1500 | } |
| 1501 | *ctype= cs->ctype[*s + 1]; |
| 1502 | return 1; |
| 1503 | } |
| 1504 | |
| 1505 | |
| 1506 | #define CUTOFF (ULONGLONG_MAX / 10) |
| 1507 | #define CUTLIM (ULONGLONG_MAX % 10) |
| 1508 | #define DIGITS_IN_ULONGLONG 20 |
| 1509 | |
| 1510 | static ulonglong d10[DIGITS_IN_ULONGLONG]= |
| 1511 | { |
| 1512 | 1, |
| 1513 | 10, |
| 1514 | 100, |
| 1515 | 1000, |
| 1516 | 10000, |
| 1517 | 100000, |
| 1518 | 1000000, |
| 1519 | 10000000, |
| 1520 | 100000000, |
| 1521 | 1000000000, |
| 1522 | 10000000000ULL, |
| 1523 | 100000000000ULL, |
| 1524 | 1000000000000ULL, |
| 1525 | 10000000000000ULL, |
| 1526 | 100000000000000ULL, |
| 1527 | 1000000000000000ULL, |
| 1528 | 10000000000000000ULL, |
| 1529 | 100000000000000000ULL, |
| 1530 | 1000000000000000000ULL, |
| 1531 | 10000000000000000000ULL |
| 1532 | }; |
| 1533 | |
| 1534 | |
| 1535 | /* |
| 1536 | |
| 1537 | Convert a string to unsigned long long integer value |
| 1538 | with rounding. |
| 1539 | |
| 1540 | SYNOPSIS |
| 1541 | my_strntoull10_8bit() |
| 1542 | cs in pointer to character set |
| 1543 | str in pointer to the string to be converted |
| 1544 | length in string length |
| 1545 | unsigned_flag in whether the number is unsigned |
| 1546 | endptr out pointer to the stop character |
| 1547 | error out returned error code |
| 1548 | |
| 1549 | DESCRIPTION |
| 1550 | This function takes the decimal representation of integer number |
| 1551 | from string str and converts it to an signed or unsigned |
| 1552 | long long integer value. |
| 1553 | Space characters and tab are ignored. |
| 1554 | A sign character might precede the digit characters. |
| 1555 | The number may have any number of pre-zero digits. |
| 1556 | The number may have decimal point and exponent. |
| 1557 | Rounding is always done in "away from zero" style: |
| 1558 | 0.5 -> 1 |
| 1559 | -0.5 -> -1 |
| 1560 | |
| 1561 | The function stops reading the string str after "length" bytes |
| 1562 | or at the first character that is not a part of correct number syntax: |
| 1563 | |
| 1564 | <signed numeric literal> ::= |
| 1565 | [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ] |
| 1566 | |
| 1567 | <exact numeric literal> ::= |
| 1568 | <unsigned integer> [ <period> [ <unsigned integer> ] ] |
| 1569 | | <period> <unsigned integer> |
| 1570 | <unsigned integer> ::= <digit>... |
| 1571 | |
| 1572 | RETURN VALUES |
| 1573 | Value of string as a signed/unsigned longlong integer |
| 1574 | |
| 1575 | endptr cannot be NULL. The function will store the end pointer |
| 1576 | to the stop character here. |
| 1577 | |
| 1578 | The error parameter contains information how things went: |
| 1579 | 0 ok |
| 1580 | ERANGE If the the value of the converted number is out of range |
| 1581 | In this case the return value is: |
| 1582 | - ULONGLONG_MAX if unsigned_flag and the number was too big |
| 1583 | - 0 if unsigned_flag and the number was negative |
| 1584 | - LONGLONG_MAX if no unsigned_flag and the number is too big |
| 1585 | - LONGLONG_MIN if no unsigned_flag and the number it too big negative |
| 1586 | |
| 1587 | EDOM If the string didn't contain any digits. |
| 1588 | In this case the return value is 0. |
| 1589 | */ |
| 1590 | |
| 1591 | ulonglong |
| 1592 | my_strntoull10rnd_8bit(CHARSET_INFO *cs __attribute__((unused)), |
| 1593 | const char *str, size_t length, int unsigned_flag, |
| 1594 | char **endptr, int *error) |
| 1595 | { |
| 1596 | const char *dot, *end9, *beg, *end= str + length; |
| 1597 | ulonglong ull; |
| 1598 | ulong ul; |
| 1599 | uchar ch; |
| 1600 | int shift= 0, digits= 0, negative, addon; |
| 1601 | |
| 1602 | /* Skip leading spaces and tabs */ |
| 1603 | for ( ; str < end && (*str == ' ' || *str == '\t') ; str++); |
| 1604 | |
| 1605 | if (str >= end) |
| 1606 | goto ret_edom; |
| 1607 | |
| 1608 | if ((negative= (*str == '-')) || *str=='+') /* optional sign */ |
| 1609 | { |
| 1610 | if (++str == end) |
| 1611 | goto ret_edom; |
| 1612 | } |
| 1613 | |
| 1614 | beg= str; |
| 1615 | end9= (str + 9) > end ? end : (str + 9); |
| 1616 | /* Accumulate small number into ulong, for performance purposes */ |
| 1617 | for (ul= 0 ; str < end9 && (ch= (uchar) (*str - '0')) < 10; str++) |
| 1618 | { |
| 1619 | ul= ul * 10 + ch; |
| 1620 | } |
| 1621 | |
| 1622 | if (str >= end) /* Small number without dots and expanents */ |
| 1623 | { |
| 1624 | *endptr= (char*) str; |
| 1625 | if (negative) |
| 1626 | { |
| 1627 | if (unsigned_flag) |
| 1628 | { |
| 1629 | *error= ul ? MY_ERRNO_ERANGE : 0; |
| 1630 | return 0; |
| 1631 | } |
| 1632 | else |
| 1633 | { |
| 1634 | *error= 0; |
| 1635 | return (ulonglong) (longlong) -(long) ul; |
| 1636 | } |
| 1637 | } |
| 1638 | else |
| 1639 | { |
| 1640 | *error=0; |
| 1641 | return (ulonglong) ul; |
| 1642 | } |
| 1643 | } |
| 1644 | |
| 1645 | digits= (int) (str - beg); |
| 1646 | |
| 1647 | /* Continue to accumulate into ulonglong */ |
| 1648 | for (dot= NULL, ull= ul; str < end; str++) |
| 1649 | { |
| 1650 | if ((ch= (uchar) (*str - '0')) < 10) |
| 1651 | { |
| 1652 | if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM)) |
| 1653 | { |
| 1654 | ull= ull * 10 + ch; |
| 1655 | digits++; |
| 1656 | continue; |
| 1657 | } |
| 1658 | /* |
| 1659 | Adding the next digit would overflow. |
| 1660 | Remember the next digit in "addon", for rounding. |
| 1661 | Scan all digits with an optional single dot. |
| 1662 | */ |
| 1663 | if (ull == CUTOFF) |
| 1664 | { |
| 1665 | ull= ULONGLONG_MAX; |
| 1666 | addon= 1; |
| 1667 | str++; |
| 1668 | } |
| 1669 | else |
| 1670 | addon= (*str >= '5'); |
| 1671 | if (!dot) |
| 1672 | { |
| 1673 | for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; shift++, str++); |
| 1674 | if (str < end && *str == '.') |
| 1675 | { |
| 1676 | str++; |
| 1677 | for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++); |
| 1678 | } |
| 1679 | } |
| 1680 | else |
| 1681 | { |
| 1682 | shift= (int) (dot - str); |
| 1683 | for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++); |
| 1684 | } |
| 1685 | goto exp; |
| 1686 | } |
| 1687 | |
| 1688 | if (*str == '.') |
| 1689 | { |
| 1690 | if (dot) |
| 1691 | { |
| 1692 | /* The second dot character */ |
| 1693 | addon= 0; |
| 1694 | goto exp; |
| 1695 | } |
| 1696 | else |
| 1697 | { |
| 1698 | dot= str + 1; |
| 1699 | } |
| 1700 | continue; |
| 1701 | } |
| 1702 | |
| 1703 | /* Unknown character, exit the loop */ |
| 1704 | break; |
| 1705 | } |
| 1706 | shift= dot ? (int)(dot - str) : 0; /* Right shift */ |
| 1707 | addon= 0; |
| 1708 | |
| 1709 | exp: /* [ E [ <sign> ] <unsigned integer> ] */ |
| 1710 | |
| 1711 | if (!digits) |
| 1712 | { |
| 1713 | str= beg; |
| 1714 | goto ret_edom; |
| 1715 | } |
| 1716 | |
| 1717 | if (str < end && (*str == 'e' || *str == 'E')) |
| 1718 | { |
| 1719 | str++; |
| 1720 | if (str < end) |
| 1721 | { |
| 1722 | int negative_exp, exponent; |
| 1723 | if ((negative_exp= (*str == '-')) || *str=='+') |
| 1724 | { |
| 1725 | if (++str == end) |
| 1726 | { |
| 1727 | str-= 2; /* 'e-' or 'e+' not followed by digits */ |
| 1728 | goto ret_sign; |
| 1729 | } |
| 1730 | } |
| 1731 | for (exponent= 0 ; |
| 1732 | str < end && (ch= (uchar) (*str - '0')) < 10; |
| 1733 | str++) |
| 1734 | { |
| 1735 | exponent= exponent * 10 + ch; |
| 1736 | } |
| 1737 | shift+= negative_exp ? -exponent : exponent; |
| 1738 | } |
| 1739 | else |
| 1740 | str--; /* 'e' not followed by digits */ |
| 1741 | } |
| 1742 | |
| 1743 | if (shift == 0) /* No shift, check addon digit */ |
| 1744 | { |
| 1745 | if (addon) |
| 1746 | { |
| 1747 | if (ull == ULONGLONG_MAX) |
| 1748 | goto ret_too_big; |
| 1749 | ull++; |
| 1750 | } |
| 1751 | goto ret_sign; |
| 1752 | } |
| 1753 | |
| 1754 | if (shift < 0) /* Right shift */ |
| 1755 | { |
| 1756 | ulonglong d, r; |
| 1757 | |
| 1758 | if (-shift >= DIGITS_IN_ULONGLONG) |
| 1759 | goto ret_zero; /* Exponent is a big negative number, return 0 */ |
| 1760 | |
| 1761 | d= d10[-shift]; |
| 1762 | r= (ull % d) * 2; |
| 1763 | ull /= d; |
| 1764 | if (r >= d) |
| 1765 | ull++; |
| 1766 | goto ret_sign; |
| 1767 | } |
| 1768 | |
| 1769 | if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */ |
| 1770 | { |
| 1771 | if (!ull) |
| 1772 | goto ret_sign; |
| 1773 | goto ret_too_big; |
| 1774 | } |
| 1775 | |
| 1776 | for ( ; shift > 0; shift--, ull*= 10) /* Left shift */ |
| 1777 | { |
| 1778 | if (ull > CUTOFF) |
| 1779 | goto ret_too_big; /* Overflow, number too big */ |
| 1780 | } |
| 1781 | |
| 1782 | ret_sign: |
| 1783 | *endptr= (char*) str; |
| 1784 | |
| 1785 | if (!unsigned_flag) |
| 1786 | { |
| 1787 | if (negative) |
| 1788 | { |
| 1789 | if (ull > (ulonglong) LONGLONG_MIN) |
| 1790 | { |
| 1791 | *error= MY_ERRNO_ERANGE; |
| 1792 | return (ulonglong) LONGLONG_MIN; |
| 1793 | } |
| 1794 | *error= 0; |
| 1795 | return (ulonglong) -(longlong) ull; |
| 1796 | } |
| 1797 | else |
| 1798 | { |
| 1799 | if (ull > (ulonglong) LONGLONG_MAX) |
| 1800 | { |
| 1801 | *error= MY_ERRNO_ERANGE; |
| 1802 | return (ulonglong) LONGLONG_MAX; |
| 1803 | } |
| 1804 | *error= 0; |
| 1805 | return ull; |
| 1806 | } |
| 1807 | } |
| 1808 | |
| 1809 | /* Unsigned number */ |
| 1810 | if (negative && ull) |
| 1811 | { |
| 1812 | *error= MY_ERRNO_ERANGE; |
| 1813 | return 0; |
| 1814 | } |
| 1815 | *error= 0; |
| 1816 | return ull; |
| 1817 | |
| 1818 | ret_zero: |
| 1819 | *endptr= (char*) str; |
| 1820 | *error= 0; |
| 1821 | return 0; |
| 1822 | |
| 1823 | ret_edom: |
| 1824 | *endptr= (char*) str; |
| 1825 | *error= MY_ERRNO_EDOM; |
| 1826 | return 0; |
| 1827 | |
| 1828 | ret_too_big: |
| 1829 | *endptr= (char*) str; |
| 1830 | *error= MY_ERRNO_ERANGE; |
| 1831 | return unsigned_flag ? |
| 1832 | ULONGLONG_MAX : |
| 1833 | negative ? (ulonglong) LONGLONG_MIN : (ulonglong) LONGLONG_MAX; |
| 1834 | } |
| 1835 | |
| 1836 | |
/*
  Check if a constant can be propagated

  SYNOPSIS:
    my_propagate_simple()
    cs		Character set information (not used)
    str		The constant string (not used)
    length	Length of the string (not used)

  NOTES:
   Takes the string in the given charset and checks
   if it can be safely propagated in the optimizer.

   create table t1 (
     s char(5) character set latin1 collate latin1_german2_ci);
   insert into t1 values (0xf6); -- o-umlaut
   select * from t1 where length(s)=1 and s='oe';

   The above query should return one row.
   We cannot convert this query into:
   select * from t1 where length('oe')=1 and s='oe';

   Currently we don't check the constant itself,
   and decide not to propagate a constant
   just if the collation itself allows tricky things
   like expansions and contractions. In the future
   we can write more sophisticated functions to
   check the constants. For example, 'oa' can always
   be safely propagated in German2 because unlike
   'oe' it does not have any special meaning.

   This variant always allows propagation.

  RETURN
    1 if constant can be safely propagated
    0 if it is not safe to propagate the constant
*/



my_bool my_propagate_simple(CHARSET_INFO *cs __attribute__((unused)),
                            const uchar *str __attribute__((unused)),
                            size_t length __attribute__((unused)))
{
  return 1;
}
| 1881 | |
| 1882 | |
/*
  Constant propagation check that never allows propagation.

  Always returns 0; none of the arguments is used.
*/
my_bool my_propagate_complex(CHARSET_INFO *cs __attribute__((unused)),
                             const uchar *str __attribute__((unused)),
                             size_t length __attribute__((unused)))
{
  return 0;
}
| 1889 | |
| 1890 | |
| 1891 | /* |
| 1892 | Normalize strxfrm flags |
| 1893 | |
| 1894 | SYNOPSIS: |
| 1895 | my_strxfrm_flag_normalize() |
| 1896 | flags - non-normalized flags |
| 1897 | nlevels - number of levels |
| 1898 | |
| 1899 | NOTES: |
| 1900 | If levels are omitted, then 1-maximum is assumed. |
| 1901 | If any level number is greater than the maximum, |
| 1902 | it is treated as the maximum. |
| 1903 | |
| 1904 | RETURN |
| 1905 | normalized flags |
| 1906 | */ |
| 1907 | |
| 1908 | uint my_strxfrm_flag_normalize(uint flags, uint maximum) |
| 1909 | { |
| 1910 | DBUG_ASSERT(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS); |
| 1911 | |
| 1912 | /* If levels are omitted, then 1-maximum is assumed*/ |
| 1913 | if (!(flags & MY_STRXFRM_LEVEL_ALL)) |
| 1914 | { |
| 1915 | static uint def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F }; |
| 1916 | uint flag_pad= flags & |
| 1917 | (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN); |
| 1918 | flags= def_level_flags[maximum] | flag_pad; |
| 1919 | } |
| 1920 | else |
| 1921 | { |
| 1922 | uint i; |
| 1923 | uint flag_lev= flags & MY_STRXFRM_LEVEL_ALL; |
| 1924 | uint flag_dsc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL; |
| 1925 | uint flag_rev= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL; |
| 1926 | uint flag_pad= flags & |
| 1927 | (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN); |
| 1928 | |
| 1929 | /* |
| 1930 | If any level number is greater than the maximum, |
| 1931 | it is treated as the maximum. |
| 1932 | */ |
| 1933 | for (maximum--, flags= 0, i= 0; i < MY_STRXFRM_NLEVELS; i++) |
| 1934 | { |
| 1935 | uint src_bit= 1 << i; |
| 1936 | if (flag_lev & src_bit) |
| 1937 | { |
| 1938 | uint dst_bit= 1 << MY_MIN(i, maximum); |
| 1939 | flags|= dst_bit; |
| 1940 | flags|= (flag_dsc & dst_bit) << MY_STRXFRM_DESC_SHIFT; |
| 1941 | flags|= (flag_rev & dst_bit) << MY_STRXFRM_REVERSE_SHIFT; |
| 1942 | } |
| 1943 | } |
| 1944 | flags|= flag_pad; |
| 1945 | } |
| 1946 | |
| 1947 | return flags; |
| 1948 | } |
| 1949 | |
| 1950 | |
| 1951 | /* |
| 1952 | Apply DESC and REVERSE collation rules. |
| 1953 | |
| 1954 | SYNOPSIS: |
| 1955 | my_strxfrm_desc_and_reverse() |
| 1956 | str - pointer to string |
| 1957 | strend - end of string |
| 1958 | flags - flags |
| 1959 | level - which level, starting from 0. |
| 1960 | |
| 1961 | NOTES: |
| 1962 | Apply DESC or REVERSE or both flags. |
| 1963 | |
| 1964 | If DESC flag is given, then the weights |
| 1965 | come out NOTed or negated for that level. |
| 1966 | |
| 1967 | If REVERSE flags is given, then the weights come out in |
| 1968 | reverse order for that level, that is, starting with |
| 1969 | the last character and ending with the first character. |
| 1970 | |
| 1971 | If nether DESC nor REVERSE flags are give, |
| 1972 | the string is not changed. |
| 1973 | |
| 1974 | */ |
| 1975 | void |
| 1976 | my_strxfrm_desc_and_reverse(uchar *str, uchar *strend, |
| 1977 | uint flags, uint level) |
| 1978 | { |
| 1979 | if (flags & (MY_STRXFRM_DESC_LEVEL1 << level)) |
| 1980 | { |
| 1981 | if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level)) |
| 1982 | { |
| 1983 | for (strend--; str <= strend;) |
| 1984 | { |
| 1985 | uchar tmp= *str; |
| 1986 | *str++= ~*strend; |
| 1987 | *strend--= ~tmp; |
| 1988 | } |
| 1989 | } |
| 1990 | else |
| 1991 | { |
| 1992 | for (; str < strend; str++) |
| 1993 | *str= ~*str; |
| 1994 | } |
| 1995 | } |
| 1996 | else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level)) |
| 1997 | { |
| 1998 | for (strend--; str < strend;) |
| 1999 | { |
| 2000 | uchar tmp= *str; |
| 2001 | *str++= *strend; |
| 2002 | *strend--= tmp; |
| 2003 | } |
| 2004 | } |
| 2005 | } |
| 2006 | |
| 2007 | |
| 2008 | size_t |
| 2009 | my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs, |
| 2010 | uchar *str, uchar *frmend, uchar *strend, |
| 2011 | uint nweights, uint flags, uint level) |
| 2012 | { |
| 2013 | if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE)) |
| 2014 | { |
| 2015 | uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen); |
| 2016 | cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char); |
| 2017 | frmend+= fill_length; |
| 2018 | } |
| 2019 | my_strxfrm_desc_and_reverse(str, frmend, flags, level); |
| 2020 | if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend) |
| 2021 | { |
| 2022 | size_t fill_length= strend - frmend; |
| 2023 | cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char); |
| 2024 | frmend= strend; |
| 2025 | } |
| 2026 | return frmend - str; |
| 2027 | } |
| 2028 | |
| 2029 | |
| 2030 | size_t |
| 2031 | my_strxfrm_pad_desc_and_reverse_nopad(CHARSET_INFO *cs, |
| 2032 | uchar *str, uchar *frmend, uchar *strend, |
| 2033 | uint nweights, uint flags, uint level) |
| 2034 | { |
| 2035 | if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE)) |
| 2036 | { |
| 2037 | uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen); |
| 2038 | memset(frmend, 0x00, fill_length); |
| 2039 | frmend+= fill_length; |
| 2040 | } |
| 2041 | my_strxfrm_desc_and_reverse(str, frmend, flags, level); |
| 2042 | if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend) |
| 2043 | { |
| 2044 | size_t fill_length= strend - frmend; |
| 2045 | memset(frmend, 0x00, fill_length); |
| 2046 | frmend= strend; |
| 2047 | } |
| 2048 | return frmend - str; |
| 2049 | } |
| 2050 | |
| 2051 | |
/*
  Character set handler for 8-bit character sets.

  The initializer is positional: each function is assigned to the
  member of MY_CHARSET_HANDLER at the same position, so the order
  of the entries must follow the structure declaration.
*/
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
  my_cset_init_8bit,
  my_numchars_8bit,
  my_charpos_8bit,
  my_lengthsp_8bit,
  my_numcells_8bit,
  my_mb_wc_8bit,
  my_wc_mb_8bit,
  my_mb_ctype_8bit,
  my_caseup_str_8bit,
  my_casedn_str_8bit,
  my_caseup_8bit,
  my_casedn_8bit,
  my_snprintf_8bit,
  my_long10_to_str_8bit,
  my_longlong10_to_str_8bit,
  my_fill_8bit,
  my_strntol_8bit,
  my_strntoul_8bit,
  my_strntoll_8bit,
  my_strntoull_8bit,
  my_strntod_8bit,
  my_strtoll10_8bit,
  my_strntoull10rnd_8bit,
  my_scan_8bit,
  my_charlen_8bit,
  my_well_formed_char_length_8bit,
  my_copy_8bit,
  my_wc_mb_bin, /* native_to_mb */
};
| 2083 | |
/*
  Collation handler for simple 8-bit collations.

  The initializer is positional: the order of the entries must
  follow the declaration of MY_COLLATION_HANDLER.
*/
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
{
  my_coll_init_simple,	/* init */
  my_strnncoll_simple,
  my_strnncollsp_simple,
  my_strnxfrm_simple,
  my_strnxfrmlen_simple,
  my_like_range_simple,
  my_wildcmp_8bit,
  my_strcasecmp_8bit,
  my_instr_simple,
  my_hash_sort_simple,
  my_propagate_simple
};
| 2098 | |
| 2099 | |
/*
  Collation handler for simple 8-bit collations, NOPAD variant.

  It differs from my_collation_8bit_simple_ci_handler in three
  entries: my_strnncollsp_simple_nopad, my_strnxfrm_simple_nopad
  and my_hash_sort_simple_nopad. The initializer is positional:
  the order of the entries must follow the declaration of
  MY_COLLATION_HANDLER.
*/
MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler =
{
  my_coll_init_simple,	/* init */
  my_strnncoll_simple,
  my_strnncollsp_simple_nopad,
  my_strnxfrm_simple_nopad,
  my_strnxfrmlen_simple,
  my_like_range_simple,
  my_wildcmp_8bit,
  my_strcasecmp_8bit,
  my_instr_simple,
  my_hash_sort_simple_nopad,
  my_propagate_simple
};
| 2114 | |