1/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15
16/*
  A better implementation of the UNIX ctype(3) library.
18*/
19
20#ifndef _m_ctype_h
21#define _m_ctype_h
22
23#include <my_attribute.h>
24
/*
  Message severity, most severe first.
  Used as the first argument of MY_CHARSET_LOADER::reporter.
*/
enum loglevel
{
  ERROR_LEVEL       = 0,
  WARNING_LEVEL     = 1,
  INFORMATION_LEVEL = 2
};
30
31#ifdef __cplusplus
32extern "C" {
33#endif
34
/* Sizes (in elements) of the name buffer and the per-charset tables */
#define MY_CS_NAME_SIZE              32
#define MY_CS_CTYPE_TABLE_SIZE       257
#define MY_CS_TO_LOWER_TABLE_SIZE    256
#define MY_CS_TO_UPPER_TABLE_SIZE    256
#define MY_CS_SORT_ORDER_TABLE_SIZE  256
#define MY_CS_TO_UNI_TABLE_SIZE      256

/* Directory name (with trailing slash) used for character set files */
#define CHARSET_DIR                  "charsets/"

/* Wide character type used by the conversion routines of this header */
#define my_wc_t                      ulong

/* U+FFFD, the Unicode REPLACEMENT CHARACTER */
#define MY_CS_REPLACEMENT_CHARACTER  0xFFFD
47
/*
  On i386 we store Unicode->CS conversion tables for
  some character sets using Big-endian order,
  to copy two bytes at once.
  This gives some performance improvement.

  MB2(x)              - a two-byte code in the byte order the tables use
  MY_PUT_MB2(s, code) - store the two-byte "code" at position "s"

  Both macros evaluate their arguments more than once on some
  platforms, so the arguments must not have side effects.
*/
#ifdef __i386__
#define MB2(x)                (((x) >> 8) + (((x) & 0xFF) << 8))
#define MY_PUT_MB2(s, code)   { *((uint16*)(s))= (code); }
#else
#define MB2(x)                (x)
/*
  "code" is parenthesized so that an expression argument (e.g. a | b)
  is shifted and masked as a whole rather than by operator precedence.
*/
#define MY_PUT_MB2(s, code)   { (s)[0]= (code) >> 8; (s)[1]= (code) & 0xFF; }
#endif
61
/*
  Short aliases for the handler and lookup-table structures.
  Each alias is const-qualified, so objects reached through
  these names are read-only.
*/
typedef const struct my_charset_handler_st   MY_CHARSET_HANDLER;
typedef const struct my_collation_handler_st MY_COLLATION_HANDLER;

typedef const struct unicase_info_st         MY_UNICASE_INFO;
typedef const struct uni_ctype_st            MY_UNI_CTYPE;
typedef const struct my_uni_idx_st           MY_UNI_IDX;
68
69typedef struct unicase_info_char_st
70{
71 uint32 toupper;
72 uint32 tolower;
73 uint32 sort;
74} MY_UNICASE_CHARACTER;
75
76
77struct unicase_info_st
78{
79 my_wc_t maxchar;
80 MY_UNICASE_CHARACTER **page;
81};
82
83
84extern MY_UNICASE_INFO my_unicase_default;
85extern MY_UNICASE_INFO my_unicase_turkish;
86extern MY_UNICASE_INFO my_unicase_mysql500;
87extern MY_UNICASE_INFO my_unicase_unicode520;
88
/* Size of the character array MY_CONTRACTION::ch */
#define MY_UCA_MAX_CONTRACTION             6

/*
  Weight string limits.

  The DUCET tables in ctype-uca.c are dumped with at most 8 weights per
  character, and cs->strxfrm_multiply is 8 for all UCA based collations.

  Language-specific UCA collations (with tailorings) are held to the same
  limit of 8 weights per single character, so that strxfrm_multiply stays
  valid. Contractions may have weight strings twice as long (up to 16
  weights): a contraction consists of at least 2 characters, so the ratio
  of 8 weights per character is still respected.
*/
#define MY_UCA_MAX_WEIGHT_SIZE             (8+1)   /* Including 0 terminator */
#define MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE (2*8+1) /* Including 0 terminator */

/* Number of elements in MY_UCA_INFO::level */
#define MY_UCA_WEIGHT_LEVELS               2
104
105typedef struct my_contraction_t
106{
107 my_wc_t ch[MY_UCA_MAX_CONTRACTION]; /* Character sequence */
108 uint16 weight[MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE];/* Its weight string, 0-terminated */
109 my_bool with_context;
110} MY_CONTRACTION;
111
112
113typedef struct my_contraction_list_t
114{
115 size_t nitems; /* Number of items in the list */
116 MY_CONTRACTION *item; /* List of contractions */
117 char *flags; /* Character flags, e.g. "is contraction head") */
118} MY_CONTRACTIONS;
119
120my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc);
121my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc);
122uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
123 my_wc_t wc1, my_wc_t wc2);
124
125
126/* Collation weights on a single level (e.g. primary, secondary, tertiarty) */
127typedef struct my_uca_level_info_st
128{
129 my_wc_t maxchar;
130 uchar *lengths;
131 uint16 **weights;
132 MY_CONTRACTIONS contractions;
133 uint levelno;
134} MY_UCA_WEIGHT_LEVEL;
135
136
137typedef struct uca_info_st
138{
139 MY_UCA_WEIGHT_LEVEL level[MY_UCA_WEIGHT_LEVELS];
140
141 /* Logical positions */
142 my_wc_t first_non_ignorable;
143 my_wc_t last_non_ignorable;
144 my_wc_t first_primary_ignorable;
145 my_wc_t last_primary_ignorable;
146 my_wc_t first_secondary_ignorable;
147 my_wc_t last_secondary_ignorable;
148 my_wc_t first_tertiary_ignorable;
149 my_wc_t last_tertiary_ignorable;
150 my_wc_t first_trailing;
151 my_wc_t last_trailing;
152 my_wc_t first_variable;
153 my_wc_t last_variable;
154
155} MY_UCA_INFO;
156
157
158
159extern MY_UCA_INFO my_uca_v400;
160
161
162struct uni_ctype_st
163{
164 uchar pctype;
165 const uchar *ctype;
166};
167
168extern MY_UNI_CTYPE my_uni_ctype[256];
169
/* mb_wc and wc_mb return codes */
#define MY_CS_ILSEQ      0      /* Wrong byte sequence: mb_wc               */
#define MY_CS_ILUNI      0      /* Cannot encode Unicode to charset: wc_mb  */
#define MY_CS_TOOSMALL   (-101) /* Need at least one byte: wc_mb and mb_wc    */
#define MY_CS_TOOSMALL2  (-102) /* Need at least two bytes: wc_mb and mb_wc   */
#define MY_CS_TOOSMALL3  (-103) /* Need at least three bytes: wc_mb and mb_wc */
/* The following three are currently not really used */
#define MY_CS_TOOSMALL4  (-104) /* Need at least 4 bytes: wc_mb and mb_wc */
#define MY_CS_TOOSMALL5  (-105) /* Need at least 5 bytes: wc_mb and mb_wc */
#define MY_CS_TOOSMALL6  (-106) /* Need at least 6 bytes: wc_mb and mb_wc */
/* A helper macro for "need at least n bytes" */
#define MY_CS_TOOSMALLN(n)    (-100-(n))

#define MY_CS_MBMAXLEN   6      /* Maximum supported mbmaxlen */
/* True if "rc" is one of MY_CS_TOOSMALL..MY_CS_TOOSMALL6; evaluates "rc" twice */
#define MY_CS_IS_TOOSMALL(rc) ((rc) >= MY_CS_TOOSMALL6 && (rc) <= MY_CS_TOOSMALL)
185
/*
  Sequence types.
  NOTE(review): presumably the values accepted by the "sq" argument of
  the scan() handler -- confirm against the implementations.
*/
#define MY_SEQ_INTTAIL    1
#define MY_SEQ_SPACES     2
#define MY_SEQ_NONSPACES  3 /* Skip non-space characters, including bad bytes */
189
/*
  Charset list flags (one bit each).
  All values are written in hexadecimal so that the bit positions line up.
*/
#define MY_CS_COMPILED              0x00001 /* compiled-in sets */
#define MY_CS_CONFIG                0x00002 /* sets that have a *.conf file */
#define MY_CS_INDEX                 0x00004 /* sets listed in the Index file */
#define MY_CS_LOADED                0x00008 /* sets that are currently loaded */
#define MY_CS_BINSORT               0x00010 /* if binary sort order */
#define MY_CS_PRIMARY               0x00020 /* if primary collation */
#define MY_CS_STRNXFRM              0x00040 /* if strnxfrm is used for sort */
#define MY_CS_UNICODE               0x00080 /* if a charset is BMP Unicode */
#define MY_CS_READY                 0x00100 /* if a charset is initialized */
#define MY_CS_AVAILABLE             0x00200 /* if either compiled-in or loaded */
#define MY_CS_CSSORT                0x00400 /* if case sensitive sort order */
#define MY_CS_HIDDEN                0x00800 /* don't display in SHOW */
#define MY_CS_PUREASCII             0x01000 /* if a charset is pure ascii */
#define MY_CS_NONASCII              0x02000 /* if not ASCII-compatible */
#define MY_CS_UNICODE_SUPPLEMENT    0x04000 /* Non-BMP Unicode characters */
#define MY_CS_LOWER_SORT            0x08000 /* if use lower case as weight */
#define MY_CS_STRNXFRM_BAD_NWEIGHTS 0x10000 /* strnxfrm ignores "nweights" */
#define MY_CS_NOPAD                 0x20000 /* if does not ignore trailing spaces */
#define MY_CS_NON1TO1               0x40000 /* Has a complex mapping from characters
                                               to weights, e.g. contractions, expansions,
                                               ignorable characters */
#define MY_CHARSET_UNDEFINED        0
213
/*
  Character repertoire flags.
  UNICODE30 is the bitwise OR of the other two.
*/
#define MY_REPERTOIRE_ASCII      1 /* Pure ASCII            U+0000..U+007F */
#define MY_REPERTOIRE_EXTENDED   2 /* Extended characters:  U+0080..U+FFFF */
#define MY_REPERTOIRE_UNICODE30  3 /* ASCII | EXTENDED:     U+0000..U+FFFF */
218
/*
  Flags for strxfrm.

  Bit layout:
    0x0000003F  level selection, one bit per level (LEVEL1..LEVEL6)
    0x000000C0  padding options
    0x00003F00  descending order per level: LEVELn << MY_STRXFRM_DESC_SHIFT
    0x0000C000  unused
    0x003F0000  reverse order per level:    LEVELn << MY_STRXFRM_REVERSE_SHIFT
*/
#define MY_STRXFRM_LEVEL1          0x00000001 /* for primary weights   */
#define MY_STRXFRM_LEVEL2          0x00000002 /* for secondary weights */
#define MY_STRXFRM_LEVEL3          0x00000004 /* for tertiary weights  */
#define MY_STRXFRM_LEVEL4          0x00000008 /* fourth level weights  */
#define MY_STRXFRM_LEVEL5          0x00000010 /* fifth level weights   */
#define MY_STRXFRM_LEVEL6          0x00000020 /* sixth level weights   */
#define MY_STRXFRM_LEVEL_ALL       0x0000003F /* Bit OR for the above six */
#define MY_STRXFRM_NLEVELS         6          /* Number of possible levels*/

#define MY_STRXFRM_PAD_WITH_SPACE  0x00000040 /* if pad result with spaces */
#define MY_STRXFRM_PAD_TO_MAXLEN   0x00000080 /* if pad tail(for filesort) */

#define MY_STRXFRM_DESC_LEVEL1     0x00000100 /* if desc order for level1 */
#define MY_STRXFRM_DESC_LEVEL2     0x00000200 /* if desc order for level2 */
/* Must be a single bit: 0x300 would alias DESC_LEVEL1 | DESC_LEVEL2 */
#define MY_STRXFRM_DESC_LEVEL3     0x00000400 /* if desc order for level3 */
#define MY_STRXFRM_DESC_LEVEL4     0x00000800 /* if desc order for level4 */
#define MY_STRXFRM_DESC_LEVEL5     0x00001000 /* if desc order for level5 */
#define MY_STRXFRM_DESC_LEVEL6     0x00002000 /* if desc order for level6 */
#define MY_STRXFRM_DESC_SHIFT      8

#define MY_STRXFRM_UNUSED_00004000 0x00004000 /* for future extensions     */
#define MY_STRXFRM_UNUSED_00008000 0x00008000 /* for future extensions     */

#define MY_STRXFRM_REVERSE_LEVEL1  0x00010000 /* if reverse order for level1 */
#define MY_STRXFRM_REVERSE_LEVEL2  0x00020000 /* if reverse order for level2 */
#define MY_STRXFRM_REVERSE_LEVEL3  0x00040000 /* if reverse order for level3 */
#define MY_STRXFRM_REVERSE_LEVEL4  0x00080000 /* if reverse order for level4 */
#define MY_STRXFRM_REVERSE_LEVEL5  0x00100000 /* if reverse order for level5 */
#define MY_STRXFRM_REVERSE_LEVEL6  0x00200000 /* if reverse order for level6 */
#define MY_STRXFRM_REVERSE_SHIFT   16
250
/*
  Collation IDs for MariaDB that should not conflict with MySQL.
  We reserve 256..511, because MySQL will most likely use this range
  when the range 0..255 is full.

  We use the next 256 IDs starting from 512 and divide
  them into 8 chunks, 32 collations each, as follows:

  512 + (0..31)    for single byte collations (e.g. latin9)
  512 + (32..63)   reserved (e.g. for utf32le, or more single byte collations)
  512 + (64..95)   for utf8
  512 + (96..127)  for utf8mb4
  512 + (128..159) for ucs2
  512 + (160..191) for utf16
  512 + (192..223) for utf16le
  512 + (224..255) for utf32

  Each macro below is the first ID of its chunk.
*/
#define MY_PAGE2_COLLATION_ID_8BIT      0x200
#define MY_PAGE2_COLLATION_ID_RESERVED  0x220
#define MY_PAGE2_COLLATION_ID_UTF8      0x240
#define MY_PAGE2_COLLATION_ID_UTF8MB4   0x260
#define MY_PAGE2_COLLATION_ID_UCS2      0x280
#define MY_PAGE2_COLLATION_ID_UTF16     0x2A0
#define MY_PAGE2_COLLATION_ID_UTF16LE   0x2C0
#define MY_PAGE2_COLLATION_ID_UTF32     0x2E0
276
277struct my_uni_idx_st
278{
279 uint16 from;
280 uint16 to;
281 const uchar *tab;
282};
283
284typedef struct
285{
286 uint beg;
287 uint end;
288 uint mb_len;
289} my_match_t;
290
/*
  Lexer states. The numeric values are implicit (MY_LEX_START is 0),
  so the order of the enumerators must not change.
*/
enum my_lex_states
{
  MY_LEX_START,
  MY_LEX_CHAR,
  MY_LEX_IDENT,
  MY_LEX_IDENT_SEP,
  MY_LEX_IDENT_START,
  MY_LEX_REAL,
  MY_LEX_HEX_NUMBER,
  MY_LEX_BIN_NUMBER,
  MY_LEX_CMP_OP,
  MY_LEX_LONG_CMP_OP,
  MY_LEX_STRING,
  MY_LEX_COMMENT,
  MY_LEX_END,
  MY_LEX_OPERATOR_OR_IDENT,
  MY_LEX_NUMBER_IDENT,
  MY_LEX_INT_OR_REAL,
  MY_LEX_REAL_OR_POINT,
  MY_LEX_BOOL,
  MY_LEX_EOL,
  MY_LEX_ESCAPE,
  MY_LEX_LONG_COMMENT,
  MY_LEX_END_LONG_COMMENT,
  MY_LEX_SEMICOLON,
  MY_LEX_SET_VAR,
  MY_LEX_USER_END,
  MY_LEX_HOSTNAME,
  MY_LEX_SKIP,
  MY_LEX_USER_VARIABLE_DELIMITER,
  MY_LEX_SYSTEM_VAR,
  MY_LEX_IDENT_OR_KEYWORD,
  MY_LEX_IDENT_OR_HEX,
  MY_LEX_IDENT_OR_BIN,
  MY_LEX_IDENT_OR_NCHAR,
  MY_LEX_STRING_OR_DELIMITER,
  MY_LEX_MINUS_OR_COMMENT,
  MY_LEX_PLACEHOLDER,
  MY_LEX_COMMA
};
307
struct charset_info_st;

/*
  Callbacks and error buffer handed to the init() members of the
  charset and collation handlers.
*/
typedef struct my_charset_loader_st
{
  char  error[128];                       /* Buffer for an error message */

  /* Memory management callbacks */
  void *(*once_alloc)(size_t);
  void *(*malloc)(size_t);
  void *(*realloc)(void *, size_t);
  void  (*free)(void *);

  /* printf-style message reporting callback */
  void  (*reporter)(enum loglevel, const char *format, ...);

  /* Callback taking a charset_info_st; presumably registers a collation */
  int   (*add_collation)(struct charset_info_st *cs);
} MY_CHARSET_LOADER;
320
321
/*
  Hook: a pointer to a function taking and returning int, defined elsewhere.
  NOTE(review): presumably lets string routines ask whether enough stack
  is left before recursing -- confirm the argument and return conventions
  where the pointer is set.
*/
extern int (*my_string_stack_guard)(int);
323
324/* See strings/CHARSET_INFO.txt for information about this structure */
325struct my_collation_handler_st
326{
327 my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *);
328 /* Collation routines */
329 int (*strnncoll)(CHARSET_INFO *,
330 const uchar *, size_t, const uchar *, size_t, my_bool);
331 int (*strnncollsp)(CHARSET_INFO *,
332 const uchar *, size_t, const uchar *, size_t);
333 size_t (*strnxfrm)(CHARSET_INFO *,
334 uchar *dst, size_t dstlen, uint nweights,
335 const uchar *src, size_t srclen, uint flags);
336 size_t (*strnxfrmlen)(CHARSET_INFO *, size_t);
337 my_bool (*like_range)(CHARSET_INFO *,
338 const char *s, size_t s_length,
339 pchar w_prefix, pchar w_one, pchar w_many,
340 size_t res_length,
341 char *min_str, char *max_str,
342 size_t *min_len, size_t *max_len);
343 int (*wildcmp)(CHARSET_INFO *,
344 const char *str,const char *str_end,
345 const char *wildstr,const char *wildend,
346 int escape,int w_one, int w_many);
347
348 int (*strcasecmp)(CHARSET_INFO *, const char *, const char *);
349
350 uint (*instr)(CHARSET_INFO *,
351 const char *b, size_t b_length,
352 const char *s, size_t s_length,
353 my_match_t *match, uint nmatch);
354
355 /* Hash calculation */
356 void (*hash_sort)(CHARSET_INFO *cs, const uchar *key, size_t len,
357 ulong *nr1, ulong *nr2);
358 my_bool (*propagate)(CHARSET_INFO *cs, const uchar *str, size_t len);
359};
360
361extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
362extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
363extern MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler;
364extern MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler;
365extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
366
367/* Some typedef to make it easy for C++ to make function pointers */
368typedef int (*my_charset_conv_mb_wc)(CHARSET_INFO *, my_wc_t *,
369 const uchar *, const uchar *);
370typedef int (*my_charset_conv_wc_mb)(CHARSET_INFO *, my_wc_t,
371 uchar *, uchar *);
372typedef size_t (*my_charset_conv_case)(CHARSET_INFO *,
373 char *, size_t, char *, size_t);
374
/*
  Statistics of a native string copy (no Unicode conversion involved).

  The structure may be left uninitialized before calling a copying
  routine. The copying routine must fill it as follows:
  - m_source_end_pos must be set to a non-NULL value
    in the range of the input string.
  - m_well_formed_error_pos must be set to NULL if the string was
    well formed, or to the position of the leftmost bad byte sequence.
*/
typedef struct
{
  /* Position where reading stopped */
  const char *m_source_end_pos;
  /* Position where a bad byte was found */
  const char *m_well_formed_error_pos;
} MY_STRCOPY_STATUS;
391
392
/*
  Statistics of a Unicode string conversion.
*/
typedef struct
{
  /*
    NOTE(review): presumably the position of the first character that
    could not be converted, or NULL if there was none -- confirm against
    the conversion routines.
  */
  const char *m_cannot_convert_error_pos;
} MY_STRCONV_STATUS;
400
401
402/* See strings/CHARSET_INFO.txt about information on this structure */
403struct my_charset_handler_st
404{
405 my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader);
406 /* Multibyte routines */
407 size_t (*numchars)(CHARSET_INFO *, const char *b, const char *e);
408 size_t (*charpos)(CHARSET_INFO *, const char *b, const char *e,
409 size_t pos);
410 size_t (*lengthsp)(CHARSET_INFO *, const char *ptr, size_t length);
411 size_t (*numcells)(CHARSET_INFO *, const char *b, const char *e);
412
413 /* Unicode conversion */
414 my_charset_conv_mb_wc mb_wc;
415 my_charset_conv_wc_mb wc_mb;
416
417 /* CTYPE scanner */
418 int (*ctype)(CHARSET_INFO *cs, int *ctype,
419 const uchar *s, const uchar *e);
420
421 /* Functions for case and sort conversion */
422 size_t (*caseup_str)(CHARSET_INFO *, char *);
423 size_t (*casedn_str)(CHARSET_INFO *, char *);
424
425 my_charset_conv_case caseup;
426 my_charset_conv_case casedn;
427
428 /* Charset dependent snprintf() */
429 size_t (*snprintf)(CHARSET_INFO *, char *to, size_t n,
430 const char *fmt,
431 ...) ATTRIBUTE_FORMAT_FPTR(printf, 4, 5);
432 size_t (*long10_to_str)(CHARSET_INFO *, char *to, size_t n,
433 int radix, long int val);
434 size_t (*longlong10_to_str)(CHARSET_INFO *, char *to, size_t n,
435 int radix, longlong val);
436
437 void (*fill)(CHARSET_INFO *, char *to, size_t len, int fill);
438
439 /* String-to-number conversion routines */
440 long (*strntol)(CHARSET_INFO *, const char *s, size_t l,
441 int base, char **e, int *err);
442 ulong (*strntoul)(CHARSET_INFO *, const char *s, size_t l,
443 int base, char **e, int *err);
444 longlong (*strntoll)(CHARSET_INFO *, const char *s, size_t l,
445 int base, char **e, int *err);
446 ulonglong (*strntoull)(CHARSET_INFO *, const char *s, size_t l,
447 int base, char **e, int *err);
448 double (*strntod)(CHARSET_INFO *, char *s, size_t l, char **e,
449 int *err);
450 longlong (*strtoll10)(CHARSET_INFO *cs,
451 const char *nptr, char **endptr, int *error);
452 ulonglong (*strntoull10rnd)(CHARSET_INFO *cs,
453 const char *str, size_t length,
454 int unsigned_fl,
455 char **endptr, int *error);
456 size_t (*scan)(CHARSET_INFO *, const char *b, const char *e,
457 int sq);
458
459 /* String copying routines and helpers for them */
460 /*
461 charlen() - calculate length of the left-most character in bytes.
462 @param cs Character set
463 @param str The beginning of the string
464 @param end The end of the string
465
466 @return MY_CS_ILSEQ if a bad byte sequence was found.
467 @return MY_CS_TOOSMALLN(x) if the string ended unexpectedly.
468 @return a positive number in the range 1..mbmaxlen,
469 if a valid character was found.
470 */
471 int (*charlen)(CHARSET_INFO *cs, const uchar *str, const uchar *end);
472 /*
473 well_formed_char_length() - returns character length of a string.
474
475 @param cs Character set
476 @param str The beginning of the string
477 @param end The end of the string
478 @param nchars Not more than "nchars" left-most characters are checked.
479 @param status[OUT] Additional statistics is returned here.
480 "status" can be uninitialized before the call,
481 and it is fully initialized after the call.
482
483 status->m_source_end_pos is set to the position where reading stopped.
484
485 If a bad byte sequence is found, the function returns immediately and
486 status->m_well_formed_error_pos is set to the position where a bad byte
487 sequence was found.
488
489 status->m_well_formed_error_pos is set to NULL if no bad bytes were found.
490 If status->m_well_formed_error_pos is NULL after the call, that means:
491 - either the function reached the end of the string,
492 - or all "nchars" characters were read.
493 The caller can check status->m_source_end_pos to detect which of these two
494 happened.
495 */
496 size_t (*well_formed_char_length)(CHARSET_INFO *cs,
497 const char *str, const char *end,
498 size_t nchars,
499 MY_STRCOPY_STATUS *status);
500
501 /*
502 copy_fix() - copy a string, replace bad bytes to '?'.
503 Not more than "nchars" characters are copied.
504
505 status->m_source_end_pos is set to a position in the range
506 between "src" and "src + src_length", where reading stopped.
507
508 status->m_well_formed_error_pos is set to NULL if the string
509 in the range "src" and "status->m_source_end_pos" was well formed,
510 or is set to a position between "src" and "src + src_length" where
511 the leftmost bad byte sequence was found.
512 */
513 size_t (*copy_fix)(CHARSET_INFO *,
514 char *dst, size_t dst_length,
515 const char *src, size_t src_length,
516 size_t nchars, MY_STRCOPY_STATUS *status);
517 /**
518 Write a character to the target string, using its native code.
519 For Unicode character sets (utf8, ucs2, utf16, utf16le, utf32, filename)
520 native codes are equivalent to Unicode code points.
521 For 8bit character sets the native code is just the byte value.
522 For Asian characters sets:
523 - MB1 native code is just the byte value (e.g. on the ASCII range)
524 - MB2 native code is ((b0 << 8) + b1).
525 - MB3 native code is ((b0 <<16) + (b1 << 8) + b2)
526 Note, CHARSET_INFO::min_sort_char and CHARSET_INFO::max_sort_char
527 are defined in native notation and should be written using
528 cs->cset->native_to_mb() rather than cs->cset->wc_mb().
529 */
530 my_charset_conv_wc_mb native_to_mb;
531};
532
533extern MY_CHARSET_HANDLER my_charset_8bit_handler;
534extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
535extern MY_CHARSET_HANDLER my_charset_utf8_handler;
536
537
538/*
539 We define this CHARSET_INFO_DEFINED here to prevent a repeat of the
540 typedef in hash.c, which will cause a compiler error.
541*/
542#define CHARSET_INFO_DEFINED
543
544/* See strings/CHARSET_INFO.txt about information on this structure */
545struct charset_info_st
546{
547 uint number;
548 uint primary_number;
549 uint binary_number;
550 uint state;
551 const char *csname;
552 const char *name;
553 const char *comment;
554 const char *tailoring;
555 const uchar *ctype;
556 const uchar *to_lower;
557 const uchar *to_upper;
558 const uchar *sort_order;
559 MY_UCA_INFO *uca;
560 const uint16 *tab_to_uni;
561 MY_UNI_IDX *tab_from_uni;
562 MY_UNICASE_INFO *caseinfo;
563 const uchar *state_map;
564 const uchar *ident_map;
565 uint strxfrm_multiply;
566 uchar caseup_multiply;
567 uchar casedn_multiply;
568 uint mbminlen;
569 uint mbmaxlen;
570 my_wc_t min_sort_char;
571 my_wc_t max_sort_char; /* For LIKE optimization */
572 uchar pad_char;
573 my_bool escape_with_backslash_is_dangerous;
574 uchar levels_for_order;
575
576 MY_CHARSET_HANDLER *cset;
577 MY_COLLATION_HANDLER *coll;
578
579};
580#define ILLEGAL_CHARSET_INFO_NUMBER (~0U)
581
582extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_bin;
583extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1;
584extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1_nopad;
585extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_filename;
586extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_utf8_general_ci;
587
/*
  Other compiled-in collations, grouped by character set.
  Each object is declared exactly once.
*/
extern struct charset_info_st my_charset_big5_bin;
extern struct charset_info_st my_charset_big5_chinese_ci;
extern struct charset_info_st my_charset_big5_nopad_bin;
extern struct charset_info_st my_charset_big5_chinese_nopad_ci;
extern struct charset_info_st my_charset_cp1250_czech_ci;
extern struct charset_info_st my_charset_cp932_bin;
extern struct charset_info_st my_charset_cp932_japanese_ci;
extern struct charset_info_st my_charset_cp932_nopad_bin;
extern struct charset_info_st my_charset_cp932_japanese_nopad_ci;
extern struct charset_info_st my_charset_eucjpms_bin;
extern struct charset_info_st my_charset_eucjpms_japanese_ci;
extern struct charset_info_st my_charset_eucjpms_nopad_bin;
extern struct charset_info_st my_charset_eucjpms_japanese_nopad_ci;
extern struct charset_info_st my_charset_euckr_bin;
extern struct charset_info_st my_charset_euckr_korean_ci;
extern struct charset_info_st my_charset_euckr_nopad_bin;
extern struct charset_info_st my_charset_euckr_korean_nopad_ci;
extern struct charset_info_st my_charset_gb2312_bin;
extern struct charset_info_st my_charset_gb2312_chinese_ci;
extern struct charset_info_st my_charset_gb2312_nopad_bin;
extern struct charset_info_st my_charset_gb2312_chinese_nopad_ci;
extern struct charset_info_st my_charset_gbk_bin;
extern struct charset_info_st my_charset_gbk_chinese_ci;
extern struct charset_info_st my_charset_gbk_nopad_bin;
extern struct charset_info_st my_charset_gbk_chinese_nopad_ci;
extern struct charset_info_st my_charset_latin1_bin;
extern struct charset_info_st my_charset_latin1_nopad_bin;
extern struct charset_info_st my_charset_latin1_german2_ci;
extern struct charset_info_st my_charset_latin2_czech_ci;
extern struct charset_info_st my_charset_sjis_bin;
extern struct charset_info_st my_charset_sjis_japanese_ci;
extern struct charset_info_st my_charset_sjis_nopad_bin;
extern struct charset_info_st my_charset_sjis_japanese_nopad_ci;
extern struct charset_info_st my_charset_tis620_bin;
extern struct charset_info_st my_charset_tis620_thai_ci;
extern struct charset_info_st my_charset_tis620_nopad_bin;
extern struct charset_info_st my_charset_tis620_thai_nopad_ci;
extern struct charset_info_st my_charset_ucs2_bin;
extern struct charset_info_st my_charset_ucs2_general_ci;
extern struct charset_info_st my_charset_ucs2_nopad_bin;
extern struct charset_info_st my_charset_ucs2_general_nopad_ci;
extern struct charset_info_st my_charset_ucs2_general_mysql500_ci;
extern struct charset_info_st my_charset_ucs2_unicode_ci;
extern struct charset_info_st my_charset_ucs2_unicode_nopad_ci;
extern struct charset_info_st my_charset_ujis_bin;
extern struct charset_info_st my_charset_ujis_japanese_ci;
extern struct charset_info_st my_charset_ujis_nopad_bin;
extern struct charset_info_st my_charset_ujis_japanese_nopad_ci;
extern struct charset_info_st my_charset_utf16_bin;
extern struct charset_info_st my_charset_utf16_general_ci;
extern struct charset_info_st my_charset_utf16_unicode_ci;
extern struct charset_info_st my_charset_utf16_unicode_nopad_ci;
extern struct charset_info_st my_charset_utf16_nopad_bin;
extern struct charset_info_st my_charset_utf16_general_nopad_ci;
extern struct charset_info_st my_charset_utf16le_bin;
extern struct charset_info_st my_charset_utf16le_general_ci;
extern struct charset_info_st my_charset_utf16le_nopad_bin;
extern struct charset_info_st my_charset_utf16le_general_nopad_ci;
extern struct charset_info_st my_charset_utf32_bin;
extern struct charset_info_st my_charset_utf32_general_ci;
extern struct charset_info_st my_charset_utf32_unicode_ci;
extern struct charset_info_st my_charset_utf32_unicode_nopad_ci;
extern struct charset_info_st my_charset_utf32_nopad_bin;
extern struct charset_info_st my_charset_utf32_general_nopad_ci;
extern struct charset_info_st my_charset_utf8_bin;
extern struct charset_info_st my_charset_utf8_nopad_bin;
extern struct charset_info_st my_charset_utf8_general_nopad_ci;
extern struct charset_info_st my_charset_utf8_general_mysql500_ci;
extern struct charset_info_st my_charset_utf8_unicode_ci;
extern struct charset_info_st my_charset_utf8_unicode_nopad_ci;
extern struct charset_info_st my_charset_utf8mb4_bin;
extern struct charset_info_st my_charset_utf8mb4_general_ci;
extern struct charset_info_st my_charset_utf8mb4_nopad_bin;
extern struct charset_info_st my_charset_utf8mb4_general_nopad_ci;
extern struct charset_info_st my_charset_utf8mb4_unicode_ci;
extern struct charset_info_st my_charset_utf8mb4_unicode_nopad_ci;
665
/* Names used for the 3-byte and the 4-byte UTF-8 character sets */
#define MY_UTF8MB3  "utf8"
#define MY_UTF8MB4  "utf8mb4"
668
669my_bool my_cs_have_contractions(CHARSET_INFO *cs);
670my_bool my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc);
671my_bool my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc);
672const uint16 *my_cs_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1,
673 my_wc_t wc2);
674
675/* declarations for simple charsets */
676extern size_t my_strnxfrm_simple(CHARSET_INFO *,
677 uchar *dst, size_t dstlen, uint nweights,
678 const uchar *src, size_t srclen, uint flags);
679size_t my_strnxfrmlen_simple(CHARSET_INFO *, size_t);
680extern int my_strnncoll_simple(CHARSET_INFO *, const uchar *, size_t,
681 const uchar *, size_t, my_bool);
682
683extern int my_strnncollsp_simple(CHARSET_INFO *, const uchar *, size_t,
684 const uchar *, size_t);
685
686extern void my_hash_sort_simple(CHARSET_INFO *cs,
687 const uchar *key, size_t len,
688 ulong *nr1, ulong *nr2);
689
690extern void my_hash_sort_simple_nopad(CHARSET_INFO *cs,
691 const uchar *key, size_t len,
692 ulong *nr1, ulong *nr2);
693
694extern void my_hash_sort_bin(CHARSET_INFO *cs,
695 const uchar *key, size_t len, ulong *nr1,
696 ulong *nr2);
697
698/**
699 Compare a string to an array of spaces, for PAD SPACE comparison.
700 The function iterates through the string and compares every byte to 0x20.
701 @param - the string
702 @param - its length
703 @return <0 - if a byte less than 0x20 was found in the string.
704 @return 0 - if all bytes in the string were 0x20, or if length was 0.
705 @return >0 - if a byte greater than 0x20 was found in the string.
706*/
707extern int my_strnncollsp_padspace_bin(const uchar *str, size_t length);
708
709extern size_t my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, size_t length);
710
711extern uint my_instr_simple(CHARSET_INFO *,
712 const char *b, size_t b_length,
713 const char *s, size_t s_length,
714 my_match_t *match, uint nmatch);
715
716size_t my_copy_8bit(CHARSET_INFO *,
717 char *dst, size_t dst_length,
718 const char *src, size_t src_length,
719 size_t nchars, MY_STRCOPY_STATUS *);
720size_t my_copy_fix_mb(CHARSET_INFO *cs,
721 char *dst, size_t dst_length,
722 const char *src, size_t src_length,
723 size_t nchars, MY_STRCOPY_STATUS *);
724
725/* Functions for 8bit */
726extern size_t my_caseup_str_8bit(CHARSET_INFO *, char *);
727extern size_t my_casedn_str_8bit(CHARSET_INFO *, char *);
728extern size_t my_caseup_8bit(CHARSET_INFO *, char *src, size_t srclen,
729 char *dst, size_t dstlen);
730extern size_t my_casedn_8bit(CHARSET_INFO *, char *src, size_t srclen,
731 char *dst, size_t dstlen);
732
733extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);
734
735int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,const uchar *e);
736int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
737int my_wc_mb_bin(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
738
739int my_mb_ctype_8bit(CHARSET_INFO *,int *, const uchar *,const uchar *);
740int my_mb_ctype_mb(CHARSET_INFO *,int *, const uchar *,const uchar *);
741
742size_t my_scan_8bit(CHARSET_INFO *cs, const char *b, const char *e, int sq);
743
744size_t my_snprintf_8bit(CHARSET_INFO *, char *to, size_t n,
745 const char *fmt, ...)
746 ATTRIBUTE_FORMAT(printf, 4, 5);
747
748long my_strntol_8bit(CHARSET_INFO *, const char *s, size_t l, int base,
749 char **e, int *err);
750ulong my_strntoul_8bit(CHARSET_INFO *, const char *s, size_t l, int base,
751 char **e, int *err);
752longlong my_strntoll_8bit(CHARSET_INFO *, const char *s, size_t l, int base,
753 char **e, int *err);
754ulonglong my_strntoull_8bit(CHARSET_INFO *, const char *s, size_t l, int base,
755 char **e, int *err);
756double my_strntod_8bit(CHARSET_INFO *, char *s, size_t l,char **e,
757 int *err);
758size_t my_long10_to_str_8bit(CHARSET_INFO *, char *to, size_t l, int radix,
759 long int val);
760size_t my_longlong10_to_str_8bit(CHARSET_INFO *, char *to, size_t l, int radix,
761 longlong val);
762
763longlong my_strtoll10_8bit(CHARSET_INFO *cs,
764 const char *nptr, char **endptr, int *error);
765longlong my_strtoll10_ucs2(CHARSET_INFO *cs,
766 const char *nptr, char **endptr, int *error);
767
768ulonglong my_strntoull10rnd_8bit(CHARSET_INFO *cs,
769 const char *str, size_t length, int
770 unsigned_fl, char **endptr, int *error);
771ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs,
772 const char *str, size_t length,
773 int unsigned_fl, char **endptr, int *error);
774
775void my_fill_8bit(CHARSET_INFO *cs, char* to, size_t l, int fill);
776
777/* For 8-bit character set */
778my_bool my_like_range_simple(CHARSET_INFO *cs,
779 const char *ptr, size_t ptr_length,
780 pbool escape, pbool w_one, pbool w_many,
781 size_t res_length,
782 char *min_str, char *max_str,
783 size_t *min_length, size_t *max_length);
784
785/* For ASCII-based multi-byte character sets with mbminlen=1 */
786my_bool my_like_range_mb(CHARSET_INFO *cs,
787 const char *ptr, size_t ptr_length,
788 pbool escape, pbool w_one, pbool w_many,
789 size_t res_length,
790 char *min_str, char *max_str,
791 size_t *min_length, size_t *max_length);
792
793/* For other character sets, with arbitrary mbminlen and mbmaxlen numbers */
794my_bool my_like_range_generic(CHARSET_INFO *cs,
795 const char *ptr, size_t ptr_length,
796 pbool escape, pbool w_one, pbool w_many,
797 size_t res_length,
798 char *min_str, char *max_str,
799 size_t *min_length, size_t *max_length);
800
801int my_wildcmp_8bit(CHARSET_INFO *,
802 const char *str,const char *str_end,
803 const char *wildstr,const char *wildend,
804 int escape, int w_one, int w_many);
805
806int my_wildcmp_bin(CHARSET_INFO *,
807 const char *str,const char *str_end,
808 const char *wildstr,const char *wildend,
809 int escape, int w_one, int w_many);
810
811size_t my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e);
812size_t my_numcells_8bit(CHARSET_INFO *, const char *b, const char *e);
813size_t my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, size_t pos);
814size_t my_well_formed_char_length_8bit(CHARSET_INFO *cs,
815 const char *b, const char *e,
816 size_t nchars,
817 MY_STRCOPY_STATUS *status);
818int my_charlen_8bit(CHARSET_INFO *, const uchar *str, const uchar *end);
819
820
821/* Functions for multibyte charsets */
822extern size_t my_caseup_str_mb(CHARSET_INFO *, char *);
823extern size_t my_casedn_str_mb(CHARSET_INFO *, char *);
824extern size_t my_caseup_mb(CHARSET_INFO *, char *src, size_t srclen,
825 char *dst, size_t dstlen);
826extern size_t my_casedn_mb(CHARSET_INFO *, char *src, size_t srclen,
827 char *dst, size_t dstlen);
828extern size_t my_caseup_mb_varlen(CHARSET_INFO *, char *src, size_t srclen,
829 char *dst, size_t dstlen);
830extern size_t my_casedn_mb_varlen(CHARSET_INFO *, char *src, size_t srclen,
831 char *dst, size_t dstlen);
832extern size_t my_caseup_ujis(CHARSET_INFO *, char *src, size_t srclen,
833 char *dst, size_t dstlen);
834extern size_t my_casedn_ujis(CHARSET_INFO *, char *src, size_t srclen,
835 char *dst, size_t dstlen);
836extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char *, const char *);
837
838int my_wildcmp_mb(CHARSET_INFO *,
839 const char *str,const char *str_end,
840 const char *wildstr,const char *wildend,
841 int escape, int w_one, int w_many);
842size_t my_numchars_mb(CHARSET_INFO *, const char *b, const char *e);
843size_t my_numcells_mb(CHARSET_INFO *, const char *b, const char *e);
844size_t my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, size_t pos);
845uint my_instr_mb(CHARSET_INFO *,
846 const char *b, size_t b_length,
847 const char *s, size_t s_length,
848 my_match_t *match, uint nmatch);
849
850int my_wildcmp_mb_bin(CHARSET_INFO *cs,
851 const char *str,const char *str_end,
852 const char *wildstr,const char *wildend,
853 int escape, int w_one, int w_many);
854
855int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
856 const char *s, const char *t);
857
858void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
859 const uchar *key, size_t len,ulong *nr1, ulong *nr2);
860
861void my_hash_sort_mb_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
862 const uchar *key, size_t len,
863 ulong *nr1, ulong *nr2);
864
865size_t my_strnxfrm_mb(CHARSET_INFO *,
866 uchar *dst, size_t dstlen, uint nweights,
867 const uchar *src, size_t srclen, uint flags);
868
869size_t my_strnxfrm_mb_nopad(CHARSET_INFO *,
870 uchar *dst, size_t dstlen, uint nweights,
871 const uchar *src, size_t srclen, uint flags);
872
873size_t my_strnxfrm_unicode(CHARSET_INFO *,
874 uchar *dst, size_t dstlen, uint nweights,
875 const uchar *src, size_t srclen, uint flags);
876
877size_t my_strnxfrm_unicode_nopad(CHARSET_INFO *,
878 uchar *dst, size_t dstlen, uint nweights,
879 const uchar *src, size_t srclen, uint flags);
880
881size_t my_strnxfrmlen_unicode(CHARSET_INFO *, size_t);
882
883size_t my_strnxfrm_unicode_full_bin(CHARSET_INFO *,
884 uchar *dst, size_t dstlen,
885 uint nweights, const uchar *src,
886 size_t srclen, uint flags);
887
888size_t my_strnxfrm_unicode_full_nopad_bin(CHARSET_INFO *,
889 uchar *dst, size_t dstlen,
890 uint nweights, const uchar *src,
891 size_t srclen, uint flags);
892
893size_t my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *, size_t);
894
895int my_wildcmp_unicode(CHARSET_INFO *cs,
896 const char *str, const char *str_end,
897 const char *wildstr, const char *wildend,
898 int escape, int w_one, int w_many,
899 MY_UNICASE_INFO *weights);
900
901extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader,
902 const char *buf, size_t buflen);
903extern char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end,
904 pchar c);
905extern size_t my_strcspn(CHARSET_INFO *cs, const char *str, const char *end,
906 const char *accept);
907
908my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len);
909my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
910
911
912typedef struct
913{
914 size_t char_length;
915 uint repertoire;
916} MY_STRING_METADATA;
917
918void my_string_metadata_get(MY_STRING_METADATA *metadata,
919 CHARSET_INFO *cs, const char *str, size_t len);
920uint my_string_repertoire(CHARSET_INFO *cs, const char *str, size_t len);
921my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
922uint my_charset_repertoire(CHARSET_INFO *cs);
923
924uint my_strxfrm_flag_normalize(uint flags, uint nlevels);
925void my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
926 uint flags, uint level);
927size_t my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
928 uchar *str, uchar *frmend, uchar *strend,
929 uint nweights, uint flags, uint level);
930size_t my_strxfrm_pad_desc_and_reverse_nopad(CHARSET_INFO *cs,
931 uchar *str, uchar *frmend,
932 uchar *strend, uint nweights,
933 uint flags, uint level);
934
935const MY_CONTRACTIONS *my_charset_get_contractions(CHARSET_INFO *cs,
936 int level);
937
938extern size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
939 const char* fmt, va_list ap);
940
941/*
942 Convert a string between two character sets.
943 Bad byte sequences as well as characters that cannot be
944 encoded in the destination character set are replaced to '?'.
945*/
946uint32 my_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
947 const char *from, uint32 from_length,
948 CHARSET_INFO *from_cs, uint *errors);
949
950/**
951 An extended version of my_convert(), to pass non-default mb_wc() and wc_mb().
952 For example, String::copy_printable() which is used in
953 Protocol::store_warning() uses this to escape control
954 and non-convertable characters.
955*/
956uint32 my_convert_using_func(char *to, size_t to_length, CHARSET_INFO *to_cs,
957 my_charset_conv_wc_mb mb_wc,
958 const char *from, size_t from_length,
959 CHARSET_INFO *from_cs,
960 my_charset_conv_mb_wc wc_mb,
961 uint *errors);
962/*
963 Convert a string between two character sets.
964 Bad byte sequences as well as characters that cannot be
965 encoded in the destination character set are replaced to '?'.
966 Not more than "nchars" characters are copied.
967 Conversion statistics is returned in "status" and is set as follows:
968 - status->m_native_copy_status.m_source_end_pos - to the position
969 between (src) and (src+src_length), where the function stopped reading
970 the source string.
971 - status->m_native_copy_status.m_well_formed_error_pos - to the position
972 between (src) and (src+src_length), where the first badly formed byte
973 sequence was found, or to NULL if the string was well formed in the
974 given range.
975 - status->m_cannot_convert_error_pos - to the position
976 between (src) and (src+src_length), where the first character that
977 cannot be represented in the destination character set was found,
978 or to NULL if all characters in the given range were successfully
979 converted.
980
981 "src" is allowed to be a NULL pointer. In this case "src_length" must
982 be equal to 0. All "status" members are initialized to NULL, and 0 is
983 returned.
984*/
985size_t my_convert_fix(CHARSET_INFO *dstcs, char *dst, size_t dst_length,
986 CHARSET_INFO *srccs, const char *src, size_t src_length,
987 size_t nchars,
988 MY_STRCOPY_STATUS *copy_status,
989 MY_STRCONV_STATUS *conv_status);
990
/*
  Character class bits kept in the "ctype" table of a character set.
  One table entry may carry several of them.
*/
#define _MY_U   0x01    /* Upper case */
#define _MY_L   0x02    /* Lower case */
#define _MY_NMR 0x04    /* Numeral (digit) */
#define _MY_SPC 0x08    /* Spacing character */
#define _MY_PNT 0x10    /* Punctuation */
#define _MY_CTR 0x20    /* Control character */
#define _MY_B   0x40    /* Blank */
#define _MY_X   0x80    /* heXadecimal digit */


/* Character set independent helpers working on the 7-bit ASCII range */
#define my_isascii(ch)  (((ch) & ~0x7F) == 0)
#define my_toascii(ch)  ((ch) & 0x7F)
#define my_tocntrl(ch)  ((ch) & 0x1F)
#define my_toprint(ch)  ((ch) | 0x40)

/* Case mapping through the to_upper / to_lower tables of the charset */
#define my_toupper(cs, ch)  (char) ((cs)->to_upper[(uchar) (ch)])
#define my_tolower(cs, ch)  (char) ((cs)->to_lower[(uchar) (ch)])

/*
  Classification. "ch" is converted to uchar first, and the class bits of
  byte value N are read from ctype[N + 1]. Each macro yields the selected
  bits (non-zero means "yes"), not a normalized 0/1.
*/
#define my_isalpha(cs, ch)   ((cs)->ctype[(uchar) (ch) + 1] & (_MY_U | _MY_L))
#define my_isupper(cs, ch)   ((cs)->ctype[(uchar) (ch) + 1] & _MY_U)
#define my_islower(cs, ch)   ((cs)->ctype[(uchar) (ch) + 1] & _MY_L)
#define my_isdigit(cs, ch)   ((cs)->ctype[(uchar) (ch) + 1] & _MY_NMR)
#define my_isxdigit(cs, ch)  ((cs)->ctype[(uchar) (ch) + 1] & _MY_X)
#define my_isalnum(cs, ch) \
  ((cs)->ctype[(uchar) (ch) + 1] & (_MY_U | _MY_L | _MY_NMR))
#define my_isspace(cs, ch)   ((cs)->ctype[(uchar) (ch) + 1] & _MY_SPC)
#define my_ispunct(cs, ch)   ((cs)->ctype[(uchar) (ch) + 1] & _MY_PNT)
#define my_isprint(cs, ch) \
  ((cs)->ctype[(uchar) (ch) + 1] & \
   (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B))
#define my_isgraph(cs, ch) \
  ((cs)->ctype[(uchar) (ch) + 1] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR))
#define my_iscntrl(cs, ch)   ((cs)->ctype[(uchar) (ch) + 1] & _MY_CTR)

/*
  Some macros that should be cleaned up a little.
  Beware: "ch" may be evaluated twice, so do not pass an expression
  with side effects.
*/
#define my_isvar(cs, ch)        (my_isalnum(cs, ch) || ((ch) == '_'))
#define my_isvar_start(cs, ch)  (my_isalpha(cs, ch) || ((ch) == '_'))
1022
/* Tests on the "state" bits of a character set; yield the masked bit. */
#define my_binary_compare(cs)  ((cs)->state & MY_CS_BINSORT)
#define use_strnxfrm(cs)       ((cs)->state & MY_CS_STRNXFRM)

/*
  Shorthands dispatching through the collation handler (cs)->coll.
  my_strnxfrm() passes "dstlen" twice (as the buffer length and as the
  weight count) and always uses MY_STRXFRM_PAD_WITH_SPACE;
  my_strnncoll() always passes 0 as the last handler argument.
*/
#define my_strnxfrm(cs, dst, dstlen, src, srclen) \
  ((cs)->coll->strnxfrm((cs), (dst), (dstlen), (dstlen), (src), (srclen), \
                        MY_STRXFRM_PAD_WITH_SPACE))
#define my_strnncoll(cs, a, alen, b, blen) \
  ((cs)->coll->strnncoll((cs), (a), (alen), (b), (blen), 0))
#define my_like_range(cs, ptr, ptrlen, esc, w_one, w_many, reslen, \
                      minstr, maxstr, minlen, maxlen) \
  ((cs)->coll->like_range((cs), (ptr), (ptrlen), (esc), (w_one), (w_many), \
                          (reslen), (minstr), (maxstr), (minlen), (maxlen)))
#define my_wildcmp(cs, str, strend, wild, wildend, esc, w_one, w_many) \
  ((cs)->coll->wildcmp((cs), (str), (strend), (wild), (wildend), \
                       (esc), (w_one), (w_many)))
#define my_strcasecmp(cs, a, b)  ((cs)->coll->strcasecmp((cs), (a), (b)))

/* Dispatches through the charset handler; casts both ends to const char* */
#define my_charpos(cs, b, e, num) \
  ((cs)->cset->charpos((cs), (const char *) (b), (const char *) (e), (num)))

/* True when a character of this character set may take more than one byte */
#define use_mb(cs)  ((cs)->mbmaxlen > 1)
1035/**
1036 Detect if the leftmost character in a string is a valid multi-byte character
1037 and return its length, or return 0 otherwise.
1038 @param cs - character set
1039 @param str - the beginning of the string
1040 @param end - the string end (the next byte after the string)
1041 @return >0, for a multi-byte character
1042 @rerurn 0, for a single byte character, broken sequence, empty string.
1043*/
1044static inline
1045uint my_ismbchar(CHARSET_INFO *cs, const char *str, const char *end)
1046{
1047 int char_length= (cs->cset->charlen)(cs, (const uchar *) str,
1048 (const uchar *) end);
1049 return char_length > 1 ? (uint) char_length : 0U;
1050}
1051
1052
1053/**
1054 Return length of the leftmost character in a string.
1055 @param cs - character set
1056 @param str - the beginning of the string
1057 @param end - the string end (the next byte after the string)
1058 @return <=0 on errors (EOL, wrong byte sequence)
1059 @return 1 on a single byte character
1060 @return >1 on a multi-byte character
1061
1062 Note, inlike my_ismbchar(), 1 is returned for a single byte character.
1063*/
1064static inline
1065int my_charlen(CHARSET_INFO *cs, const char *str, const char *end)
1066{
1067 return (cs->cset->charlen)(cs, (const uchar *) str,
1068 (const uchar *) end);
1069}
1070
1071
1072/**
1073 Convert broken and incomplete byte sequences to 1 byte.
1074*/
1075static inline
1076uint my_charlen_fix(CHARSET_INFO *cs, const char *str, const char *end)
1077{
1078 int char_length= my_charlen(cs, str, end);
1079 DBUG_ASSERT(str < end);
1080 return char_length > 0 ? (uint) char_length : (uint) 1U;
1081}
1082
1083
1084/*
1085 A compatibility replacement pure C function for the former
1086 cs->cset->well_formed_len().
1087 In C++ code please use Well_formed_prefix::length() instead.
1088*/
1089static inline size_t
1090my_well_formed_length(CHARSET_INFO *cs, const char *b, const char *e,
1091 size_t nchars, int *error)
1092{
1093 MY_STRCOPY_STATUS status;
1094 (void) cs->cset->well_formed_char_length(cs, b, e, nchars, &status);
1095 *error= status.m_well_formed_error_pos == NULL ? 0 : 1;
1096 return (size_t) (status.m_source_end_pos - b);
1097}
1098
1099
/*
  Shorthands dispatching through the charset handler (cs)->cset.
  Arguments are forwarded unchanged, in the same order, after "cs".
*/
#define my_caseup_str(cs, str)  ((cs)->cset->caseup_str((cs), (str)))
#define my_casedn_str(cs, str)  ((cs)->cset->casedn_str((cs), (str)))
#define my_strntol(cs, str, len, base, endp, err) \
  ((cs)->cset->strntol((cs), (str), (len), (base), (endp), (err)))
#define my_strntoul(cs, str, len, base, endp, err) \
  ((cs)->cset->strntoul((cs), (str), (len), (base), (endp), (err)))
#define my_strntoll(cs, str, len, base, endp, err) \
  ((cs)->cset->strntoll((cs), (str), (len), (base), (endp), (err)))
#define my_strntoull(cs, str, len, base, endp, err) \
  ((cs)->cset->strntoull((cs), (str), (len), (base), (endp), (err)))
#define my_strntod(cs, str, len, endp, err) \
  ((cs)->cset->strntod((cs), (str), (len), (endp), (err)))
1107
1108
1109/* XXX: still need to take care of this one */
1110#ifdef MY_CHARSET_TIS620
1111#error The TIS620 charset is broken at the moment. Tell tim to fix it.
1112#define USE_TIS620
1113#include "t_ctype.h"
1114#endif
1115
1116#ifdef __cplusplus
1117}
1118#endif
1119
1120#endif /* _m_ctype_h */
1121