m_ctype.h source code [MariaDB/include/m_ctype.h]

1	/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
2
3	This program is free software; you can redistribute it and/or modify
4	it under the terms of the GNU General Public License as published by
5	the Free Software Foundation; version 2 of the License.
6
7	This program is distributed in the hope that it will be useful,
8	but WITHOUT ANY WARRANTY; without even the implied warranty of
9	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	GNU General Public License for more details.
11
12	You should have received a copy of the GNU General Public License
13	along with this program; if not, write to the Free Software
14	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15
16	/*
17	A better implementation of the UNIX ctype(3) library.
18	*/
19
20	#ifndef _m_ctype_h
21	#define _m_ctype_h
22
23	#include <my_attribute.h>
24
/*
  Message severity levels.
  The "reporter" callback of MY_CHARSET_LOADER takes one of these values.
*/
enum loglevel {
  ERROR_LEVEL=       0,
  WARNING_LEVEL=     1,
  INFORMATION_LEVEL= 2
};
30
31	#ifdef __cplusplus
32	extern "C" {
33	#endif
34
35	#define MY_CS_NAME_SIZE 32
36	#define MY_CS_CTYPE_TABLE_SIZE 257
37	#define MY_CS_TO_LOWER_TABLE_SIZE 256
38	#define MY_CS_TO_UPPER_TABLE_SIZE 256
39	#define MY_CS_SORT_ORDER_TABLE_SIZE 256
40	#define MY_CS_TO_UNI_TABLE_SIZE 256
41
42	#define CHARSET_DIR "charsets/"
43
44	#define my_wc_t ulong
45
46	#define MY_CS_REPLACEMENT_CHARACTER 0xFFFD
47
/*
  On i386 we store Unicode->CS conversion tables for
  some character sets using Big-endian order,
  to copy two bytes at once.
  This gives some performance improvement.

  MB2(x)              - on i386 swaps the two low bytes of x,
                        elsewhere expands to x unchanged.
  MY_PUT_MB2(s, code) - on i386 stores "code" as a single uint16 at "s",
                        elsewhere stores the high byte at s[0] and the
                        low byte at s[1].
                        Both arguments are parenthesized in the expansion,
                        so expressions such as (a | b) are safe to pass.
*/
#ifdef __i386__
#define MB2(x)                (((x) >> 8) + (((x) & 0xFF) << 8))
#define MY_PUT_MB2(s, code)   { *((uint16*)(s))= (code); }
#else
#define MB2(x)                (x)
#define MY_PUT_MB2(s, code)   { (s)[0]= (code) >> 8; (s)[1]= (code) & 0xFF; }
#endif
61
62	typedef const struct my_charset_handler_st MY_CHARSET_HANDLER;
63	typedef const struct my_collation_handler_st MY_COLLATION_HANDLER;
64
65	typedef const struct unicase_info_st MY_UNICASE_INFO;
66	typedef const struct uni_ctype_st MY_UNI_CTYPE;
67	typedef const struct my_uni_idx_st MY_UNI_IDX;
68
69	typedef struct unicase_info_char_st
70	{
71	uint32 toupper;
72	uint32 tolower;
73	uint32 sort;
74	} MY_UNICASE_CHARACTER;
75
76
77	struct unicase_info_st
78	{
79	my_wc_t maxchar;
80	MY_UNICASE_CHARACTER **page;
81	};
82
83
84	extern MY_UNICASE_INFO my_unicase_default;
85	extern MY_UNICASE_INFO my_unicase_turkish;
86	extern MY_UNICASE_INFO my_unicase_mysql500;
87	extern MY_UNICASE_INFO my_unicase_unicode520;
88
#define MY_UCA_MAX_CONTRACTION 6
/*
  The DUCET tables in ctype-uca.c are dumped with a limit of 8 weights
  per character. cs->strxfrm_multiply is set to 8 for all UCA based collations.

  In language-specific UCA collations (with tailorings) we also do not allow
  a single character to have more than 8 weights to stay with the same
  strxfrm_multiply limit. Note, contractions are allowed to have twice longer
  weight strings (up to 16 weights). As a contraction consists of at
  least 2 characters, this makes sure that strxfrm_multiply ratio of 8
  is respected.
*/
#define MY_UCA_MAX_WEIGHT_SIZE (8+1)               /* Including 0 terminator */
#define MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE (2*8+1) /* Including 0 terminator */
#define MY_UCA_WEIGHT_LEVELS   2
104
105	typedef struct my_contraction_t
106	{
107	my_wc_t ch[MY_UCA_MAX_CONTRACTION]; / Character sequence /
108	uint16 weight[MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE];/ Its weight string, 0-terminated /
109	my_bool with_context;
110	} MY_CONTRACTION;
111
112
113	typedef struct my_contraction_list_t
114	{
115	size_t nitems; / Number of items in the list /
116	MY_CONTRACTION item; /* List of contractions /
117	char flags; /* Character flags, e.g. "is contraction head") /
118	} MY_CONTRACTIONS;
119
120	my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc);
121	my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc);
122	uint16 my_uca_contraction2_weight(const* MY_CONTRACTIONS *c,
123	my_wc_t wc1, my_wc_t wc2);
124
125
126	/ Collation weights on a single level (e.g. primary, secondary, tertiarty) /
127	typedef struct my_uca_level_info_st
128	{
129	my_wc_t maxchar;
130	uchar *lengths;
131	uint16 **weights;
132	MY_CONTRACTIONS contractions;
133	uint levelno;
134	} MY_UCA_WEIGHT_LEVEL;
135
136
137	typedef struct uca_info_st
138	{
139	MY_UCA_WEIGHT_LEVEL level[MY_UCA_WEIGHT_LEVELS];
140
141	/ Logical positions /
142	my_wc_t first_non_ignorable;
143	my_wc_t last_non_ignorable;
144	my_wc_t first_primary_ignorable;
145	my_wc_t last_primary_ignorable;
146	my_wc_t first_secondary_ignorable;
147	my_wc_t last_secondary_ignorable;
148	my_wc_t first_tertiary_ignorable;
149	my_wc_t last_tertiary_ignorable;
150	my_wc_t first_trailing;
151	my_wc_t last_trailing;
152	my_wc_t first_variable;
153	my_wc_t last_variable;
154
155	} MY_UCA_INFO;
156
157
158
159	extern MY_UCA_INFO my_uca_v400;
160
161
162	struct uni_ctype_st
163	{
164	uchar pctype;
165	const uchar *ctype;
166	};
167
168	extern MY_UNI_CTYPE my_uni_ctype[`256`];
169
/* mb_wc and wc_mb return codes */
#define MY_CS_ILSEQ	0       /* Wrong byte sequence: mb_wc                  */
#define MY_CS_ILUNI	0       /* Cannot encode Unicode to charset: wc_mb     */
#define MY_CS_TOOSMALL  (-101)  /* Need at least one byte:    wc_mb and mb_wc */
#define MY_CS_TOOSMALL2 (-102)  /* Need at least two bytes:   wc_mb and mb_wc */
#define MY_CS_TOOSMALL3 (-103)  /* Need at least three bytes: wc_mb and mb_wc */
/* These following three are currently not really used */
#define MY_CS_TOOSMALL4 (-104)  /* Need at least 4 bytes: wc_mb and mb_wc */
#define MY_CS_TOOSMALL5 (-105)  /* Need at least 5 bytes: wc_mb and mb_wc */
#define MY_CS_TOOSMALL6 (-106)  /* Need at least 6 bytes: wc_mb and mb_wc */
/* A helper macro for "need at least n bytes" */
#define MY_CS_TOOSMALLN(n)    (-100-(n))

#define MY_CS_MBMAXLEN  6     /* Maximum supported mbmaxlen */
/* True if rc is one of MY_CS_TOOSMALL .. MY_CS_TOOSMALL6 */
#define MY_CS_IS_TOOSMALL(rc) ((rc) >= MY_CS_TOOSMALL6 && (rc) <= MY_CS_TOOSMALL)
185
#define MY_SEQ_INTTAIL	1
#define MY_SEQ_SPACES	2
#define MY_SEQ_NONSPACES 3 /* Skip non-space characters, including bad bytes */

        /* My charsets_list flags */
#define MY_CS_COMPILED  1      /* compiled-in sets               */
#define MY_CS_CONFIG    2      /* sets that have a *.conf file   */
#define MY_CS_INDEX     4      /* sets listed in the Index file  */
#define MY_CS_LOADED    8      /* sets that are currently loaded */
#define MY_CS_BINSORT	16     /* if binary sort order           */
#define MY_CS_PRIMARY	32     /* if primary collation           */
#define MY_CS_STRNXFRM	64     /* if strnxfrm is used for sort   */
#define MY_CS_UNICODE	128    /* if a charset is BMP Unicode    */
#define MY_CS_READY	256    /* if a charset is initialized    */
#define MY_CS_AVAILABLE	512    /* If either compiled-in or loaded*/
#define MY_CS_CSSORT	1024   /* if case sensitive sort order   */
#define MY_CS_HIDDEN	2048   /* don't display in SHOW          */
#define MY_CS_PUREASCII 4096   /* if a charset is pure ascii     */
#define MY_CS_NONASCII  8192   /* if not ASCII-compatible        */
#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
#define MY_CS_LOWER_SORT 32768 /* If use lower case as weight   */
#define MY_CS_STRNXFRM_BAD_NWEIGHTS 0x10000 /* strnxfrm ignores "nweights" */
#define MY_CS_NOPAD   0x20000  /* if does not ignore trailing spaces */
#define MY_CS_NON1TO1 0x40000  /* Has a complex mapping from characters
                                  to weights, e.g. contractions, expansions,
                                  ignorable characters */
#define MY_CHARSET_UNDEFINED 0
213
/* Character repertoire flags */
#define MY_REPERTOIRE_ASCII      1 /* Pure ASCII            U+0000..U+007F */
#define MY_REPERTOIRE_EXTENDED   2 /* Extended characters:  U+0080..U+FFFF */
#define MY_REPERTOIRE_UNICODE30  3 /* ASCII | EXTENDED:     U+0000..U+FFFF */
218
/* Flags for strxfrm */
#define MY_STRXFRM_LEVEL1          0x00000001 /* for primary weights   */
#define MY_STRXFRM_LEVEL2          0x00000002 /* for secondary weights */
#define MY_STRXFRM_LEVEL3          0x00000004 /* for tertiary weights  */
#define MY_STRXFRM_LEVEL4          0x00000008 /* fourth level weights  */
#define MY_STRXFRM_LEVEL5          0x00000010 /* fifth level weights   */
#define MY_STRXFRM_LEVEL6          0x00000020 /* sixth level weights   */
#define MY_STRXFRM_LEVEL_ALL       0x0000003F /* Bit OR for the above six */
#define MY_STRXFRM_NLEVELS         6          /* Number of possible levels*/

#define MY_STRXFRM_PAD_WITH_SPACE  0x00000040 /* if pad result with spaces */
#define MY_STRXFRM_PAD_TO_MAXLEN   0x00000080 /* if pad tail(for filesort) */

/*
  Each DESC flag equals the matching LEVEL flag shifted left by
  MY_STRXFRM_DESC_SHIFT, so level 3 is 0x04 << 8 == 0x400.
*/
#define MY_STRXFRM_DESC_LEVEL1     0x00000100 /* if desc order for level1 */
#define MY_STRXFRM_DESC_LEVEL2     0x00000200 /* if desc order for level2 */
#define MY_STRXFRM_DESC_LEVEL3     0x00000400 /* if desc order for level3 */
#define MY_STRXFRM_DESC_LEVEL4     0x00000800 /* if desc order for level4 */
#define MY_STRXFRM_DESC_LEVEL5     0x00001000 /* if desc order for level5 */
#define MY_STRXFRM_DESC_LEVEL6     0x00002000 /* if desc order for level6 */
#define MY_STRXFRM_DESC_SHIFT      8

#define MY_STRXFRM_UNUSED_00004000 0x00004000 /* for future extensions     */
#define MY_STRXFRM_UNUSED_00008000 0x00008000 /* for future extensions     */

/* Each REVERSE flag is the LEVEL flag shifted by MY_STRXFRM_REVERSE_SHIFT. */
#define MY_STRXFRM_REVERSE_LEVEL1  0x00010000 /* if reverse order for level1 */
#define MY_STRXFRM_REVERSE_LEVEL2  0x00020000 /* if reverse order for level2 */
#define MY_STRXFRM_REVERSE_LEVEL3  0x00040000 /* if reverse order for level3 */
#define MY_STRXFRM_REVERSE_LEVEL4  0x00080000 /* if reverse order for level4 */
#define MY_STRXFRM_REVERSE_LEVEL5  0x00100000 /* if reverse order for level5 */
#define MY_STRXFRM_REVERSE_LEVEL6  0x00200000 /* if reverse order for level6 */
#define MY_STRXFRM_REVERSE_SHIFT   16
250
251	/*
252	Collation IDs for MariaDB that should not conflict with MySQL.
253	We reserve 256..511, because MySQL will most likely use this range
254	when the range 0..255 is full.
255
256	We use the next 256 IDs starting from 512 and divide
257	them into 8 chunks, 32 collations each, as follows:
258
259	512 + (0..31) for single byte collations (e.g. latin9)
260	512 + (32..63) reserved (e.g. for utf32le, or more single byte collations)
261	512 + (64..95) for utf8
262	512 + (96..127) for utf8mb4
263	512 + (128..159) for ucs2
264	512 + (160..191) for utf16
265	512 + (192..223) for utf16le
266	512 + (224..255) for utf32
267	*/
268	#define MY_PAGE2_COLLATION_ID_8BIT 0x200
269	#define MY_PAGE2_COLLATION_ID_RESERVED 0x220
270	#define MY_PAGE2_COLLATION_ID_UTF8 0x240
271	#define MY_PAGE2_COLLATION_ID_UTF8MB4 0x260
272	#define MY_PAGE2_COLLATION_ID_UCS2 0x280
273	#define MY_PAGE2_COLLATION_ID_UTF16 0x2A0
274	#define MY_PAGE2_COLLATION_ID_UTF16LE 0x2C0
275	#define MY_PAGE2_COLLATION_ID_UTF32 0x2E0
276
277	struct my_uni_idx_st
278	{
279	uint16 from;
280	uint16 to;
281	const uchar *tab;
282	};
283
284	typedef struct
285	{
286	uint beg;
287	uint end;
288	uint mb_len;
289	} my_match_t;
290
291	enum my_lex_states
292	{
293	MY_LEX_START, MY_LEX_CHAR, MY_LEX_IDENT,
294	MY_LEX_IDENT_SEP, MY_LEX_IDENT_START,
295	MY_LEX_REAL, MY_LEX_HEX_NUMBER, MY_LEX_BIN_NUMBER,
296	MY_LEX_CMP_OP, MY_LEX_LONG_CMP_OP, MY_LEX_STRING, MY_LEX_COMMENT, MY_LEX_END,
297	MY_LEX_OPERATOR_OR_IDENT, MY_LEX_NUMBER_IDENT, MY_LEX_INT_OR_REAL,
298	MY_LEX_REAL_OR_POINT, MY_LEX_BOOL, MY_LEX_EOL, MY_LEX_ESCAPE,
299	MY_LEX_LONG_COMMENT, MY_LEX_END_LONG_COMMENT, MY_LEX_SEMICOLON,
300	MY_LEX_SET_VAR, MY_LEX_USER_END, MY_LEX_HOSTNAME, MY_LEX_SKIP,
301	MY_LEX_USER_VARIABLE_DELIMITER, MY_LEX_SYSTEM_VAR,
302	MY_LEX_IDENT_OR_KEYWORD,
303	MY_LEX_IDENT_OR_HEX, MY_LEX_IDENT_OR_BIN, MY_LEX_IDENT_OR_NCHAR,
304	MY_LEX_STRING_OR_DELIMITER, MY_LEX_MINUS_OR_COMMENT, MY_LEX_PLACEHOLDER,
305	MY_LEX_COMMA
306	};
307
308	struct charset_info_st;
309
/*
  Error buffer and callbacks handed to the charset/collation "init" routines.
  NOTE(review): the role of each callback is inferred from its name
  (allocators, message reporter, collation registration) - confirm
  against the callers.
*/
typedef struct my_charset_loader_st
{
  char error[128];
  void *(*once_alloc)(size_t);
  void *(*malloc)(size_t);
  void *(*realloc)(void *, size_t);
  void (*free)(void *);
  void (*reporter)(enum loglevel, const char *format, ...);
  int  (*add_collation)(struct charset_info_st *cs);
} MY_CHARSET_LOADER;
320
321
322	extern int (my_string_stack_guard)(int*);
323
324	/ See strings/CHARSET_INFO.txt for information about this structure /
325	struct my_collation_handler_st
326	{
327	my_bool (init)(struct* charset_info_st , MY_CHARSET_LOADER );
328	/ Collation routines /
329	int (strnncoll)(CHARSET_INFO ,
330	const uchar , size_t, const* uchar *, size_t, my_bool);
331	int (strnncollsp)(CHARSET_INFO ,
332	const uchar , size_t, const* uchar *, size_t);
333	size_t (strnxfrm)(CHARSET_INFO ,
334	uchar *dst, size_t dstlen, uint nweights,
335	const uchar *src, size_t srclen, uint flags);
336	size_t (strnxfrmlen)(CHARSET_INFO , size_t);
337	my_bool (like_range)(CHARSET_INFO ,
338	const char *s, size_t s_length,
339	pchar w_prefix, pchar w_one, pchar w_many,
340	size_t res_length,
341	char min_str, char* *max_str,
342	size_t min_len, size_t max_len);
343	int (wildcmp)(CHARSET_INFO ,
344	const char str,const* char *str_end,
345	const char wildstr,const* char *wildend,
346	int escape,int w_one, int w_many);
347
348	int (strcasecmp)(CHARSET_INFO , const char , const* char *);
349
350	uint (instr)(CHARSET_INFO ,
351	const char *b, size_t b_length,
352	const char *s, size_t s_length,
353	my_match_t *match, uint nmatch);
354
355	/ Hash calculation /
356	void (hash_sort)(CHARSET_INFO cs, const uchar *key, size_t len,
357	ulong nr1, ulong nr2);
358	my_bool (propagate)(CHARSET_INFO cs, const uchar *str, size_t len);
359	};
360
361	extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
362	extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
363	extern MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler;
364	extern MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler;
365	extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
366
367	/ Some typedef to make it easy for C++ to make function pointers /
368	typedef int (my_charset_conv_mb_wc)(CHARSET_INFO , my_wc_t *,
369	const uchar , const* uchar *);
370	typedef int (my_charset_conv_wc_mb)(CHARSET_INFO , my_wc_t,
371	uchar , uchar );
372	typedef size_t (my_charset_conv_case)(CHARSET_INFO ,
373	char , size_t, char* *, size_t);
374
/*
  A structure to return the statistics of a native string copying,
  when no Unicode conversion is involved.

  The structure is OK to be uninitialized before calling a copying routine.
  A copying routine must populate the structure as follows:
    - m_source_end_pos must be set to a non-NULL value
      in the range of the input string.
    - m_well_formed_error_pos must be set to NULL if the string was
      well formed, or to the position of the leftmost bad byte sequence.
*/
typedef struct
{
  const char *m_source_end_pos;        /* Position where reading stopped */
  const char *m_well_formed_error_pos; /* Position where a bad byte was found*/
} MY_STRCOPY_STATUS;
391
392
393	/*
394	A structure to return the statistics of a Unicode string conversion.
395	*/
396	typedef struct
397	{
398	const char *m_cannot_convert_error_pos;
399	} MY_STRCONV_STATUS;
400
401
402	/ See strings/CHARSET_INFO.txt about information on this structure /
403	struct my_charset_handler_st
404	{
405	my_bool (init)(struct* charset_info_st , MY_CHARSET_LOADER loader);
406	/ Multibyte routines /
407	size_t (numchars)(CHARSET_INFO , const char b, const* char *e);
408	size_t (charpos)(CHARSET_INFO , const char b, const* char *e,
409	size_t pos);
410	size_t (lengthsp)(CHARSET_INFO , const char *ptr, size_t length);
411	size_t (numcells)(CHARSET_INFO , const char b, const* char *e);
412
413	/ Unicode conversion /
414	my_charset_conv_mb_wc mb_wc;
415	my_charset_conv_wc_mb wc_mb;
416
417	/ CTYPE scanner /
418	int (ctype)(CHARSET_INFO cs, int *ctype,
419	const uchar s, const* uchar *e);
420
421	/ Functions for case and sort conversion /
422	size_t (caseup_str)(CHARSET_INFO , char *);
423	size_t (casedn_str)(CHARSET_INFO , char *);
424
425	my_charset_conv_case caseup;
426	my_charset_conv_case casedn;
427
428	/ Charset dependent snprintf() /
429	size_t (snprintf)(CHARSET_INFO , char *to, size_t n,
430	const char *fmt,
431	...) ATTRIBUTE_FORMAT_FPTR(printf, `4`, `5`);
432	size_t (long10_to_str)(CHARSET_INFO , char *to, size_t n,
433	int radix, long int val);
434	size_t (longlong10_to_str)(CHARSET_INFO , char *to, size_t n,
435	int radix, longlong val);
436
437	void (fill)(CHARSET_INFO , char to, size_t len, int* fill);
438
439	/ String-to-number conversion routines /
440	long (strntol)(CHARSET_INFO , const char *s, size_t l,
441	int base, char *e, int* *err);
442	ulong (strntoul)(CHARSET_INFO , const char *s, size_t l,
443	int base, char *e, int* *err);
444	longlong (strntoll)(CHARSET_INFO , const char *s, size_t l,
445	int base, char *e, int* *err);
446	ulonglong (strntoull)(CHARSET_INFO , const char *s, size_t l,
447	int base, char *e, int* *err);
448	double (strntod)(CHARSET_INFO , char s, size_t l, char* **e,
449	int *err);
450	longlong (strtoll10)(CHARSET_INFO cs,
451	const char nptr, char* *endptr, int* *error);
452	ulonglong (strntoull10rnd)(CHARSET_INFO cs,
453	const char *str, size_t length,
454	int unsigned_fl,
455	char *endptr, int* *error);
456	size_t (scan)(CHARSET_INFO , const char b, const* char *e,
457	int sq);
458
459	/ String copying routines and helpers for them /
460	/*
461	charlen() - calculate length of the left-most character in bytes.
462	@param cs Character set
463	@param str The beginning of the string
464	@param end The end of the string
465
466	@return MY_CS_ILSEQ if a bad byte sequence was found.
467	@return MY_CS_TOOSMALLN(x) if the string ended unexpectedly.
468	@return a positive number in the range 1..mbmaxlen,
469	if a valid character was found.
470	*/
471	int (charlen)(CHARSET_INFO cs, const uchar str, const* uchar *end);
472	/*
473	well_formed_char_length() - returns character length of a string.
474
475	@param cs Character set
476	@param str The beginning of the string
477	@param end The end of the string
478	@param nchars Not more than "nchars" left-most characters are checked.
479	@param status[OUT] Additional statistics is returned here.
480	"status" can be uninitialized before the call,
481	and it is fully initialized after the call.
482
483	status->m_source_end_pos is set to the position where reading stopped.
484
485	If a bad byte sequence is found, the function returns immediately and
486	status->m_well_formed_error_pos is set to the position where a bad byte
487	sequence was found.
488
489	status->m_well_formed_error_pos is set to NULL if no bad bytes were found.
490	If status->m_well_formed_error_pos is NULL after the call, that means:
491	- either the function reached the end of the string,
492	- or all "nchars" characters were read.
493	The caller can check status->m_source_end_pos to detect which of these two
494	happened.
495	*/
496	size_t (well_formed_char_length)(CHARSET_INFO cs,
497	const char str, const* char *end,
498	size_t nchars,
499	MY_STRCOPY_STATUS *status);
500
501	/*
502	copy_fix() - copy a string, replace bad bytes to '?'.
503	Not more than "nchars" characters are copied.
504
505	status->m_source_end_pos is set to a position in the range
506	between "src" and "src + src_length", where reading stopped.
507
508	status->m_well_formed_error_pos is set to NULL if the string
509	in the range "src" and "status->m_source_end_pos" was well formed,
510	or is set to a position between "src" and "src + src_length" where
511	the leftmost bad byte sequence was found.
512	*/
513	size_t (copy_fix)(CHARSET_INFO ,
514	char *dst, size_t dst_length,
515	const char *src, size_t src_length,
516	size_t nchars, MY_STRCOPY_STATUS *status);
517	/**
518	Write a character to the target string, using its native code.
519	For Unicode character sets (utf8, ucs2, utf16, utf16le, utf32, filename)
520	native codes are equivalent to Unicode code points.
521	For 8bit character sets the native code is just the byte value.
522	For Asian characters sets:
523	- MB1 native code is just the byte value (e.g. on the ASCII range)
524	- MB2 native code is ((b0 << 8) + b1).
525	- MB3 native code is ((b0 <<16) + (b1 << 8) + b2)
526	Note, CHARSET_INFO::min_sort_char and CHARSET_INFO::max_sort_char
527	are defined in native notation and should be written using
528	cs->cset->native_to_mb() rather than cs->cset->wc_mb().
529	*/
530	my_charset_conv_wc_mb native_to_mb;
531	};
532
533	extern MY_CHARSET_HANDLER my_charset_8bit_handler;
534	extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
535	extern MY_CHARSET_HANDLER my_charset_utf8_handler;
536
537
538	/*
539	We define this CHARSET_INFO_DEFINED here to prevent a repeat of the
540	typedef in hash.c, which will cause a compiler error.
541	*/
542	#define CHARSET_INFO_DEFINED
543
544	/ See strings/CHARSET_INFO.txt about information on this structure /
545	struct charset_info_st
546	{
547	uint number;
548	uint primary_number;
549	uint binary_number;
550	uint state;
551	const char *csname;
552	const char *name;
553	const char *comment;
554	const char *tailoring;
555	const uchar *ctype;
556	const uchar *to_lower;
557	const uchar *to_upper;
558	const uchar *sort_order;
559	MY_UCA_INFO *uca;
560	const uint16 *tab_to_uni;
561	MY_UNI_IDX *tab_from_uni;
562	MY_UNICASE_INFO *caseinfo;
563	const uchar *state_map;
564	const uchar *ident_map;
565	uint strxfrm_multiply;
566	uchar caseup_multiply;
567	uchar casedn_multiply;
568	uint mbminlen;
569	uint mbmaxlen;
570	my_wc_t min_sort_char;
571	my_wc_t max_sort_char; / For LIKE optimization /
572	uchar pad_char;
573	my_bool escape_with_backslash_is_dangerous;
574	uchar levels_for_order;
575
576	MY_CHARSET_HANDLER *cset;
577	MY_COLLATION_HANDLER *coll;
578
579	};
580	#define ILLEGAL_CHARSET_INFO_NUMBER (~0U)
581
582	extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_bin;
583	extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1;
584	extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1_nopad;
585	extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_filename;
586	extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_utf8_general_ci;
587
588	extern struct charset_info_st my_charset_big5_bin;
589	extern struct charset_info_st my_charset_big5_chinese_ci;
590	extern struct charset_info_st my_charset_big5_nopad_bin;
591	extern struct charset_info_st my_charset_big5_chinese_nopad_ci;
592	extern struct charset_info_st my_charset_cp1250_czech_ci;
593	extern struct charset_info_st my_charset_cp932_bin;
594	extern struct charset_info_st my_charset_cp932_japanese_ci;
595	extern struct charset_info_st my_charset_cp932_nopad_bin;
596	extern struct charset_info_st my_charset_cp932_japanese_nopad_ci;
597	extern struct charset_info_st my_charset_eucjpms_bin;
598	extern struct charset_info_st my_charset_eucjpms_japanese_ci;
599	extern struct charset_info_st my_charset_eucjpms_nopad_bin;
600	extern struct charset_info_st my_charset_eucjpms_japanese_nopad_ci;
601	extern struct charset_info_st my_charset_euckr_bin;
602	extern struct charset_info_st my_charset_euckr_korean_ci;
603	extern struct charset_info_st my_charset_euckr_nopad_bin;
604	extern struct charset_info_st my_charset_euckr_korean_nopad_ci;
605	extern struct charset_info_st my_charset_gb2312_bin;
606	extern struct charset_info_st my_charset_gb2312_chinese_ci;
607	extern struct charset_info_st my_charset_gb2312_nopad_bin;
608	extern struct charset_info_st my_charset_gb2312_chinese_nopad_ci;
609	extern struct charset_info_st my_charset_gbk_bin;
610	extern struct charset_info_st my_charset_gbk_chinese_ci;
611	extern struct charset_info_st my_charset_gbk_nopad_bin;
612	extern struct charset_info_st my_charset_gbk_chinese_nopad_ci;
613	extern struct charset_info_st my_charset_latin1_bin;
614	extern struct charset_info_st my_charset_latin1_nopad_bin;
615	extern struct charset_info_st my_charset_latin1_german2_ci;
616	extern struct charset_info_st my_charset_latin2_czech_ci;
617	extern struct charset_info_st my_charset_sjis_bin;
618	extern struct charset_info_st my_charset_sjis_japanese_ci;
619	extern struct charset_info_st my_charset_sjis_nopad_bin;
620	extern struct charset_info_st my_charset_sjis_japanese_nopad_ci;
621	extern struct charset_info_st my_charset_tis620_bin;
622	extern struct charset_info_st my_charset_tis620_thai_ci;
623	extern struct charset_info_st my_charset_tis620_nopad_bin;
624	extern struct charset_info_st my_charset_tis620_thai_nopad_ci;
/* ucs2 collations (each object is declared exactly once) */
extern struct charset_info_st my_charset_ucs2_bin;
extern struct charset_info_st my_charset_ucs2_general_ci;
extern struct charset_info_st my_charset_ucs2_nopad_bin;
extern struct charset_info_st my_charset_ucs2_general_nopad_ci;
extern struct charset_info_st my_charset_ucs2_general_mysql500_ci;
extern struct charset_info_st my_charset_ucs2_unicode_ci;
extern struct charset_info_st my_charset_ucs2_unicode_nopad_ci;
633	extern struct charset_info_st my_charset_ujis_bin;
634	extern struct charset_info_st my_charset_ujis_japanese_ci;
635	extern struct charset_info_st my_charset_ujis_nopad_bin;
636	extern struct charset_info_st my_charset_ujis_japanese_nopad_ci;
637	extern struct charset_info_st my_charset_utf16_bin;
638	extern struct charset_info_st my_charset_utf16_general_ci;
639	extern struct charset_info_st my_charset_utf16_unicode_ci;
640	extern struct charset_info_st my_charset_utf16_unicode_nopad_ci;
641	extern struct charset_info_st my_charset_utf16le_bin;
642	extern struct charset_info_st my_charset_utf16le_general_ci;
643	extern struct charset_info_st my_charset_utf16_general_nopad_ci;
644	extern struct charset_info_st my_charset_utf16_nopad_bin;
645	extern struct charset_info_st my_charset_utf16le_nopad_bin;
646	extern struct charset_info_st my_charset_utf16le_general_nopad_ci;
647	extern struct charset_info_st my_charset_utf32_bin;
648	extern struct charset_info_st my_charset_utf32_general_ci;
649	extern struct charset_info_st my_charset_utf32_unicode_ci;
650	extern struct charset_info_st my_charset_utf32_unicode_nopad_ci;
651	extern struct charset_info_st my_charset_utf32_nopad_bin;
652	extern struct charset_info_st my_charset_utf32_general_nopad_ci;
653	extern struct charset_info_st my_charset_utf8_bin;
654	extern struct charset_info_st my_charset_utf8_nopad_bin;
655	extern struct charset_info_st my_charset_utf8_general_nopad_ci;
656	extern struct charset_info_st my_charset_utf8_general_mysql500_ci;
657	extern struct charset_info_st my_charset_utf8_unicode_ci;
658	extern struct charset_info_st my_charset_utf8_unicode_nopad_ci;
659	extern struct charset_info_st my_charset_utf8mb4_bin;
660	extern struct charset_info_st my_charset_utf8mb4_general_ci;
661	extern struct charset_info_st my_charset_utf8mb4_nopad_bin;
662	extern struct charset_info_st my_charset_utf8mb4_general_nopad_ci;
663	extern struct charset_info_st my_charset_utf8mb4_unicode_ci;
664	extern struct charset_info_st my_charset_utf8mb4_unicode_nopad_ci;
665
666	#define MY_UTF8MB3 "utf8"
667	#define MY_UTF8MB4 "utf8mb4"
668
669	my_bool my_cs_have_contractions(CHARSET_INFO *cs);
670	my_bool my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc);
671	my_bool my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc);
672	const uint16 my_cs_contraction2_weight(CHARSET_INFO cs, my_wc_t wc1,
673	my_wc_t wc2);
674
675	/ declarations for simple charsets /
676	extern size_t my_strnxfrm_simple(CHARSET_INFO *,
677	uchar *dst, size_t dstlen, uint nweights,
678	const uchar *src, size_t srclen, uint flags);
679	size_t my_strnxfrmlen_simple(CHARSET_INFO *, size_t);
680	extern int my_strnncoll_simple(CHARSET_INFO , const* uchar *, size_t,
681	const uchar *, size_t, my_bool);
682
683	extern int my_strnncollsp_simple(CHARSET_INFO , const* uchar *, size_t,
684	const uchar *, size_t);
685
686	extern void my_hash_sort_simple(CHARSET_INFO *cs,
687	const uchar *key, size_t len,
688	ulong nr1, ulong nr2);
689
690	extern void my_hash_sort_simple_nopad(CHARSET_INFO *cs,
691	const uchar *key, size_t len,
692	ulong nr1, ulong nr2);
693
694	extern void my_hash_sort_bin(CHARSET_INFO *cs,
695	const uchar key, size_t len, ulong nr1,
696	ulong *nr2);
697
698	/**
699	Compare a string to an array of spaces, for PAD SPACE comparison.
700	The function iterates through the string and compares every byte to 0x20.
701	@param - the string
702	@param - its length
703	@return <0 - if a byte less than 0x20 was found in the string.
704	@return 0 - if all bytes in the string were 0x20, or if length was 0.
705	@return >0 - if a byte greater than 0x20 was found in the string.
706	*/
707	extern int my_strnncollsp_padspace_bin(const uchar *str, size_t length);
708
709	extern size_t my_lengthsp_8bit(CHARSET_INFO cs, const* char *ptr, size_t length);
710
711	extern uint my_instr_simple(CHARSET_INFO *,
712	const char *b, size_t b_length,
713	const char *s, size_t s_length,
714	my_match_t *match, uint nmatch);
715
716	size_t my_copy_8bit(CHARSET_INFO *,
717	char *dst, size_t dst_length,
718	const char *src, size_t src_length,
719	size_t nchars, MY_STRCOPY_STATUS *);
720	size_t my_copy_fix_mb(CHARSET_INFO *cs,
721	char *dst, size_t dst_length,
722	const char *src, size_t src_length,
723	size_t nchars, MY_STRCOPY_STATUS *);
724
725	/ Functions for 8bit /
726	extern size_t my_caseup_str_8bit(CHARSET_INFO , char* *);
727	extern size_t my_casedn_str_8bit(CHARSET_INFO , char* *);
728	extern size_t my_caseup_8bit(CHARSET_INFO , char* *src, size_t srclen,
729	char *dst, size_t dstlen);
730	extern size_t my_casedn_8bit(CHARSET_INFO , char* *src, size_t srclen,
731	char *dst, size_t dstlen);
732
733	extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char , const* char *);
734
735	int my_mb_wc_8bit(CHARSET_INFO cs,my_wc_t wc, const uchar s,const* uchar *e);
736	int my_wc_mb_8bit(CHARSET_INFO cs,my_wc_t wc, uchar s, uchar *e);
737	int my_wc_mb_bin(CHARSET_INFO cs,my_wc_t wc, uchar s, uchar *e);
738
739	int my_mb_ctype_8bit(CHARSET_INFO ,int* , const* uchar ,const* uchar *);
740	int my_mb_ctype_mb(CHARSET_INFO ,int* , const* uchar ,const* uchar *);
741
742	size_t my_scan_8bit(CHARSET_INFO cs, const* char b, const* char e, int* sq);
743
744	size_t my_snprintf_8bit(CHARSET_INFO , char* *to, size_t n,
745	const char *fmt, ...)
746	ATTRIBUTE_FORMAT(printf, `4`, `5`);
747
748	long my_strntol_8bit(CHARSET_INFO , const* char s, size_t l, int* base,
749	char *e, int* *err);
750	ulong my_strntoul_8bit(CHARSET_INFO , const* char s, size_t l, int* base,
751	char *e, int* *err);
752	longlong my_strntoll_8bit(CHARSET_INFO , const* char s, size_t l, int* base,
753	char *e, int* *err);
754	ulonglong my_strntoull_8bit(CHARSET_INFO , const* char s, size_t l, int* base,
755	char *e, int* *err);
756	double my_strntod_8bit(CHARSET_INFO , char* s, size_t l,char* **e,
757	int *err);
758	size_t my_long10_to_str_8bit(CHARSET_INFO , char* to, size_t l, int* radix,
759	long int val);
760	size_t my_longlong10_to_str_8bit(CHARSET_INFO , char* to, size_t l, int* radix,
761	longlong val);
762
763	longlong my_strtoll10_8bit(CHARSET_INFO *cs,
764	const char nptr, char* *endptr, int* *error);
765	longlong my_strtoll10_ucs2(CHARSET_INFO *cs,
766	const char nptr, char* *endptr, int* *error);
767
768	ulonglong my_strntoull10rnd_8bit(CHARSET_INFO *cs,
769	const char str, size_t length, int*
770	unsigned_fl, char *endptr, int* *error);
771	ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs,
772	const char *str, size_t length,
773	int unsigned_fl, char *endptr, int* *error);
774
775	void my_fill_8bit(CHARSET_INFO cs, char** to, size_t l, int fill);
776
777	/ For 8-bit character set /
778	my_bool my_like_range_simple(CHARSET_INFO *cs,
779	const char *ptr, size_t ptr_length,
780	pbool escape, pbool w_one, pbool w_many,
781	size_t res_length,
782	char min_str, char* *max_str,
783	size_t min_length, size_t max_length);
784
785	/ For ASCII-based multi-byte character sets with mbminlen=1 /
786	my_bool my_like_range_mb(CHARSET_INFO *cs,
787	const char *ptr, size_t ptr_length,
788	pbool escape, pbool w_one, pbool w_many,
789	size_t res_length,
790	char min_str, char* *max_str,
791	size_t min_length, size_t max_length);
792
793	/ For other character sets, with arbitrary mbminlen and mbmaxlen numbers /
794	my_bool my_like_range_generic(CHARSET_INFO *cs,
795	const char *ptr, size_t ptr_length,
796	pbool escape, pbool w_one, pbool w_many,
797	size_t res_length,
798	char min_str, char* *max_str,
799	size_t min_length, size_t max_length);
800
801	int my_wildcmp_8bit(CHARSET_INFO *,
802	const char str,const* char *str_end,
803	const char wildstr,const* char *wildend,
804	int escape, int w_one, int w_many);
805
806	int my_wildcmp_bin(CHARSET_INFO *,
807	const char str,const* char *str_end,
808	const char wildstr,const* char *wildend,
809	int escape, int w_one, int w_many);
810
811	size_t my_numchars_8bit(CHARSET_INFO , const* char b, const* char *e);
812	size_t my_numcells_8bit(CHARSET_INFO , const* char b, const* char *e);
813	size_t my_charpos_8bit(CHARSET_INFO , const* char b, const* char *e, size_t pos);
814	size_t my_well_formed_char_length_8bit(CHARSET_INFO *cs,
815	const char b, const* char *e,
816	size_t nchars,
817	MY_STRCOPY_STATUS *status);
818	int my_charlen_8bit(CHARSET_INFO , const* uchar str, const* uchar *end);
819
820
821	/ Functions for multibyte charsets /
822	extern size_t my_caseup_str_mb(CHARSET_INFO , char* *);
823	extern size_t my_casedn_str_mb(CHARSET_INFO , char* *);
824	extern size_t my_caseup_mb(CHARSET_INFO , char* *src, size_t srclen,
825	char *dst, size_t dstlen);
826	extern size_t my_casedn_mb(CHARSET_INFO , char* *src, size_t srclen,
827	char *dst, size_t dstlen);
828	extern size_t my_caseup_mb_varlen(CHARSET_INFO , char* *src, size_t srclen,
829	char *dst, size_t dstlen);
830	extern size_t my_casedn_mb_varlen(CHARSET_INFO , char* *src, size_t srclen,
831	char *dst, size_t dstlen);
832	extern size_t my_caseup_ujis(CHARSET_INFO , char* *src, size_t srclen,
833	char *dst, size_t dstlen);
834	extern size_t my_casedn_ujis(CHARSET_INFO , char* *src, size_t srclen,
835	char *dst, size_t dstlen);
836	extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char , const* char *);
837
838	int my_wildcmp_mb(CHARSET_INFO *,
839	const char str,const* char *str_end,
840	const char wildstr,const* char *wildend,
841	int escape, int w_one, int w_many);
842	size_t my_numchars_mb(CHARSET_INFO , const* char b, const* char *e);
843	size_t my_numcells_mb(CHARSET_INFO , const* char b, const* char *e);
844	size_t my_charpos_mb(CHARSET_INFO , const* char b, const* char *e, size_t pos);
845	uint my_instr_mb(CHARSET_INFO *,
846	const char *b, size_t b_length,
847	const char *s, size_t s_length,
848	my_match_t *match, uint nmatch);
849
850	int my_wildcmp_mb_bin(CHARSET_INFO *cs,
851	const char str,const* char *str_end,
852	const char wildstr,const* char *wildend,
853	int escape, int w_one, int w_many);
854
855	int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
856	const char s, const* char *t);
857
858	void my_hash_sort_mb_bin(CHARSET_INFO cs __attribute__*((unused)),
859	const uchar key, size_t len,ulong nr1, ulong *nr2);
860
861	void my_hash_sort_mb_nopad_bin(CHARSET_INFO cs __attribute__*((unused)),
862	const uchar *key, size_t len,
863	ulong nr1, ulong nr2);
864
865	size_t my_strnxfrm_mb(CHARSET_INFO *,
866	uchar *dst, size_t dstlen, uint nweights,
867	const uchar *src, size_t srclen, uint flags);
868
869	size_t my_strnxfrm_mb_nopad(CHARSET_INFO *,
870	uchar *dst, size_t dstlen, uint nweights,
871	const uchar *src, size_t srclen, uint flags);
872
873	size_t my_strnxfrm_unicode(CHARSET_INFO *,
874	uchar *dst, size_t dstlen, uint nweights,
875	const uchar *src, size_t srclen, uint flags);
876
877	size_t my_strnxfrm_unicode_nopad(CHARSET_INFO *,
878	uchar *dst, size_t dstlen, uint nweights,
879	const uchar *src, size_t srclen, uint flags);
880
881	size_t my_strnxfrmlen_unicode(CHARSET_INFO *, size_t);
882
883	size_t my_strnxfrm_unicode_full_bin(CHARSET_INFO *,
884	uchar *dst, size_t dstlen,
885	uint nweights, const uchar *src,
886	size_t srclen, uint flags);
887
888	size_t my_strnxfrm_unicode_full_nopad_bin(CHARSET_INFO *,
889	uchar *dst, size_t dstlen,
890	uint nweights, const uchar *src,
891	size_t srclen, uint flags);
892
893	size_t my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *, size_t);
894
895	int my_wildcmp_unicode(CHARSET_INFO *cs,
896	const char str, const* char *str_end,
897	const char wildstr, const* char *wildend,
898	int escape, int w_one, int w_many,
899	MY_UNICASE_INFO *weights);
900
901	extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader,
902	const char *buf, size_t buflen);
903	extern char my_strchr(CHARSET_INFO cs, const char str, const* char *end,
904	pchar c);
905	extern size_t my_strcspn(CHARSET_INFO cs, const* char str, const* char *end,
906	const char *accept);
907
908	my_bool my_propagate_simple(CHARSET_INFO cs, const* uchar *str, size_t len);
909	my_bool my_propagate_complex(CHARSET_INFO cs, const* uchar *str, size_t len);
910
911
912	typedef struct
913	{
914	size_t char_length;
915	uint repertoire;
916	} MY_STRING_METADATA;
917
918	void my_string_metadata_get(MY_STRING_METADATA *metadata,
919	CHARSET_INFO cs, const* char *str, size_t len);
920	uint my_string_repertoire(CHARSET_INFO cs, const* char *str, size_t len);
921	my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
922	uint my_charset_repertoire(CHARSET_INFO *cs);
923
924	uint my_strxfrm_flag_normalize(uint flags, uint nlevels);
925	void my_strxfrm_desc_and_reverse(uchar str, uchar strend,
926	uint flags, uint level);
927	size_t my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
928	uchar str, uchar frmend, uchar *strend,
929	uint nweights, uint flags, uint level);
930	size_t my_strxfrm_pad_desc_and_reverse_nopad(CHARSET_INFO *cs,
931	uchar str, uchar frmend,
932	uchar *strend, uint nweights,
933	uint flags, uint level);
934
935	const MY_CONTRACTIONS my_charset_get_contractions(CHARSET_INFO cs,
936	int level);
937
938	extern size_t my_vsnprintf_ex(CHARSET_INFO cs, char* *to, size_t n,
939	const char* fmt, va_list ap);
940
941	/*
942	Convert a string between two character sets.
943	Bad byte sequences as well as characters that cannot be
944	encoded in the destination character set are replaced to '?'.
945	*/
946	uint32 my_convert(char to, uint32 to_length, CHARSET_INFO to_cs,
947	const char *from, uint32 from_length,
948	CHARSET_INFO from_cs, uint errors);
949
950	/**
951	An extended version of my_convert(), to pass non-default mb_wc() and wc_mb().
952	For example, String::copy_printable() which is used in
953	Protocol::store_warning() uses this to escape control
954	and non-convertable characters.
955	*/
956	uint32 my_convert_using_func(char to, size_t to_length, CHARSET_INFO to_cs,
957	my_charset_conv_wc_mb mb_wc,
958	const char *from, size_t from_length,
959	CHARSET_INFO *from_cs,
960	my_charset_conv_mb_wc wc_mb,
961	uint *errors);
962	/*
963	Convert a string between two character sets.
964	Bad byte sequences as well as characters that cannot be
965	encoded in the destination character set are replaced to '?'.
966	Not more than "nchars" characters are copied.
967	Conversion statistics is returned in "status" and is set as follows:
968	- status->m_native_copy_status.m_source_end_pos - to the position
969	between (src) and (src+src_length), where the function stopped reading
970	the source string.
971	- status->m_native_copy_status.m_well_formed_error_pos - to the position
972	between (src) and (src+src_length), where the first badly formed byte
973	sequence was found, or to NULL if the string was well formed in the
974	given range.
975	- status->m_cannot_convert_error_pos - to the position
976	between (src) and (src+src_length), where the first character that
977	cannot be represented in the destination character set was found,
978	or to NULL if all characters in the given range were successfully
979	converted.
980
981	"src" is allowed to be a NULL pointer. In this case "src_length" must
982	be equal to 0. All "status" members are initialized to NULL, and 0 is
983	returned.
984	*/
985	size_t my_convert_fix(CHARSET_INFO dstcs, char* *dst, size_t dst_length,
986	CHARSET_INFO srccs, const* char *src, size_t src_length,
987	size_t nchars,
988	MY_STRCOPY_STATUS *copy_status,
989	MY_STRCONV_STATUS *conv_status);
990
/* Character class bits tested by the my_is*() macros below */
#define _MY_U   01      /* Upper case */
#define _MY_L   02      /* Lower case */
#define _MY_NMR 04      /* Numeral (digit) */
#define _MY_SPC 010     /* Spacing character */
#define _MY_PNT 020     /* Punctuation */
#define _MY_CTR 040     /* Control character */
#define _MY_B   0100    /* Blank */
#define _MY_X   0200    /* heXadecimal digit */


/* Charset-independent helpers operating on the raw code value */
#define my_isascii(c)   (!((c) & ~0177))
#define my_toascii(c)   ((c) & 0177)
#define my_tocntrl(c)   ((c) & 31)
#define my_toprint(c)   ((c) | 64)

/*
  Table-driven helpers. "s" is a pointer to a charset descriptor exposing
  to_upper, to_lower and ctype byte tables; "c" is cast to uchar before
  indexing. Class lookups index from ctype+1, i.e. they skip the first
  table entry. The my_is*() macros yield the masked bits (non-zero when
  the character belongs to the class), not a normalized 0/1.
*/
#define my_toupper(s,c)  (char) ((s)->to_upper[(uchar) (c)])
#define my_tolower(s,c)  (char) ((s)->to_lower[(uchar) (c)])
#define my_isalpha(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L))
#define my_isupper(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_U)
#define my_islower(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_L)
#define my_isdigit(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_NMR)
#define my_isxdigit(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_X)
#define my_isalnum(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L | _MY_NMR))
#define my_isspace(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_SPC)
#define my_ispunct(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_PNT)
#define my_isprint(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B))
#define my_isgraph(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR))
#define my_iscntrl(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_CTR)
1018
/* Some macros that should be cleaned up a little */
#define my_isvar(s,c)       (my_isalnum(s,c) || (c) == '_')
#define my_isvar_start(s,c) (my_isalpha(s,c) || (c) == '_')

/* State-flag tests on a charset descriptor */
#define my_binary_compare(s) ((s)->state & MY_CS_BINSORT)
#define use_strnxfrm(s)      ((s)->state & MY_CS_STRNXFRM)

/*
  Thin wrappers that dispatch through the collation (coll) or charset
  (cset) handler tables of the descriptor. my_strnxfrm() passes "dl" both
  as the destination length and as the weight count, and always requests
  MY_STRXFRM_PAD_WITH_SPACE; my_strnncoll() passes 0 as the last argument.
*/
#define my_strnxfrm(cs, d, dl, s, sl) \
   ((cs)->coll->strnxfrm((cs), (d), (dl), (dl), (s), (sl), MY_STRXFRM_PAD_WITH_SPACE))
#define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0))
#define my_like_range(s, a, b, c, d, e, f, g, h, i, j) \
   ((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j)))
#define my_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->coll->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m)))
#define my_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b)))
/* Both range ends are cast to const char *; the expansion is parenthesized
   so it can be used safely inside larger expressions */
#define my_charpos(cs, b, e, num) \
   ((cs)->cset->charpos((cs), (const char *) (b), (const char *) (e), (num)))

/* True when a character of this charset may occupy more than one byte */
#define use_mb(s) ((s)->mbmaxlen > 1)
1035	/**
1036	Detect if the leftmost character in a string is a valid multi-byte character
1037	and return its length, or return 0 otherwise.
1038	@param cs - character set
1039	@param str - the beginning of the string
1040	@param end - the string end (the next byte after the string)
1041	@return >0, for a multi-byte character
1042	@rerurn 0, for a single byte character, broken sequence, empty string.
1043	*/
1044	static inline
1045	uint my_ismbchar(CHARSET_INFO cs, const* char str, const* char *end)
1046	{
1047	int char_length= (cs->cset->charlen)(cs, (const uchar *) str,
1048	(const uchar *) end);
1049	return char_length > `1` ? (uint) char_length : `0U`;
1050	}
1051
1052
1053	/**
1054	Return length of the leftmost character in a string.
1055	@param cs - character set
1056	@param str - the beginning of the string
1057	@param end - the string end (the next byte after the string)
1058	@return <=0 on errors (EOL, wrong byte sequence)
1059	@return 1 on a single byte character
1060	@return >1 on a multi-byte character
1061
1062	Note, inlike my_ismbchar(), 1 is returned for a single byte character.
1063	*/
1064	static inline
1065	int my_charlen(CHARSET_INFO cs, const* char str, const* char *end)
1066	{
1067	return (cs->cset->charlen)(cs, (const uchar *) str,
1068	(const uchar *) end);
1069	}
1070
1071
1072	/**
1073	Convert broken and incomplete byte sequences to 1 byte.
1074	*/
1075	static inline
1076	uint my_charlen_fix(CHARSET_INFO cs, const* char str, const* char *end)
1077	{
1078	int char_length= my_charlen(cs, str, end);
1079	DBUG_ASSERT(str < end);
1080	return char_length > `0` ? (uint) char_length : (uint) `1U`;
1081	}
1082
1083
1084	/*
1085	A compatibility replacement pure C function for the former
1086	cs->cset->well_formed_len().
1087	In C++ code please use Well_formed_prefix::length() instead.
1088	*/
1089	static inline size_t
1090	my_well_formed_length(CHARSET_INFO cs, const* char b, const* char *e,
1091	size_t nchars, int *error)
1092	{
1093	MY_STRCOPY_STATUS status;
1094	(void) cs->cset->well_formed_char_length(cs, b, e, nchars, &status);
1095	*error= status.m_well_formed_error_pos == NULL ? `0` : `1`;
1096	return (size_t) (status.m_source_end_pos - b);
1097	}
1098
1099
/*
  Shorthands that forward to the charset handler table (cset) of "s",
  passing "s" itself as the first argument.
*/
#define my_caseup_str(s, a)            ((s)->cset->caseup_str((s), (a)))
#define my_casedn_str(s, a)            ((s)->cset->casedn_str((s), (a)))
#define my_strntol(s, a, b, c, d, e)   ((s)->cset->strntol((s),(a),(b),(c),(d),(e)))
#define my_strntoul(s, a, b, c, d, e)  ((s)->cset->strntoul((s),(a),(b),(c),(d),(e)))
#define my_strntoll(s, a, b, c, d, e)  ((s)->cset->strntoll((s),(a),(b),(c),(d),(e)))
#define my_strntoull(s, a, b, c,d, e)  ((s)->cset->strntoull((s),(a),(b),(c),(d),(e)))
#define my_strntod(s, a, b, c, d)      ((s)->cset->strntod((s),(a),(b),(c),(d)))

1107
1108
/* XXX: still need to take care of this one */
/* Defining MY_CHARSET_TIS620 deliberately stops the build via #error */
#ifdef MY_CHARSET_TIS620
#error The TIS620 charset is broken at the moment. Tell tim to fix it.
#define USE_TIS620
#include "t_ctype.h"
#endif
1115
1116	#ifdef __cplusplus
1117	}
1118	#endif
1119
1120	#endif /* _m_ctype_h */
1121

Browse the source code of MariaDB/include/m_ctype.h