1/****************************************************************************
2 Copyright (C) 2012 Monty Program AB
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public
6 License as published by the Free Software Foundation; either
7 version 2 of the License, or (at your option) any later version.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with this library; if not see <http://www.gnu.org/licenses>
16 or write to the Free Software Foundation, Inc.,
17 51 Franklin St., Fifth Floor, Boston, MA 02110, USA
18
19 Part of this code includes code from the PHP project which
20 is freely available from http://www.php.net
21*****************************************************************************/
22
23/* The implementation for character set support was ported from PHP's mysqlnd
24 extension, written by Andrey Hristov, Georg Richter and Ulf Wendel
25
26 Original file header:
27 +----------------------------------------------------------------------+
28 | PHP Version 5 |
29 +----------------------------------------------------------------------+
30 | Copyright (c) 2006-2011 The PHP Group |
31 +----------------------------------------------------------------------+
32 | This source file is subject to version 3.01 of the PHP license, |
33 | that is bundled with this package in the file LICENSE, and is |
34 | available through the world-wide-web at the following url: |
35 | http://www.php.net/license/3_01.txt |
36 | If you did not receive a copy of the PHP license and are unable to |
37 | obtain it through the world-wide-web, please send a note to |
38 | license@php.net so we can mail you a copy immediately. |
39 +----------------------------------------------------------------------+
40 | Authors: Georg Richter <georg@mysql.com> |
41 | Andrey Hristov <andrey@mysql.com> |
42 | Ulf Wendel <uwendel@mysql.com> |
43 +----------------------------------------------------------------------+
44*/
45
46#ifndef _WIN32
47#include <strings.h>
48#include <string.h>
49#else
50#include <string.h>
51#endif
52#include <ma_global.h>
53#include <mariadb_ctype.h>
54#include <ma_string.h>
55
56#ifdef _WIN32
57#include "../win-iconv/iconv.h"
58#else
59#include <iconv.h>
60#endif
61
62
63#if defined(HAVE_NL_LANGINFO) && defined(HAVE_SETLOCALE)
64#include <locale.h>
65#include <langinfo.h>
66#endif
67
68/*
69 +----------------------------------------------------------------------+
70 | PHP Version 5 |
71 +----------------------------------------------------------------------+
72 | Copyright (c) 2006-2011 The PHP Group |
73 +----------------------------------------------------------------------+
74 | This source file is subject to version 3.01 of the PHP license, |
75 | that is bundled with this package in the file LICENSE, and is |
76 | available through the world-wide-web at the following url: |
77 | http://www.php.net/license/3_01.txt |
78 | If you did not receive a copy of the PHP license and are unable to |
79 | obtain it through the world-wide-web, please send a note to |
80 | license@php.net so we can mail you a copy immediately. |
81 +----------------------------------------------------------------------+
82 | Authors: Georg Richter <georg@mysql.com> |
83 | Andrey Hristov <andrey@mysql.com> |
84 | Ulf Wendel <uwendel@mysql.com> |
85 +----------------------------------------------------------------------+
86*/
87
88/* {{{ utf8 functions */
/*
  Measure the UTF-8 sequence of at most three bytes that begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns the sequence length (1, 2 or 3).  Returns 0 when the range is
  empty, the sequence is cut off by end, or the bytes are not accepted.
  Lead bytes 0xF0 and above are always rejected here.
*/
static unsigned int check_mb_utf8mb3_sequence(const char *start, const char *end)
{
  const unsigned char *bytes = (const unsigned char *) start;
  unsigned char lead;

  if (start >= end) {
    return 0;
  }

  lead = bytes[0];

  if (lead < 0x80) {
    return 1;                       /* single byte character */
  }
  if (lead < 0xC2 || lead >= 0xF0) {
    return 0;                       /* not a usable lead byte */
  }

  if (lead < 0xE0) {
    /* two byte form: one continuation byte (0x80..0xBF) must follow */
    if (end - start < 2) {
      return 0;
    }
    return ((bytes[1] & 0xC0) == 0x80) ? 2 : 0;
  }

  /* three byte form, lead is 0xE0..0xEF */
  if (end - start < 3) {
    return 0;
  }
  if ((bytes[1] & 0xC0) != 0x80 || (bytes[2] & 0xC0) != 0x80) {
    return 0;
  }
  if (lead == 0xE0 && bytes[1] < 0xA0) {
    return 0;                       /* E0 requires a second byte >= A0 */
  }
  return 3;
}
126
127
/*
  Measure the UTF-8 sequence of at most four bytes that begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns the sequence length (1..4).  Returns 0 when the range is empty,
  the sequence is cut off by end, or the bytes are not accepted.

  Accepted four byte forms (U+010000 .. U+10FFFF):
    [F0][90..BF][80..BF][80..BF]
    [F1..F3][80..BF][80..BF][80..BF]
    [F4][80..8F][80..BF][80..BF]
*/
static unsigned int check_mb_utf8_sequence(const char *start, const char *end)
{
  const unsigned char *bytes = (const unsigned char *) start;
  unsigned char lead;
  int need;
  int i;

  if (start >= end) {
    return 0;
  }

  lead = bytes[0];

  if (lead < 0x80) {
    return 1;                       /* single byte character */
  }
  if (lead < 0xC2 || lead > 0xF4) {
    return 0;                       /* not a usable lead byte */
  }

  /* total length implied by the lead byte */
  if (lead < 0xE0) {
    need = 2;
  } else if (lead < 0xF0) {
    need = 3;
  } else {
    need = 4;
  }

  if (end - start < need) {
    return 0;                       /* too small */
  }

  /* every trailing byte has to be a continuation byte 0x80..0xBF */
  for (i = 1; i < need; i++) {
    if ((bytes[i] & 0xC0) != 0x80) {
      return 0;
    }
  }

  /* range limits on the second byte for the boundary lead bytes */
  switch (lead) {
    case 0xE0:
      if (bytes[1] < 0xA0) {
        return 0;
      }
      break;
    case 0xF0:
      if (bytes[1] < 0x90) {
        return 0;
      }
      break;
    case 0xF4:
      if (bytes[1] > 0x8F) {
        return 0;
      }
      break;
    default:
      break;
  }

  return (unsigned int) need;
}
199
/*
  Report the length of a multi-byte utf8mb3 character at start.

  Delegates to check_mb_utf8mb3_sequence() and keeps only results of two or
  more bytes; single byte characters and rejected input both yield 0.
*/
static unsigned int check_mb_utf8mb3_valid(const char *start, const char *end)
{
  const unsigned int seq_len = check_mb_utf8mb3_sequence(start, end);

  if (seq_len < 2) {
    return 0;
  }
  return seq_len;
}
205
/*
  Report the length of a multi-byte utf8 (up to four bytes) character at
  start.

  Delegates to check_mb_utf8_sequence() and keeps only results of two or
  more bytes; single byte characters and rejected input both yield 0.
*/
static unsigned int check_mb_utf8_valid(const char *start, const char *end)
{
  const unsigned int seq_len = check_mb_utf8_sequence(start, end);

  if (seq_len < 2) {
    return 0;
  }
  return seq_len;
}
211
212
/*
  Character length implied by a utf8mb3 lead byte value.

  Returns 1 for values below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF and
  0 for every other value (no valid character starts with it).
*/
static unsigned int mysql_mbcharlen_utf8mb3(unsigned int utf8)
{
  if (utf8 >= 0xF0) {
    return 0;                       /* beyond the three byte range */
  }
  if (utf8 >= 0xE0) {
    return 3;                       /* triple byte character */
  }
  if (utf8 >= 0xC2) {
    return 2;                       /* double byte character */
  }
  /* below 0x80 is a single byte, 0x80..0xC1 is an invalid header */
  return (utf8 < 0x80) ? 1 : 0;
}
229
230
/*
  Character length implied by a utf8 (up to four bytes) lead byte value.

  Returns 1 for values below 0x80, 2 for 0xC2..0xDF, 3 for 0xE0..0xEF,
  4 for 0xF0..0xF7 and 0 for every other value.
*/
static unsigned int mysql_mbcharlen_utf8(unsigned int utf8)
{
  if (utf8 < 0x80) {
    return 1;                       /* single byte character */
  }
  if (utf8 < 0xC2 || utf8 >= 0xF8) {
    return 0;                       /* invalid multibyte header */
  }
  if (utf8 >= 0xF0) {
    return 4;                       /* four byte character */
  }
  /* what is left is 0xC2..0xEF */
  return (utf8 >= 0xE0) ? 3 : 2;
}
250/* }}} */
251
252
253/* {{{ big5 functions */
/*
  big5 byte classification.

  The arguments are converted with (unsigned int), so a plain (possibly
  signed) char has to be cast to unsigned char by the caller first;
  otherwise bytes >= 0x80 would be sign-extended and rejected.

  valid_big5head(c)  lead byte,  0xA1..0xF9
  valid_big5tail(c)  trail byte, 0x40..0x7E or 0xA1..0xFE
  isbig5code(c,d)    (c, d) is a valid lead/trail pair
*/
#define valid_big5head(c)  (0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xF9)
#define valid_big5tail(c)  ((0x40 <= (unsigned int)(c) && (unsigned int)(c) <= 0x7E) || \
                            (0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xFE))

/* Previously expanded to isbig5head()/isbig5tail(), which are not defined. */
#define isbig5code(c,d)    (valid_big5head(c) && valid_big5tail(d))

/*
  Check whether a two byte big5 character begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 for a valid lead/trail pair, 0 otherwise (including when fewer
  than two bytes are available).
*/
static unsigned int check_mb_big5(const char *start, const char *end)
{
  const unsigned char *bytes = (const unsigned char *) start;

  /* make sure both bytes exist before looking at either of them */
  if (end - start < 2) {
    return 0;
  }
  return isbig5code(bytes[0], bytes[1]) ? 2 : 0;
}
264
265
/*
  Character length implied by a big5 lead byte value: 2 when the value is
  a big5 lead byte (0xA1..0xF9), 1 otherwise.
*/
static unsigned int mysql_mbcharlen_big5(unsigned int big5)
{
  if (big5 < 0xA1 || big5 > 0xF9) {
    return 1;
  }
  return 2;
}
270/* }}} */
271
272
273/* {{{ cp932 functions */
/*
  cp932 byte classification.  The argument is compared as given, so callers
  must pass an unsigned byte value.

  valid_cp932head(c)  lead byte,  0x81..0x9F or 0xE0..0xFC
  valid_cp932tail(c)  trail byte, 0x40..0x7E or 0x80..0xFC

  Every use of the argument is parenthesized so that expression arguments
  such as (x & 0xFF) are evaluated as a unit.
*/
#define valid_cp932head(c) ((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
#define valid_cp932tail(c) ((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))


/*
  Check whether a two byte cp932 character begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 for a valid lead/trail pair, 0 otherwise (including when fewer
  than two bytes are available).
*/
static unsigned int check_mb_cp932(const char *start, const char *end)
{
  const unsigned char *bytes = (const unsigned char *) start;

  /* make sure both bytes exist before looking at either of them */
  if (end - start < 2) {
    return 0;
  }
  return (valid_cp932head(bytes[0]) && valid_cp932tail(bytes[1])) ? 2 : 0;
}
283
284
/*
  Character length implied by a cp932 lead byte.  Only the low eight bits
  of the argument are examined: 2 when they form a cp932 lead byte
  (0x81..0x9F or 0xE0..0xFC), 1 otherwise.
*/
static unsigned int mysql_mbcharlen_cp932(unsigned int cp932)
{
  const unsigned char lead = (unsigned char) cp932;

  if (lead >= 0x81 && lead <= 0x9F) {
    return 2;
  }
  if (lead >= 0xE0 && lead <= 0xFC) {
    return 2;
  }
  return 1;
}
289/* }}} */
290
291
292/* {{{ euckr functions */
/* true when the low byte of c lies in 0xA1..0xFE */
#define valid_euckr(c)  (((unsigned char)(c)) >= 0xA1 && ((unsigned char)(c)) <= 0xFE)

/*
  Check whether a two byte euckr character begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 when at least two bytes are available, the first byte is 0x80
  or above and the second byte satisfies valid_euckr(); 0 otherwise.
*/
static unsigned int check_mb_euckr(const char *start, const char *end)
{
  const unsigned char *bytes = (const unsigned char *) start;
  const int accepted = (end - start > 1) &&     /* need both bytes */
                       bytes[0] >= 0x80 &&      /* first byte is not 7-bit */
                       valid_euckr(bytes[1]);

  return accepted ? 2 : 0;
}
308
309
/*
  Character length implied by an euckr lead byte.  Only the low eight bits
  of the argument are examined: 2 when they lie in 0xA1..0xFE, 1 otherwise.
*/
static unsigned int mysql_mbcharlen_euckr(unsigned int kr)
{
  const unsigned char lead = (unsigned char) kr;

  if (lead < 0xA1 || lead > 0xFE) {
    return 1;
  }
  return 2;
}
314/* }}} */
315
316
317/* {{{ eucjpms functions */
/*
  eucjpms byte classification.  Each macro masks its argument with 0xFF, so
  plain (possibly signed) char values can be passed directly.

  valid_eucjpms(c)       0xA1..0xFE
  valid_eucjpms_kata(c)  0xA1..0xDF
  valid_eucjpms_ss2(c)   0x8E
  valid_eucjpms_ss3(c)   0x8F
*/
#define valid_eucjpms(c)       (((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xFE)
#define valid_eucjpms_kata(c)  (((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xDF)
#define valid_eucjpms_ss2(c)   (((c) & 0xFF) == 0x8E)
#define valid_eucjpms_ss3(c)   (((c) & 0xFF) == 0x8F)

/*
  Check whether a multi-byte eucjpms character begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns the number of bytes the character occupies:
    2  two bytes that both satisfy valid_eucjpms()
    2  0x8E followed by a byte that satisfies valid_eucjpms_kata()
    3  0x8F followed by two bytes that satisfy valid_eucjpms()
  and 0 for anything else (7-bit first byte, truncated or invalid input).
*/
static unsigned int check_mb_eucjpms(const char *start, const char *end)
{
  if (start >= end || (unsigned char) start[0] < 0x80) {
    return 0;                       /* empty range or 7-bit byte */
  }

  if (end - start > 1) {
    if (valid_eucjpms(start[0]) && valid_eucjpms(start[1])) {
      return 2;
    }
    if (valid_eucjpms_ss2(start[0]) && valid_eucjpms_kata(start[1])) {
      return 2;
    }
  }

  /*
    The 0x8F form consumes three bytes, so report 3 (this used to report 2,
    disagreeing with mysql_mbcharlen_eucjpms() and check_mb_ujis()).
  */
  if (end - start > 2 && valid_eucjpms_ss3(start[0]) &&
      valid_eucjpms(start[1]) && valid_eucjpms(start[2])) {
    return 3;
  }
  return 0;
}
340
341
/*
  Character length implied by an eucjpms lead byte.  Only the low eight
  bits of the argument are examined:
    0x8F             -> 3
    0x8E, 0xA1..0xFE -> 2
    anything else    -> 1
*/
static unsigned int mysql_mbcharlen_eucjpms(unsigned int jpms)
{
  const unsigned int lead = jpms & 0xFF;

  switch (lead) {
    case 0x8F:
      return 3;
    case 0x8E:
      return 2;
    default:
      return (lead >= 0xA1 && lead <= 0xFE) ? 2 : 1;
  }
}
352/* }}} */
353
354
355/* {{{ gb2312 functions */
/*
  gb2312 byte classification; the argument is truncated to its low byte.

  valid_gb2312_head(c)  lead byte,  0xA1..0xF7
  valid_gb2312_tail(c)  trail byte, 0xA1..0xFE
*/
#define valid_gb2312_head(c)  (((unsigned char)(c)) >= 0xA1 && ((unsigned char)(c)) <= 0xF7)
#define valid_gb2312_tail(c)  (((unsigned char)(c)) >= 0xA1 && ((unsigned char)(c)) <= 0xFE)


/*
  Check whether a two byte gb2312 character begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 for a valid lead/trail pair, 0 otherwise.
*/
static unsigned int check_mb_gb2312(const char *start, const char *end)
{
  if (!valid_gb2312_head(start[0])) {
    return 0;
  }
  if (end - start <= 1) {
    return 0;                       /* trail byte is missing */
  }
  return valid_gb2312_tail(start[1]) ? 2 : 0;
}
365
366
/*
  Character length implied by a gb2312 lead byte.  Only the low eight bits
  of the argument are examined: 2 when they lie in 0xA1..0xF7, 1 otherwise.
*/
static unsigned int mysql_mbcharlen_gb2312(unsigned int gb)
{
  const unsigned char lead = (unsigned char) gb;

  if (lead < 0xA1 || lead > 0xF7) {
    return 1;
  }
  return 2;
}
371/* }}} */
372
373
374/* {{{ gbk functions */
/*
  gbk byte classification; the argument is truncated to its low byte.

  valid_gbk_head(c)  lead byte,  0x81..0xFE
  valid_gbk_tail(c)  trail byte, 0x40..0x7E or 0x80..0xFE
*/
#define valid_gbk_head(c)  (((unsigned char)(c)) >= 0x81 && ((unsigned char)(c)) <= 0xFE)
#define valid_gbk_tail(c)  ((((unsigned char)(c)) >= 0x40 && ((unsigned char)(c)) <= 0x7E) || \
                            (((unsigned char)(c)) >= 0x80 && ((unsigned char)(c)) <= 0xFE))

/*
  Check whether a two byte gbk character begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 for a valid lead/trail pair, 0 otherwise.
*/
static unsigned int check_mb_gbk(const char *start, const char *end)
{
  if (!valid_gbk_head(start[0])) {
    return 0;
  }
  if (end - start <= 1) {
    return 0;                       /* trail byte is missing */
  }
  return valid_gbk_tail(start[1]) ? 2 : 0;
}
382
/*
  Character length implied by a gbk lead byte.  Only the low eight bits of
  the argument are examined: 2 when they lie in 0x81..0xFE, 1 otherwise.
*/
static unsigned int mysql_mbcharlen_gbk(unsigned int gbk)
{
  const unsigned char lead = (unsigned char) gbk;

  if (lead < 0x81 || lead > 0xFE) {
    return 1;
  }
  return 2;
}
387/* }}} */
388
389
390/* {{{ sjis functions */
/*
  sjis byte classification.  The argument is compared as given, so callers
  must pass an unsigned byte value.

  valid_sjis_head(c)  lead byte,  0x81..0x9F or 0xE0..0xFC
  valid_sjis_tail(c)  trail byte, 0x40..0x7E or 0x80..0xFC
*/
#define valid_sjis_head(c)  (((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xFC))
#define valid_sjis_tail(c)  (((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC))


/*
  Check whether a two byte sjis character begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 for a valid lead/trail pair, 0 otherwise.
*/
static unsigned int check_mb_sjis(const char *start, const char *end)
{
  const unsigned char lead = (unsigned char) start[0];

  if (!valid_sjis_head(lead)) {
    return 0;
  }
  if (end - start <= 1) {
    return 0;                       /* trail byte is missing */
  }
  return valid_sjis_tail((unsigned char) start[1]) ? 2 : 0;
}
399
400
/*
  Character length implied by an sjis lead byte.  Only the low eight bits
  of the argument are examined: 2 when they form an sjis lead byte
  (0x81..0x9F or 0xE0..0xFC), 1 otherwise.
*/
static unsigned int mysql_mbcharlen_sjis(unsigned int sjis)
{
  const unsigned char lead = (unsigned char) sjis;

  if (lead >= 0x81 && lead <= 0x9F) {
    return 2;
  }
  if (lead >= 0xE0 && lead <= 0xFC) {
    return 2;
  }
  return 1;
}
405/* }}} */
406
407
408/* {{{ ucs2 functions */
/*
  Check whether a ucs2 character (always two bytes) is available at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 2 when at least two bytes are available and 0 otherwise, so a
  caller never consumes bytes beyond end on truncated input.  This matches
  check_mb_utf16(), which also rejects ranges shorter than one code unit.
*/
static unsigned int check_mb_ucs2(const char *start, const char *end)
{
  return (end - start >= 2) ? 2 : 0;
}
413
/*
  Character length for ucs2.  The argument is ignored; the result is
  always 2.
*/
static unsigned int mysql_mbcharlen_ucs2(unsigned int ucs2)
{
  (void) ucs2;                      /* value does not influence the length */
  return 2;
}
418/* }}} */
419
420
421/* {{{ ujis functions */
/*
  ujis byte classification.  Each macro masks its argument with 0xFF, so
  plain (possibly signed) char values can be passed directly.

  valid_ujis(c)       0xA1..0xFE
  valid_ujis_kata(c)  0xA1..0xDF
  valid_ujis_ss2(c)   0x8E
  valid_ujis_ss3(c)   0x8F
*/
#define valid_ujis(c)       ((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xFE))
#define valid_ujis_kata(c)  ((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xDF))
#define valid_ujis_ss2(c)   (((c)&0xFF) == 0x8E)
#define valid_ujis_ss3(c)   (((c)&0xFF) == 0x8F)

/*
  Check whether a multi-byte ujis character begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns the number of bytes the character occupies:
    2  two bytes that both satisfy valid_ujis()
    2  0x8E followed by a byte that satisfies valid_ujis_kata()
    3  0x8F followed by two bytes that satisfy valid_ujis()
  and 0 for anything else (7-bit first byte, truncated or invalid input).
*/
static unsigned int check_mb_ujis(const char *start, const char *end)
{
  if (start >= end || (unsigned char) start[0] < 0x80) {
    return 0;                       /* empty range or 7-bit byte */
  }

  /*
    The two byte forms must not look at start[1] unless it lies inside the
    range; without this guard a one byte range could be reported as a two
    byte character.
  */
  if (end - start > 1) {
    if (valid_ujis(start[0]) && valid_ujis(start[1])) {
      return 2;
    }
    if (valid_ujis_ss2(start[0]) && valid_ujis_kata(start[1])) {
      return 2;
    }
  }

  if (end - start > 2 && valid_ujis_ss3(start[0]) &&
      valid_ujis(start[1]) && valid_ujis(start[2])) {
    return 3;
  }
  return 0;
}
443
444
/*
  Character length implied by a ujis lead byte.  Only the low eight bits
  of the argument are examined:
    0xA1..0xFE, 0x8E -> 2
    0x8F             -> 3
    anything else    -> 1
*/
static unsigned int mysql_mbcharlen_ujis(unsigned int ujis)
{
  const unsigned int lead = ujis & 0xFF;

  switch (lead) {
    case 0x8E:
      return 2;
    case 0x8F:
      return 3;
    default:
      return (lead >= 0xA1 && lead <= 0xFE) ? 2 : 1;
  }
}
449/* }}} */
450
451
452
453/* {{{ utf16 functions */
/*
  Classify the first byte of a utf16 code unit (low eight bits of x):
    UTF16_HIGH_HEAD  top six bits equal 0xD8 (bytes 0xD8..0xDB)
    UTF16_LOW_HEAD   top six bits equal 0xDC (bytes 0xDC..0xDF)
*/
#define UTF16_HIGH_HEAD(x)  ((((unsigned char) (x)) & 0xFC) == 0xD8)
#define UTF16_LOW_HEAD(x)   ((((unsigned char) (x)) & 0xFC) == 0xDC)

/*
  Measure the utf16 character that begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 4 for a high-head unit followed by a low-head unit, 2 for a unit
  that is neither, and 0 when the range is too short, a high-head unit is
  not followed by a low-head unit, or the first unit is a low-head unit.
*/
static unsigned int check_mb_utf16(const char *start, const char *end)
{
  if (end - start < 2) {
    return 0;                       /* not even one code unit */
  }

  if (UTF16_LOW_HEAD(start[0])) {
    return 0;                       /* second half without a first half */
  }

  if (!UTF16_HIGH_HEAD(start[0])) {
    return 2;                       /* ordinary single unit */
  }

  /* first half of a pair: the second unit must exist and be a low head */
  if (end - start < 4) {
    return 0;
  }
  return UTF16_LOW_HEAD(start[2]) ? 4 : 0;
}
472
473
/*
  Character length implied by the first byte of a utf16 code unit.

  Only the low eight bits of the argument are examined: 4 when their top
  six bits equal 0xD8 (bytes 0xD8..0xDB, the first unit of a pair),
  2 otherwise.

  The return type is spelled unsigned int, like the other *_mbcharlen_*
  functions in this file.
*/
static unsigned int mysql_mbcharlen_utf16(unsigned int utf16)
{
  const unsigned int lead = utf16 & 0xFF;

  return ((lead & 0xFC) == 0xD8) ? 4 : 2;
}
478/* }}} */
479
480
481/* {{{ utf32 functions */
/*
  Check whether a utf32 character (always four bytes) is available at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns 4 when at least four bytes are available and 0 otherwise, so a
  caller never consumes bytes beyond end on truncated input.  The byte
  values themselves are not inspected.
*/
static unsigned int
check_mb_utf32(const char *start, const char *end)
{
  return (end - start >= 4) ? 4 : 0;
}
487
488
/*
  Character length for utf32.  The argument is ignored; the result is
  always 4.

  The return type is spelled unsigned int, like the other *_mbcharlen_*
  functions in this file, and the unused parameter is silenced with a
  plain cast instead of a compiler-specific attribute.
*/
static unsigned int
mysql_mbcharlen_utf32(unsigned int utf32)
{
  (void) utf32;                     /* value does not influence the length */
  return 4;
}
494/* }}} */
495
496/* {{{ gb18030 functions */
/*
  gb18030 byte classification; the argument is truncated to its low byte.

  is_gb18030_odd(c)     0x81..0xFE
  is_gb18030_even_2(c)  0x40..0x7E or 0x80..0xFE
  is_gb18030_even_4(c)  0x30..0x39
*/
#define is_gb18030_odd(c)     (((unsigned char) (c)) >= 0x81 && ((unsigned char) (c)) <= 0xFE)
#define is_gb18030_even_2(c)  ((((unsigned char) (c)) >= 0x40 && ((unsigned char) (c)) <= 0x7E) || \
                               (((unsigned char) (c)) >= 0x80 && ((unsigned char) (c)) <= 0xFE))
#define is_gb18030_even_4(c)  (((unsigned char) (c)) >= 0x30 && ((unsigned char) (c)) <= 0x39)


/*
  Character length implied by the leading one or two bytes of a gb18030
  character, passed as a single number.

  c <= 0xFF (one byte given):
    1 when the byte does not satisfy is_gb18030_odd(), 0 when it does.
  0x100 <= c <= 0xFFFF (two bytes given, first byte in bits 8..15):
    2 when the first byte satisfies is_gb18030_odd() and the second
      satisfies is_gb18030_even_2(),
    4 when the first byte satisfies is_gb18030_odd() and the second
      satisfies is_gb18030_even_4(),
    0 otherwise.
  c > 0xFFFF: 0.
*/
static unsigned int mysql_mbcharlen_gb18030(unsigned int c)
{
  const unsigned int first = (c >> 8) & 0xFF;
  const unsigned int second = c & 0xFF;

  if (c <= 0xFF) {
    return is_gb18030_odd(c) ? 0 : 1;
  }
  if (c > 0xFFFF) {
    return 0;
  }
  if (!is_gb18030_odd(first)) {
    return 0;
  }
  if (is_gb18030_even_2(second)) {
    return 2;
  }
  return is_gb18030_even_4(second) ? 4 : 0;
}
519
/*
  Check whether a multi-byte gb18030 character begins at start.

  start  first byte of the candidate character
  end    one past the last readable byte

  Returns
    2  first byte 0x81..0xFE, second byte 0x40..0x7E or 0x80..0xFE
    4  bytes 1 and 3 in 0x81..0xFE, bytes 2 and 4 in 0x30..0x39
    0  anything else, including ranges that are too short
*/
static unsigned int check_mb_gb18030_valid(const char * start, const char * end)
{
  const unsigned char *bytes = (const unsigned char *) start;

  if (end - start < 2) {
    return 0;
  }
  if (bytes[0] < 0x81 || bytes[0] > 0xFE) {
    return 0;
  }

  /* two byte form */
  if ((bytes[1] >= 0x40 && bytes[1] <= 0x7E) ||
      (bytes[1] >= 0x80 && bytes[1] <= 0xFE)) {
    return 2;
  }

  /* four byte form */
  if (end - start < 4) {
    return 0;
  }
  if (bytes[1] < 0x30 || bytes[1] > 0x39) {
    return 0;
  }
  if (bytes[2] < 0x81 || bytes[2] > 0xFE) {
    return 0;
  }
  return (bytes[3] >= 0x30 && bytes[3] <= 0x39) ? 4 : 0;
}
534/* }}} */
535
/*
 The server sometimes names the full UTF-8 implementation (the mb4 one)
 utf8mb4 and keeps the name utf8 for the old one, for backward-compatibility
 reasons. Sometimes utf8mb4 is just called utf8 and the old character sets
 are called utf8mb3.
 The names are macros so that they are easy to change; they could be made
 dependent on the build configuration.
*/

#define UTF8_MB4 "utf8mb4"
#define UTF8_MB3 "utf8"
544
545/* {{{ mysql_charsets */
546const MARIADB_CHARSET_INFO mariadb_compiled_charsets[] =
547{
548 { 1, 1, "big5","big5_chinese_ci", "", 950, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
549 { 3, 1, "dec8", "dec8_swedisch_ci", "", 0, "DEC", 1, 1, NULL, NULL},
550 { 4, 1, "cp850", "cp850_general_ci", "", 850, "CP850", 1, 1, NULL, NULL},
551 { 6, 1, "hp8", "hp8_english_ci", "", 0, "HP-ROMAN8", 1, 1, NULL, NULL},
552 { 7, 1, "koi8r", "koi8r_general_ci", "", 20866, "KOI8R", 1, 1, NULL, NULL},
553 { 8, 1, "latin1", "latin1_swedish_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
554 { 9, 1, "latin2", "latin2_general_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
555 { 10, 1, "swe7", "swe7_swedish_ci", "", 20107, "", 1, 1, NULL, NULL},
556 { 11, 1, "ascii", "ascii_general_ci", "", 1252, "ASCII", 1, 1, NULL, NULL},
557 { 12, 1, "ujis", "ujis_japanese_ci", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
558 { 13, 1, "sjis", "sjis_japanese_ci", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
559 { 16, 1, "hebrew", "hebrew_general_ci", "", 1255, "HEBREW", 1, 1, NULL, NULL},
560 { 18, 1, "tis620", "tis620_thai_ci", "", 874, "TIS620", 1, 1, NULL, NULL},
561 { 19, 1, "euckr", "euckr_korean_ci", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
562 { 22, 1, "koi8u", "koi8u_general_ci", "", 21866, "KOI8U", 1, 1, NULL, NULL},
563 { 24, 1, "gb2312", "gb2312_chinese_ci", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
564 { 25, 1, "greek", "greek_general_ci", "", 28597, "GREEK", 1, 1, NULL, NULL},
565 { 26, 1, "cp1250", "cp1250_general_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
566 { 28, 1, "gbk", "gbk_chinese_ci", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
567 { 30, 1, "latin5", "latin5_turkish_ci", "", 1254, "LATIN5", 1, 1, NULL, NULL},
568 { 32, 1, "armscii8", "armscii8_general_ci", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
569 { 33, 1, UTF8_MB3, UTF8_MB3"_general_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
570 { 35, 1, "ucs2", "ucs2_general_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
571 { 36, 1, "cp866", "cp866_general_ci", "", 866, "CP866", 1, 1, NULL, NULL},
572 { 37, 1, "keybcs2", "keybcs2_general_ci", "", 0, "", 1, 1, NULL, NULL},
573 { 38, 1, "macce", "macce_general_ci", "", 10029, "CP1282", 1, 1, NULL, NULL},
574 { 39, 1, "macroman", "macroman_general_ci", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
575 { 40, 1, "cp852", "cp852_general_ci", "", 852, "CP852", 1, 1, NULL, NULL},
576 { 41, 1, "latin7", "latin7_general_ci", "", 28603, "LATIN7", 1, 1, NULL, NULL},
577 { 51, 1, "cp1251", "cp1251_general_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
578 { 57, 1, "cp1256", "cp1256_general_ci", "", 1256, "CP1256", 1, 1, NULL, NULL},
579 { 59, 1, "cp1257", "cp1257_general_ci", "", 1257, "CP1257", 1, 1, NULL, NULL},
580 { 63, 1, "binary", "binary", "", 0, "ASCII", 1, 1, NULL, NULL},
581 { 64, 1, "armscii8", "armscii8_bin", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
582 { 92, 1, "geostd8", "geostd8_general_ci", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
583 { 95, 1, "cp932", "cp932_japanese_ci", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
584 { 97, 1, "eucjpms", "eucjpms_japanese_ci", "", 932, "EUC-JP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
585 { 2, 1, "latin2", "latin2_czech_cs", "", 852, "LATIN2", 1, 1, NULL, NULL},
586 { 5, 1, "latin1", "latin1_german_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
587 { 14, 1, "cp1251", "cp1251_bulgarian_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
588 { 15, 1, "latin1", "latin1_danish_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
589 { 17, 1, "filename", "filename", "", 0, "", 1, 5, NULL, NULL},
590 { 20, 1, "latin7", "latin7_estonian_cs", "", 28603, "LATIN7", 1, 1, NULL, NULL},
591 { 21, 1, "latin2", "latin2_hungarian_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
592 { 23, 1, "cp1251", "cp1251_ukrainian_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
593 { 27, 1, "latin2", "latin2_croatian_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
594 { 29, 1, "cp1257", "cp1257_lithunian_ci", "", 1257, "CP1257", 1, 1, NULL, NULL},
595 { 31, 1, "latin1", "latin1_german2_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
596 { 34, 1, "cp1250", "cp1250_czech_cs", "", 1250, "CP1250", 1, 1, NULL, NULL},
597 { 42, 1, "latin7", "latin7_general_cs", "", 28603, "LATIN7", 1, 1, NULL, NULL},
598 { 43, 1, "macce", "macce_bin", "", 10029, "CP1282", 1, 1, NULL, NULL},
599 { 44, 1, "cp1250", "cp1250_croatian_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
600 { 45, 1, UTF8_MB4, UTF8_MB4"_general_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
601 { 46, 1, UTF8_MB4, UTF8_MB4"_bin", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
602 { 47, 1, "latin1", "latin1_bin", "", 1250, "LATIN1", 1, 1, NULL, NULL},
603 { 48, 1, "latin1", "latin1_general_ci", "", 1250, "LATIN1", 1, 1, NULL, NULL},
604 { 49, 1, "latin1", "latin1_general_cs", "", 1250, "LATIN1", 1, 1, NULL, NULL},
605 { 50, 1, "cp1251", "cp1251_bin", "", 1251, "CP1251", 1, 1, NULL, NULL},
606 { 52, 1, "cp1251", "cp1251_general_cs", "", 1251, "CP1251", 1, 1, NULL, NULL},
607 { 53, 1, "macroman", "macroman_bin", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
608 { 54, 1, "utf16", "utf16_general_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
609 { 55, 1, "utf16", "utf16_bin", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
610 { 56, 1, "utf16le", "utf16_general_ci", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
611 { 58, 1, "cp1257", "cp1257_bin", "", 1257, "CP1257", 1, 1, NULL, NULL},
612#ifdef USED_TO_BE_SO_BEFORE_MYSQL_5_5
613 { 60, 1, "armascii8", "armascii8_bin", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
614#endif
615 { 60, 1, "utf32", "utf32_general_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
616 { 61, 1, "utf32", "utf32_bin", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
617 { 62, 1, "utf16le", "utf16_bin", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
618 { 65, 1, "ascii", "ascii_bin", "", 1252, "ASCII", 1, 1, NULL, NULL},
619 { 66, 1, "cp1250", "cp1250_bin", "", 1250, "CP1250", 1, 1, NULL, NULL},
620 { 67, 1, "cp1256", "cp1256_bin", "", 1256, "CP1256", 1, 1, NULL, NULL},
621 { 68, 1, "cp866", "cp866_bin", "", 866, "CP866", 1, 1, NULL, NULL},
622 { 69, 1, "dec8", "dec8_bin", "", 0, "DEC", 1, 1, NULL, NULL},
623 { 70, 1, "greek", "greek_bin", "", 28597, "GREEK", 1, 1, NULL, NULL},
624 { 71, 1, "hebrew", "hebrew_bin", "", 1255, "hebrew", 1, 1, NULL, NULL},
625 { 72, 1, "hp8", "hp8_bin", "", 0, "HPROMAN-8", 1, 1, NULL, NULL},
626 { 73, 1, "keybcs2", "keybcs2_bin", "", 0, "", 1, 1, NULL, NULL},
627 { 74, 1, "koi8r", "koi8r_bin", "", 20866, "KOI8R", 1, 1, NULL, NULL},
628 { 75, 1, "koi8u", "koi8u_bin", "", 21866, "KOI8U", 1, 1, NULL, NULL},
629 { 76, 1, UTF8_MB3, UTF8_MB3"_tolower_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
630 { 77, 1, "latin2", "latin2_bin", "", 28592, "LATIN2", 1, 1, NULL, NULL},
631 { 78, 1, "latin5", "latin5_bin", "", 1254, "LATIN5", 1, 1, NULL, NULL},
632 { 79, 1, "latin7", "latin7_bin", "", 28603, "LATIN7", 1, 1, NULL, NULL},
633 { 80, 1, "cp850", "cp850_bin", "", 850, "CP850", 1, 1, NULL, NULL},
634 { 81, 1, "cp852", "cp852_bin", "", 852, "CP852", 1, 1, NULL, NULL},
635 { 82, 1, "swe7", "swe7_bin", "", 0, "", 1, 1, NULL, NULL},
636 { 93, 1, "geostd8", "geostd8_bin", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
637 { 83, 1, UTF8_MB3, UTF8_MB3"_bin", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
638 { 84, 1, "big5", "big5_bin", "", 65000, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
639 { 85, 1, "euckr", "euckr_bin", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
640 { 86, 1, "gb2312", "gb2312_bin", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
641 { 87, 1, "gbk", "gbk_bin", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
642 { 88, 1, "sjis", "sjis_bin", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
643 { 89, 1, "tis620", "tis620_bin", "", 874, "TIS620", 1, 1, NULL, NULL},
644 { 90, 1, "ucs2", "ucs2_bin", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
645 { 91, 1, "ujis", "ujis_bin", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
646 { 94, 1, "latin1", "latin1_spanish_ci", "", 1252, "LATIN1", 1, 1, NULL, NULL},
647 { 96, 1, "cp932", "cp932_bin", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
648 { 99, 1, "cp1250", "cp1250_polish_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
649 { 98, 1, "eucjpms", "eucjpms_bin", "", 932, "EUCJP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
650 { 101, 1, "utf16", "utf16_unicode_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
651 { 102, 1, "utf16", "utf16_icelandic_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
652 { 103, 1, "utf16", "utf16_latvian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
653 { 104, 1, "utf16", "utf16_romanian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
654 { 105, 1, "utf16", "utf16_slovenian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
655 { 106, 1, "utf16", "utf16_polish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
656 { 107, 1, "utf16", "utf16_estonian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
657 { 108, 1, "utf16", "utf16_spanish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
658 { 109, 1, "utf16", "utf16_swedish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
659 { 110, 1, "utf16", "utf16_turkish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
660 { 111, 1, "utf16", "utf16_czech_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
661 { 112, 1, "utf16", "utf16_danish_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
662 { 113, 1, "utf16", "utf16_lithunian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
663 { 114, 1, "utf16", "utf16_slovak_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
664 { 115, 1, "utf16", "utf16_spanish2_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
665 { 116, 1, "utf16", "utf16_roman_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
666 { 117, 1, "utf16", "utf16_persian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
667 { 118, 1, "utf16", "utf16_esperanto_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
668 { 120, 1, "utf16", "utf16_sinhala_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
669 { 121, 1, "utf16", "utf16_german2_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
670 { 122, 1, "utf16", "utf16_croatian_mysql561_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
671 { 123, 1, "utf16", "utf16_unicode_520_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
672 { 124, 1, "utf16", "utf16_vietnamese_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
673 { 128, 1, "ucs2", "ucs2_unicode_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
674 { 129, 1, "ucs2", "ucs2_icelandic_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
675 { 130, 1, "ucs2", "ucs2_latvian_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
676 { 131, 1, "ucs2", "ucs2_romanian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
677 { 132, 1, "ucs2", "ucs2_slovenian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
678 { 133, 1, "ucs2", "ucs2_polish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
679 { 134, 1, "ucs2", "ucs2_estonian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
680 { 135, 1, "ucs2", "ucs2_spanish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
681 { 136, 1, "ucs2", "ucs2_swedish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
682 { 137, 1, "ucs2", "ucs2_turkish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
683 { 138, 1, "ucs2", "ucs2_czech_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
684 { 139, 1, "ucs2", "ucs2_danish_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
685 { 140, 1, "ucs2", "ucs2_lithunian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
686 { 141, 1, "ucs2", "ucs2_slovak_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
687 { 142, 1, "ucs2", "ucs2_spanish2_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
688 { 143, 1, "ucs2", "ucs2_roman_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
689 { 144, 1, "ucs2", "ucs2_persian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
690 { 145, 1, "ucs2", "ucs2_esperanto_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
691 { 146, 1, "ucs2", "ucs2_hungarian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
692 { 147, 1, "ucs2", "ucs2_sinhala_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
693 { 148, 1, "ucs2", "ucs2_german2_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
694 { 149, 1, "ucs2", "ucs2_croatian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
695 { 150, 1, "ucs2", "ucs2_unicode_520_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
696 { 151, 1, "ucs2", "ucs2_vietnamese_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
697 { 159, 1, "ucs2", "ucs2_general_mysql500_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2}, /* MDB */
698 { 160, 1, "utf32", "utf32_unicode_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
699 { 161, 1, "utf32", "utf32_icelandic_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
700 { 162, 1, "utf32", "utf32_latvian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
701 { 163, 1, "utf32", "utf32_romanian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
702 { 164, 1, "utf32", "utf32_slovenian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
703 { 165, 1, "utf32", "utf32_polish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
704 { 166, 1, "utf32", "utf32_estonian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
705 { 167, 1, "utf32", "utf32_spanish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
706 { 168, 1, "utf32", "utf32_swedish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
707 { 169, 1, "utf32", "utf32_turkish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
708 { 170, 1, "utf32", "utf32_czech_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
709 { 171, 1, "utf32", "utf32_danish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
710 { 172, 1, "utf32", "utf32_lithunian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
711 { 173, 1, "utf32", "utf32_slovak_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
712 { 174, 1, "utf32", "utf32_spanish_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
713 { 175, 1, "utf32", "utf32_roman_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
714 { 176, 1, "utf32", "utf32_persian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
715 { 177, 1, "utf32", "utf32_esperanto_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
716 { 178, 1, "utf32", "utf32_hungarian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
717 { 179, 1, "utf32", "utf32_sinhala_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
718 { 180, 1, "utf32", "utf32_german2_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
719 { 181, 1, "utf32", "utf32_croatian_mysql561_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
720 { 182, 1, "utf32", "utf32_unicode_520_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
721 { 183, 1, "utf32", "utf32_vietnamese_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
722
723 { 192, 1, UTF8_MB3, UTF8_MB3"_general_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
724 { 193, 1, UTF8_MB3, UTF8_MB3"_icelandic_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
725 { 194, 1, UTF8_MB3, UTF8_MB3"_latvian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
726 { 195, 1, UTF8_MB3, UTF8_MB3"_romanian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
727 { 196, 1, UTF8_MB3, UTF8_MB3"_slovenian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
728 { 197, 1, UTF8_MB3, UTF8_MB3"_polish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
729 { 198, 1, UTF8_MB3, UTF8_MB3"_estonian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
730 { 199, 1, UTF8_MB3, UTF8_MB3"_spanish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
731 { 119, 1, UTF8_MB3, UTF8_MB3"_spanish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
732 { 200, 1, UTF8_MB3, UTF8_MB3"_swedish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
733 { 201, 1, UTF8_MB3, UTF8_MB3"_turkish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
734 { 202, 1, UTF8_MB3, UTF8_MB3"_czech_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
735 { 203, 1, UTF8_MB3, UTF8_MB3"_danish_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
736 { 204, 1, UTF8_MB3, UTF8_MB3"_lithunian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
737 { 205, 1, UTF8_MB3, UTF8_MB3"_slovak_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
738 { 206, 1, UTF8_MB3, UTF8_MB3"_spanish2_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
739 { 207, 1, UTF8_MB3, UTF8_MB3"_roman_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
740 { 208, 1, UTF8_MB3, UTF8_MB3"_persian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
741 { 209, 1, UTF8_MB3, UTF8_MB3"_esperanto_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
742 { 210, 1, UTF8_MB3, UTF8_MB3"_hungarian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
743 { 211, 1, UTF8_MB3, UTF8_MB3"_sinhala_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
744 { 212, 1, UTF8_MB3, UTF8_MB3"_german_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
745 { 214, 1, UTF8_MB3, UTF8_MB3"_unicode_520_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
746 { 215, 1, UTF8_MB3, UTF8_MB3"_vietnamese_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
747 { 213, 1, UTF8_MB3, UTF8_MB3"_croatian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
748 { 223, 1, UTF8_MB3, UTF8_MB3"_general_mysql500_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
749
750 { 224, 1, UTF8_MB4, UTF8_MB4"_unicode_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
751 { 225, 1, UTF8_MB4, UTF8_MB4"_icelandic_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
752 { 226, 1, UTF8_MB4, UTF8_MB4"_latvian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
753 { 227, 1, UTF8_MB4, UTF8_MB4"_romanian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
754 { 228, 1, UTF8_MB4, UTF8_MB4"_slovenian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
755 { 229, 1, UTF8_MB4, UTF8_MB4"_polish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
756 { 230, 1, UTF8_MB4, UTF8_MB4"_estonian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
757 { 231, 1, UTF8_MB4, UTF8_MB4"_spanish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
758 { 232, 1, UTF8_MB4, UTF8_MB4"_swedish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
759 { 233, 1, UTF8_MB4, UTF8_MB4"_turkish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
760 { 234, 1, UTF8_MB4, UTF8_MB4"_czech_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
761 { 235, 1, UTF8_MB4, UTF8_MB4"_danish_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
762 { 236, 1, UTF8_MB4, UTF8_MB4"_lithuanian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
763 { 237, 1, UTF8_MB4, UTF8_MB4"_slovak_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
764 { 238, 1, UTF8_MB4, UTF8_MB4"_spanish2_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
765 { 239, 1, UTF8_MB4, UTF8_MB4"_roman_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
766 { 240, 1, UTF8_MB4, UTF8_MB4"_persian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
767 { 241, 1, UTF8_MB4, UTF8_MB4"_esperanto_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
768 { 242, 1, UTF8_MB4, UTF8_MB4"_hungarian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
769 { 243, 1, UTF8_MB4, UTF8_MB4"_sinhala_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
770 { 244, 1, UTF8_MB4, UTF8_MB4"_german2_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
771 { 245, 1, UTF8_MB4, UTF8_MB4"_croatian_mysql561_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
772 { 246, 1, UTF8_MB4, UTF8_MB4"_unicode_520_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
773 { 247, 1, UTF8_MB4, UTF8_MB4"_vietnamese_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
774 { 248, 1, "gb18030", "gb18030_chinese_ci", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
775 { 249, 1, "gb18030", "gb18030_bin", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
776 { 250, 1, "gb18030", "gb18030_unicode_520_ci", "", 54936, "GB18030", 1, 4, mysql_mbcharlen_gb18030, check_mb_gb18030_valid},
777
778
779 { 254, 1, UTF8_MB3, UTF8_MB3"_general_cs", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8, check_mb_utf8_valid},
780
781 { 255, 1, UTF8_MB4, UTF8_MB4"_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
782 { 256, 1, UTF8_MB4, UTF8_MB4"_de_pb_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
783 { 257, 1, UTF8_MB4, UTF8_MB4"_is_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
784 { 258, 1, UTF8_MB4, UTF8_MB4"_lv_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
785 { 259, 1, UTF8_MB4, UTF8_MB4"_ro_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
786 { 260, 1, UTF8_MB4, UTF8_MB4"_sl_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
787 { 261, 1, UTF8_MB4, UTF8_MB4"_pl_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
788 { 262, 1, UTF8_MB4, UTF8_MB4"_et_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
789 { 263, 1, UTF8_MB4, UTF8_MB4"_es_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
790 { 264, 1, UTF8_MB4, UTF8_MB4"_sv_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
791 { 265, 1, UTF8_MB4, UTF8_MB4"_tr_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
792 { 266, 1, UTF8_MB4, UTF8_MB4"_cs_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
793 { 267, 1, UTF8_MB4, UTF8_MB4"_da_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
794 { 268, 1, UTF8_MB4, UTF8_MB4"_lt_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
795 { 269, 1, UTF8_MB4, UTF8_MB4"_sk_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
796 { 270, 1, UTF8_MB4, UTF8_MB4"_es_trad_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
797 { 271, 1, UTF8_MB4, UTF8_MB4"_la_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
798 { 273, 1, UTF8_MB4, UTF8_MB4"_eo_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
799 { 274, 1, UTF8_MB4, UTF8_MB4"_hu_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
800 { 275, 1, UTF8_MB4, UTF8_MB4"_hr_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
801 { 277, 1, UTF8_MB4, UTF8_MB4"_vi_0900_ai_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
802 { 278, 1, UTF8_MB4, UTF8_MB4"_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
803 { 279, 1, UTF8_MB4, UTF8_MB4"_de_pb__0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
804 { 280, 1, UTF8_MB4, UTF8_MB4"_is_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
805 { 281, 1, UTF8_MB4, UTF8_MB4"_lv_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
806 { 282, 1, UTF8_MB4, UTF8_MB4"_ro_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
807 { 283, 1, UTF8_MB4, UTF8_MB4"_sl_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
808 { 284, 1, UTF8_MB4, UTF8_MB4"_pl_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
809 { 285, 1, UTF8_MB4, UTF8_MB4"_et_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
810 { 286, 1, UTF8_MB4, UTF8_MB4"_es_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
811 { 287, 1, UTF8_MB4, UTF8_MB4"_sv_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
812 { 288, 1, UTF8_MB4, UTF8_MB4"_tr_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
813 { 289, 1, UTF8_MB4, UTF8_MB4"_cs_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
814 { 290, 1, UTF8_MB4, UTF8_MB4"_da_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
815 { 291, 1, UTF8_MB4, UTF8_MB4"_lt_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
816 { 292, 1, UTF8_MB4, UTF8_MB4"_sk_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
817 { 293, 1, UTF8_MB4, UTF8_MB4"_es_trad_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
818 { 294, 1, UTF8_MB4, UTF8_MB4"_la_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
819 { 296, 1, UTF8_MB4, UTF8_MB4"_eo_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
820 { 297, 1, UTF8_MB4, UTF8_MB4"_hu_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
821 { 298, 1, UTF8_MB4, UTF8_MB4"_hr_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
822 { 300, 1, UTF8_MB4, UTF8_MB4"_vi_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
823 { 303, 1, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
824 { 304, 1, UTF8_MB4, UTF8_MB4"_ja_0900_as_cs_ks", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
825 { 305, 1, UTF8_MB4, UTF8_MB4"_0900_as_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
826 { 306, 1, UTF8_MB4, UTF8_MB4"_ru_0900_as_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
827 { 307, 1, UTF8_MB4, UTF8_MB4"_ru_0900_as_cs", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
828 { 576, 1, UTF8_MB3, UTF8_MB3"_croatian_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
829 { 577, 1, UTF8_MB3, UTF8_MB3"_myanmar_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
830 { 578, 1, UTF8_MB3, UTF8_MB3"_thai_520_w2", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid}, /*MDB*/
831 { 608, 1, UTF8_MB4, UTF8_MB4"_croatian_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
832 { 609, 1, UTF8_MB4, UTF8_MB4"_myanmar_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
833 { 610, 1, UTF8_MB4, UTF8_MB4"_thai_520_w2", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
834 { 640, 1, "ucs2", "ucs2_croatian_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
835 { 641, 1, "ucs2", "ucs2_myanmar_ci", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
836 { 642, 1, "ucs2", "ucs2_thai_520_w2", "", 1200, "UCS2-BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
837 { 672, 1, "utf16", "utf16_croatian_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
838 { 673, 1, "utf16", "utf16_myanmar_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
839 { 674, 1, "utf16", "utf16_thai_520_w2", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
840 { 736, 1, "utf32", "utf32_croatian_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
841 { 737, 1, "utf32", "utf32_myanmar_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
842 { 738, 1, "utf32", "utf32_thai_520_w2", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
843 {1025, 1, "big5","big5_chinese_nopad_ci", "", 950, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
844 {1027, 1, "dec8", "dec8_swedisch_nopad_ci", "", 0, "DEC", 1, 1, NULL, NULL},
845 {1028, 1, "cp850", "cp850_general_nopad_ci", "", 850, "CP850", 1, 1, NULL, NULL},
846 {1030, 1, "hp8", "hp8_english_nopad_ci", "", 0, "HP-ROMAN8", 1, 1, NULL, NULL},
847 {1031, 1, "koi8r", "koi8r_general_nopad_ci", "", 878, "KOI8R", 1, 1, NULL, NULL},
848 {1032, 1, "latin1", "latin1_swedish_nopad_ci", "", 850, "LATIN1", 1, 1, NULL, NULL},
849 {1033, 1, "latin2", "latin2_general_nopad_ci", "", 852, "LATIN2", 1, 1, NULL, NULL},
850 {1034, 1, "swe7", "swe7_swedish_nopad_ci", "", 20107, "", 1, 1, NULL, NULL},
851 {1035, 1, "ascii", "ascii_general_nopad_ci", "", 1252, "ASCII", 1, 1, NULL, NULL},
852 {1036, 1, "ujis", "ujis_japanese_nopad_ci", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
853 {1037, 1, "sjis", "sjis_japanese_nopad_ci", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
854 {1040, 1, "hebrew", "hebrew_general_nopad_ci", "", 1255, "HEBREW", 1, 1, NULL, NULL},
855 {1042, 1, "tis620", "tis620_thai_nopad_ci", "", 874, "TIS620", 1, 1, NULL, NULL},
856 {1043, 1, "euckr", "euckr_korean_nopad_ci", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
857 {1046, 1, "koi8u", "koi8u_general_nopad_ci", "", 20866, "KOI8U", 1, 1, NULL, NULL},
858 {1048, 1, "gb2312", "gb2312_chinese_nopad_ci", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
859 {1049, 1, "greek", "greek_general_nopad_ci", "", 28597, "GREEK", 1, 1, NULL, NULL},
860 {1050, 1, "cp1250", "cp1250_general_nopad_ci", "", 1250, "CP1250", 1, 1, NULL, NULL},
861 {1052, 1, "gbk", "gbk_chinese_nopad_ci", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
862 {1054, 1, "latin5", "latin5_turkish_nopad_ci", "", 1254, "LATIN5", 1, 1, NULL, NULL},
863 {1056, 1, "armscii8", "armscii8_general_nopad_ci", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
864 {1057, 1, UTF8_MB3, UTF8_MB3"_general_nopad_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
865 {1059, 1, "ucs2", "ucs2_general_nopad_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
866 {1060, 1, "cp866", "cp866_general_nopad_ci", "", 866, "CP866", 1, 1, NULL, NULL},
867 {1061, 1, "keybcs2", "keybcs2_general_nopad_ci", "", 0, "", 1, 1, NULL, NULL},
868 {1062, 1, "macce", "macce_general_nopad_ci", "", 10029, "CP1282", 1, 1, NULL, NULL},
869 {1063, 1, "macroman", "macroman_general_nopad_ci", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
870 {1064, 1, "cp852", "cp852_general_nopad_ci", "", 852, "CP852", 1, 1, NULL, NULL},
871 {1065, 1, "latin7", "latin7_general_nopad_ci", "", 28603, "LATIN7", 1, 1, NULL, NULL},
872 {1067, 1, "macce", "macce_nopad_bin", "", 10029, "CP1282", 1, 1, NULL, NULL},
873 {1069, 1, UTF8_MB4, UTF8_MB4"_general_nopad_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
874 {1070, 1, UTF8_MB4, UTF8_MB4"_general_nopad_bin", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
875 {1071, 1, "latin1", "latin1_nopad_bin", "", 850, "LATIN1", 1, 1, NULL, NULL},
876 {1074, 1, "cp1251", "cp1251_nopad_bin", "", 1251, "CP1251", 1, 1, NULL, NULL},
877 {1075, 1, "cp1251", "cp1251_general_nopad_ci", "", 1251, "CP1251", 1, 1, NULL, NULL},
878 {1077, 1, "macroman", "macroman_nopad_bin", "", 10000, "MACINTOSH", 1, 1, NULL, NULL},
879 {1078, 1, "utf16", "utf16_general_nopad_ci", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
880 {1079, 1, "utf16", "utf16_nopad_bin", "", 0, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
881 {1080, 1, "utf16le", "utf16le_general_nopad_ci", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
882 {1081, 1, "cp1256", "cp1256_general_nopad_ci", "", 1256, "CP1256", 1, 1, NULL, NULL},
883 {1082, 1, "cp1257", "cp1257_nopad_bin", "", 1257, "CP1257", 1, 1, NULL, NULL},
884 {1083, 1, "cp1257", "cp1257_general_nopad_ci", "", 1257, "CP1257", 1, 1, NULL, NULL},
885 {1084, 1, "utf32", "utf32_general_nopad_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
886 {1085, 1, "utf32", "utf32_nopad_bin", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
887 {1086, 1, "utf16le", "utf16le_nopad_bin", "", 1200, "UTF16LE", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
888 {1088, 1, "armscii8", "armscii8_nopad_bin", "", 0, "ARMSCII-8", 1, 1, NULL, NULL},
889 {1089, 1, "ascii", "ascii_nopad_bin", "", 1252, "ASCII", 1, 1, NULL, NULL},
890 {1090, 1, "cp1250", "cp1250_nopad_bin", "", 1250, "CP1250", 1, 1, NULL, NULL},
891 {1091, 1, "cp1256", "cp1256_nopad_bin", "", 1256, "CP1256", 1, 1, NULL, NULL},
892 {1092, 1, "cp866", "cp866_nopad_bin", "", 866, "CP866", 1, 1, NULL, NULL},
893 {1093, 1, "dec8", "dec8_nopad_bin", "", 0, "DEC", 1, 1, NULL, NULL},
894 {1094, 1, "greek", "greek_nopad_bin", "", 28597, "GREEK", 1, 1, NULL, NULL},
895 {1095, 1, "hebrew", "hebrew_nopad_bin", "", 1255, "HEBREW", 1, 1, NULL, NULL},
896 {1096, 1, "hp8", "hp8_nopad_bin", "", 0, "HP-ROMAN8", 1, 1, NULL, NULL},
897 {1097, 1, "keybcs2", "keybcs2_nopad_bin", "", 0, "", 1, 1, NULL, NULL},
898 {1098, 1, "koi8r", "koi8r_nopad_bin", "", 878, "KOI8R", 1, 1, NULL, NULL},
899 {1099, 1, "koi8u", "koi8u_nopad_bin", "", 20866, "KOI8U", 1, 1, NULL, NULL},
900 {1101, 1, "latin2", "latin2_nopad_bin", "", 852, "LATIN2", 1, 1, NULL, NULL},
901 {1102, 1, "latin5", "latin5_nopad_bin", "", 1254, "LATIN5", 1, 1, NULL, NULL},
902 {1103, 1, "latin7", "latin7_nopad_bin", "", 28603, "LATIN7", 1, 1, NULL, NULL},
903 {1104, 1, "cp850", "cp850_nopad_bin", "", 850, "CP850", 1, 1, NULL, NULL},
904 {1105, 1, "cp852", "cp852_nopad_bin", "", 852, "CP852", 1, 1, NULL, NULL},
905 {1106, 1, "swe7", "swe7_nopad_bin", "", 20107, "", 1, 1, NULL, NULL},
906 {1107, 1, UTF8_MB3, UTF8_MB3"_nopad_bin", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
907 {1108, 1, "big5","big5_nopad_bin", "", 950, "BIG5", 1, 2, mysql_mbcharlen_big5, check_mb_big5},
908 {1109, 1, "euckr", "euckr_nopad_bin", "", 51949, "EUCKR", 1, 2, mysql_mbcharlen_euckr, check_mb_euckr},
909 {1110, 1, "gb2312", "gb2312_nopad_bin", "", 936, "GB2312", 1, 2, mysql_mbcharlen_gb2312, check_mb_gb2312},
910 {1111, 1, "gbk", "gbk_nopad_bin", "", 936, "GBK", 1, 2, mysql_mbcharlen_gbk, check_mb_gbk},
911 {1112, 1, "sjis", "sjis_nopad_bin", "", 932, "SJIS", 1, 2, mysql_mbcharlen_sjis, check_mb_sjis},
912 {1113, 1, "tis620", "tis620_nopad_bin", "", 874, "TIS620", 1, 1, NULL, NULL},
913 {1114, 1, "ucs2", "ucs2_nopad_bin", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
914 {1115, 1, "ujis", "ujis_nopad_bin", "", 20932, "UJIS", 1, 3, mysql_mbcharlen_ujis, check_mb_ujis},
915 {1116, 1, "geostd8", "geostd8_general_nopad_ci", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
916 {1117, 1, "geostd8", "geostd8_nopad_bin", "", 0, "GEORGIAN-PS", 1, 1, NULL, NULL},
917 {1119, 1, "cp932", "cp932_japanese_nopad_ci", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
918 {1120, 1, "cp932", "cp932_nopad_bin", "", 932, "CP932", 1, 2, mysql_mbcharlen_cp932, check_mb_cp932},
919 {1121, 1, "eucjpms", "eucjpms_japanese_nopad_ci", "", 932, "EUCJP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
920 {1122, 1, "eucjpms", "eucjpms_nopad_bin", "", 932, "EUCJP-MS", 1, 3, mysql_mbcharlen_eucjpms, check_mb_eucjpms},
921 {1125, 1, "utf16", "utf16_unicode_nopad_ci", "", 1200, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
922 {1147, 1, "utf16", "utf16_unicode_520_nopad_ci", "", 1200, "UTF16", 2, 4, mysql_mbcharlen_utf16, check_mb_utf16},
923 {1152, 1, "ucs2", "ucs2_unicode_nopad_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
924 {1174, 1, "ucs2", "ucs2_unicode_520_nopad_ci", "", 1200, "UCS-2BE", 2, 2, mysql_mbcharlen_ucs2, check_mb_ucs2},
925 {1184, 1, "utf32", "utf32_unicode_nopad_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
926 {1206, 1, "utf32", "utf32_unicode_520_nopad_ci", "", 0, "UTF32", 4, 4, mysql_mbcharlen_utf32, check_mb_utf32},
927 {1216, 1, UTF8_MB3, UTF8_MB3"_unicode_nopad_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
928 {1238, 1, UTF8_MB3, UTF8_MB3"_unicode_520_nopad_ci", "", 65001, "UTF-8", 1, 3, mysql_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
929 {1248, 1, UTF8_MB4, UTF8_MB4"_unicode_nopad_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
930 {1270, 1, UTF8_MB4, UTF8_MB4"_unicode_520_nopad_ci", "", 65001, "UTF-8", 1, 4, mysql_mbcharlen_utf8, check_mb_utf8_valid},
931 { 0, 0, NULL, NULL, NULL, 0, NULL, 0, 0, NULL, NULL}
932};
933/* }}} */
934
935
936/* {{{ mysql_find_charset_nr */
937const MARIADB_CHARSET_INFO * mysql_find_charset_nr(unsigned int charsetnr)
938{
939 const MARIADB_CHARSET_INFO * c = mariadb_compiled_charsets;
940
941 do {
942 if (c->nr == charsetnr) {
943 return(c);
944 }
945 ++c;
946 } while (c[0].nr != 0);
947 return(NULL);
948}
949/* }}} */
950
951
952/* {{{ mysql_find_charset_name */
953MARIADB_CHARSET_INFO * mysql_find_charset_name(const char *name)
954{
955 MARIADB_CHARSET_INFO *c = (MARIADB_CHARSET_INFO *)mariadb_compiled_charsets;
956 const char *csname;
957
958 if (!strcasecmp(name, MADB_AUTODETECT_CHARSET_NAME))
959 csname= madb_get_os_character_set();
960 else
961 csname= (char *)name;
962
963 do {
964 if (!strcasecmp(c->csname, csname)) {
965 return(c);
966 }
967 ++c;
968 } while (c[0].nr != 0);
969 return(NULL);
970}
971/* }}} */
972
973
974/* {{{ mysql_cset_escape_quotes */
975size_t mysql_cset_escape_quotes(const MARIADB_CHARSET_INFO *cset, char *newstr,
976 const char * escapestr, size_t escapestr_len )
977{
978 const char *newstr_s = newstr;
979 const char *newstr_e = newstr + 2 * escapestr_len;
980 const char *end = escapestr + escapestr_len;
981 my_bool escape_overflow = FALSE;
982
983 for (;escapestr < end; escapestr++) {
984 unsigned int len = 0;
985 /* check unicode characters */
986
987 if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
988
989 /* check possible overflow */
990 if ((newstr + len) > newstr_e) {
991 escape_overflow = TRUE;
992 break;
993 }
994 /* copy mb char without escaping it */
995 while (len--) {
996 *newstr++ = *escapestr++;
997 }
998 escapestr--;
999 continue;
1000 }
1001 if (*escapestr == '\'') {
1002 if (newstr + 2 > newstr_e) {
1003 escape_overflow = TRUE;
1004 break;
1005 }
1006 *newstr++ = '\'';
1007 *newstr++ = '\'';
1008 } else {
1009 if (newstr + 1 > newstr_e) {
1010 escape_overflow = TRUE;
1011 break;
1012 }
1013 *newstr++ = *escapestr;
1014 }
1015 }
1016 *newstr = '\0';
1017
1018 if (escape_overflow) {
1019 return((size_t)~0);
1020 }
1021 return((size_t)(newstr - newstr_s));
1022}
1023/* }}} */
1024
1025
1026/* {{{ mysql_cset_escape_slashes */
1027size_t mysql_cset_escape_slashes(const MARIADB_CHARSET_INFO * cset, char *newstr,
1028 const char * escapestr, size_t escapestr_len )
1029{
1030 const char *newstr_s = newstr;
1031 const char *newstr_e = newstr + 2 * escapestr_len;
1032 const char *end = escapestr + escapestr_len;
1033 my_bool escape_overflow = FALSE;
1034
1035 for (;escapestr < end; escapestr++) {
1036 char esc = '\0';
1037 unsigned int len = 0;
1038
1039 /* check unicode characters */
1040 if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
1041 /* check possible overflow */
1042 if ((newstr + len) > newstr_e) {
1043 escape_overflow = TRUE;
1044 break;
1045 }
1046 /* copy mb char without escaping it */
1047 while (len--) {
1048 *newstr++ = *escapestr++;
1049 }
1050 escapestr--;
1051 continue;
1052 }
1053 if (cset->char_maxlen > 1 && cset->mb_charlen(*escapestr) > 1) {
1054 esc = *escapestr;
1055 } else {
1056 switch (*escapestr) {
1057 case 0:
1058 esc = '0';
1059 break;
1060 case '\n':
1061 esc = 'n';
1062 break;
1063 case '\r':
1064 esc = 'r';
1065 break;
1066 case '\\':
1067 case '\'':
1068 case '"':
1069 esc = *escapestr;
1070 break;
1071 case '\032':
1072 esc = 'Z';
1073 break;
1074 }
1075 }
1076 if (esc) {
1077 if (newstr + 2 > newstr_e) {
1078 escape_overflow = TRUE;
1079 break;
1080 }
1081 /* copy escaped character */
1082 *newstr++ = '\\';
1083 *newstr++ = esc;
1084 } else {
1085 if (newstr + 1 > newstr_e) {
1086 escape_overflow = TRUE;
1087 break;
1088 }
1089 /* copy non escaped character */
1090 *newstr++ = *escapestr;
1091 }
1092 }
1093 *newstr = '\0';
1094
1095 if (escape_overflow) {
1096 return((size_t)~0);
1097 }
1098 return((size_t)(newstr - newstr_s));
1099}
1100/* }}} */
1101
1102/* {{{ MADB_OS_CHARSET */
/* One row of the MADB_OS_CHARSET table: maps an operating system
   character set identifier to a character set name.
   The table uses positional initializers, so the member order matters. */
struct st_madb_os_charset {
  const char *identifier;   /* OS-side identifier; the Windows rows hold the code page number as a string */
  const char *description;  /* human readable description of the OS character set */
  const char *charset;      /* corresponding character set name (e.g. "cp850"), or NULL when there is none */
  const char *iconv_cs;     /* presumably the iconv name of the character set; NULL in the Windows rows - TODO confirm */
  unsigned char supported;  /* one of MADB_CS_UNSUPPORTED, MADB_CS_APPROX, MADB_CS_EXACT */
};
1110
/* Values for st_madb_os_charset.supported.
   NOTE(review): judging by the names and the table rows these presumably mean
   "no usable mapping", "approximate mapping" and "exact mapping" - confirm
   against the code that reads the table. */
#define MADB_CS_UNSUPPORTED 0
#define MADB_CS_APPROX 1
#define MADB_CS_EXACT 2
1114
1115/* Please add new character sets at the end. */
1116struct st_madb_os_charset MADB_OS_CHARSET[]=
1117{
1118#ifdef _WIN32
1119 /* Windows code pages */
1120 {"037", "IBM EBCDIC US-Canada", NULL, NULL, MADB_CS_UNSUPPORTED},
1121 {"437", "OEM United States", "cp850", NULL, MADB_CS_APPROX},
1122 {"500", "IBM EBCDIC International", NULL, NULL, MADB_CS_UNSUPPORTED},
1123 {"708", "Arabic (ASMO 708)", NULL, NULL, MADB_CS_UNSUPPORTED},
1124 {"709", "Arabic (ASMO-449+, BCON V4)", NULL, NULL, MADB_CS_UNSUPPORTED},
1125 {"710", "Transparent Arabic", NULL, NULL, MADB_CS_UNSUPPORTED},
1126 {"720", "Arabic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1127 {"737", "Greek (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1128 {"775", "Baltic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1129 {"850", "Western European (DOS)", "cp850", NULL, MADB_CS_EXACT},
1130 {"852", "Central European (DOS)", "cp852", NULL, MADB_CS_EXACT},
1131 {"855", "Cyrillic (primarily Russian)", NULL, NULL, MADB_CS_UNSUPPORTED},
1132 {"857", "Turkish (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1133 {"858", "OEM Multilingual Latin 1 + Euro symbol", "cp850", NULL, MADB_CS_EXACT},
1134 {"860", "Portuguese (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1135 {"861", "Icelandic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1136 {"862", "Hebrew (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1137 {"863", "French Canadian (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1138 {"864", "Arabic (864)", NULL, NULL, MADB_CS_UNSUPPORTED},
1139 {"865", "Nordic (DOS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1140 {"866", "Cyrillic (DOS)", "cp866", NULL, MADB_CS_EXACT},
1141 {"869", "Greek, Modern (DOS)", "greek", NULL, MADB_CS_EXACT},
1142 {"870", "IBM EBCDIC Multilingual Latin 2", NULL, NULL, MADB_CS_UNSUPPORTED},
1143 {"874", "Thai (Windows)", "tis620", NULL, MADB_CS_UNSUPPORTED},
1144 {"875", "Greek Modern", NULL, NULL, MADB_CS_UNSUPPORTED},
1145 {"932", "Japanese (Shift-JIS)", "cp932", NULL, MADB_CS_EXACT},
1146 {"936", "Chinese Simplified (GB2312)", "gbk", NULL, MADB_CS_EXACT},
1147 {"949", "ANSI/OEM Korean (Unified Hangul Code)", "euckr", NULL, MADB_CS_EXACT},
1148 {"950", "Chinese Traditional (Big5)", "big5", NULL, MADB_CS_EXACT},
1149 {"1026", "EBCDIC Turkish (Latin 5)", NULL, NULL, MADB_CS_UNSUPPORTED},
1150 {"1047", "EBCDIC Latin 1/Open System", NULL, NULL, MADB_CS_UNSUPPORTED},
1151 {"1140", "IBM EBCDIC (US-Canada-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1152 {"1141", "IBM EBCDIC (Germany-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1153 {"1142", "IBM EBCDIC (Denmark-Norway-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1154 {"1143", "IBM EBCDIC (Finland-Sweden-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1155 {"1144", "IBM EBCDIC (Italy-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1156 {"1145", "IBM EBCDIC (Spain-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1157 {"1146", "IBM EBCDIC (UK-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1158 {"1147", "IBM EBCDIC (France-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1159 {"1148", "IBM EBCDIC (International-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1160 {"1149", "IBM EBCDIC (Icelandic-Euro)", NULL, NULL, MADB_CS_UNSUPPORTED},
1161 {"1200", "UTF-16, little endian byte order", NULL, NULL, MADB_CS_UNSUPPORTED},
1162 {"1201", "UTF-16, big endian byte order", "utf16", NULL, MADB_CS_UNSUPPORTED},
1163 {"1250", "Central European (Windows)", "cp1250", NULL, MADB_CS_EXACT},
1164 {"1251", "Cyrillic (Windows)", "cp1251", NULL, MADB_CS_EXACT},
1165 {"1252", "Western European (Windows)", "latin1", NULL, MADB_CS_EXACT},
1166 {"1253", "Greek (Windows)", "greek", NULL, MADB_CS_EXACT},
1167 {"1254", "Turkish (Windows)", "latin5", NULL, MADB_CS_EXACT},
1168 {"1255", "Hebrew (Windows)", "hewbrew", NULL, MADB_CS_EXACT},
1169 {"1256", "Arabic (Windows)", "cp1256", NULL, MADB_CS_EXACT},
1170 {"1257", "Baltic (Windows)","cp1257", NULL, MADB_CS_EXACT},
1171 {"1258", "Vietnamese (Windows)", NULL, NULL, MADB_CS_UNSUPPORTED},
1172 {"1361", "Korean (Johab)", NULL, NULL, MADB_CS_UNSUPPORTED},
1173 {"10000", "Western European (Mac)", "macroman", NULL, MADB_CS_EXACT},
1174 {"10001", "Japanese (Mac)", "sjis", NULL, MADB_CS_EXACT},
1175 {"10002", "Chinese Traditional (Mac)", "big5", NULL, MADB_CS_EXACT},
1176 {"10003", "Korean (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1177 {"10004", "Arabic (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1178 {"10005", "Hebrew (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1179 {"10006", "Greek (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1180 {"10007", "Cyrillic (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1181 {"10008", "Chinese Simplified (Mac)", "gb2312", NULL, MADB_CS_EXACT},
1182 {"10010", "Romanian (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1183 {"10017", "Ukrainian (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1184 {"10021", "Thai (Mac)", "tis620", NULL, MADB_CS_EXACT},
1185 {"10029", "Central European (Mac)", "macce", NULL, MADB_CS_EXACT},
1186 {"10079", "Icelandic (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1187 {"10081", "Turkish (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1188 {"10082", "Croatian (Mac)", NULL, NULL, MADB_CS_UNSUPPORTED},
1189 {"12000", "Unicode UTF-32, little endian byte order", NULL, NULL, MADB_CS_UNSUPPORTED},
1190 {"12001", "Unicode UTF-32, big endian byte order", "utf32", NULL, MADB_CS_UNSUPPORTED},
1191 {"20000", "Chinese Traditional (CNS)", NULL, NULL, MADB_CS_UNSUPPORTED},
1192 {"20001", "TCA Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1193 {"20002", "Chinese Traditional (Eten)", NULL, NULL, MADB_CS_UNSUPPORTED},
1194 {"20003", "IBM5550 Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1195 {"20004", "TeleText Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1196 {"20005", "Wang Taiwan", NULL, NULL, MADB_CS_UNSUPPORTED},
1197 {"20105", "Western European (IA5)", NULL, NULL, MADB_CS_UNSUPPORTED},
1198 {"20106", "IA5 German (7-bit)", NULL, NULL, MADB_CS_UNSUPPORTED},
1199 {"20107", "Swedish (7-bit)", NULL, NULL, MADB_CS_UNSUPPORTED},
1200 {"20108", "Norwegian (7-bit)", NULL, NULL, MADB_CS_UNSUPPORTED},
1201 {"20127", "US-ASCII (7-bit)", "ascii", NULL, MADB_CS_EXACT},
1202 {"20261", "T.61", NULL, NULL, MADB_CS_UNSUPPORTED},
1203 {"20269", "Non-Spacing Accent", NULL, NULL, MADB_CS_UNSUPPORTED},
1204 {"20273", "EBCDIC Germany", NULL, NULL, MADB_CS_UNSUPPORTED},
1205 {"20277", "EBCDIC Denmark-Norway", NULL, NULL, MADB_CS_UNSUPPORTED},
1206 {"20278", "EBCDIC Finland-Sweden", NULL, NULL, MADB_CS_UNSUPPORTED},
1207 {"20280", "EBCDIC Italy", NULL, NULL, MADB_CS_UNSUPPORTED},
1208 {"20284", "EBCDIC Latin America-Spain", NULL, NULL, MADB_CS_UNSUPPORTED},
1209 {"20285", "EBCDIC United Kingdom", NULL, NULL, MADB_CS_UNSUPPORTED},
1210 {"20290", "EBCDIC Japanese Katakana Extended", NULL, NULL, MADB_CS_UNSUPPORTED},
1211 {"20297", "EBCDIC France", NULL, NULL, MADB_CS_UNSUPPORTED},
1212 {"20420", "EBCDIC Arabic", NULL, NULL, MADB_CS_UNSUPPORTED},
1213 {"20423", "EBCDIC Greek", NULL, NULL, MADB_CS_UNSUPPORTED},
1214 {"20424", "EBCDIC Hebrew", NULL, NULL, MADB_CS_UNSUPPORTED},
1215 {"20833", "EBCDIC Korean Extended", NULL, NULL, MADB_CS_UNSUPPORTED},
1216 {"20838", "EBCDIC Thai", NULL, NULL, MADB_CS_UNSUPPORTED},
1217 {"20866", "Cyrillic (KOI8-R)", "koi8r", NULL, MADB_CS_EXACT},
1218 {"20871", "EBCDIC Icelandic", NULL, NULL, MADB_CS_UNSUPPORTED},
1219 {"20880", "EBCDIC Cyrillic Russian", NULL, NULL, MADB_CS_UNSUPPORTED},
1220 {"20905", "EBCDIC Turkish", NULL, NULL, MADB_CS_UNSUPPORTED},
1221 {"20924", "EBCDIC Latin 1/Open System (1047 + Euro symbol)", NULL, NULL, MADB_CS_UNSUPPORTED},
1222 {"20932", "Japanese (JIS 0208-1990 and 0121-1990)", "ujis", NULL, MADB_CS_EXACT},
1223 {"20936", "Chinese Simplified (GB2312-80)", "gb2312", NULL, MADB_CS_APPROX},
1224 {"20949", "Korean Wansung", "euckr", NULL, MADB_CS_APPROX},
1225 {"21025", "EBCDIC Cyrillic Serbian-Bulgarian", NULL, NULL, MADB_CS_UNSUPPORTED},
1226 {"21866", "Cyrillic (KOI8-U)", "koi8u", NULL, MADB_CS_EXACT},
1227 {"28591", "Western European (ISO)", "latin1", NULL, MADB_CS_APPROX},
1228 {"28592", "Central European (ISO)", "latin2", NULL, MADB_CS_EXACT},
1229 {"28593", "Latin 3", NULL, NULL, MADB_CS_UNSUPPORTED},
1230 {"28594", "Baltic", NULL, NULL, MADB_CS_UNSUPPORTED},
1231 {"28595", "ISO 8859-5 Cyrillic", NULL, NULL, MADB_CS_UNSUPPORTED},
1232 {"28596", "ISO 8859-6 Arabic", NULL, NULL, MADB_CS_UNSUPPORTED},
1233 {"28597", "ISO 8859-7 Greek", "greek", NULL, MADB_CS_EXACT},
1234 {"28598", "Hebrew (ISO-Visual)", "hebrew", NULL, MADB_CS_EXACT},
1235 {"28599", "ISO 8859-9 Turkish", "latin5", NULL, MADB_CS_EXACT},
1236 {"28603", "ISO 8859-13 Estonian", "latin7", NULL, MADB_CS_EXACT},
1237 {"28605", "8859-15 Latin 9", NULL, NULL, MADB_CS_UNSUPPORTED},
1238 {"29001", "Europa 3", NULL, NULL, MADB_CS_UNSUPPORTED},
1239 {"38598", "ISO 8859-8 Hebrew; Hebrew (ISO-Logical)", "hebrew", NULL, MADB_CS_EXACT},
1240 {"50220", "ISO 2022 Japanese with no halfwidth Katakana", NULL, NULL, MADB_CS_UNSUPPORTED},
1241 {"50221", "ISO 2022 Japanese with halfwidth Katakana", NULL, NULL, MADB_CS_UNSUPPORTED},
1242 {"50222", "ISO 2022 Japanese JIS X 0201-1989", NULL, NULL, MADB_CS_UNSUPPORTED},
1243 {"50225", "ISO 2022 Korean", NULL, NULL, MADB_CS_UNSUPPORTED},
1244 {"50227", "ISO 2022 Simplified Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1245 {"50229", "ISO 2022 Traditional Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1246 {"50930", "EBCDIC Japanese (Katakana) Extended", NULL, NULL, MADB_CS_UNSUPPORTED},
1247 {"50931", "EBCDIC US-Canada and Japanese", NULL, NULL, MADB_CS_UNSUPPORTED},
1248 {"50933", "EBCDIC Korean Extended and Korean", NULL, NULL, MADB_CS_UNSUPPORTED},
1249 {"50935", "EBCDIC Simplified Chinese Extended and Simplified Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1250 {"50936", "EBCDIC Simplified Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1251 {"50937", "EBCDIC US-Canada and Traditional Chinese", NULL, NULL, MADB_CS_UNSUPPORTED},
1252 {"50939", "EBCDIC Japanese (Latin) Extended and Japanese", NULL, NULL, MADB_CS_UNSUPPORTED},
1253 {"51932", "EUC Japanese", "ujis", NULL, MADB_CS_EXACT},
1254 {"51936", "EUC Simplified Chinese; Chinese Simplified (EUC)", "gb2312", NULL, MADB_CS_EXACT},
1255 {"51949", "EUC Korean", "euckr", NULL, MADB_CS_EXACT},
1256 {"51950", "EUC Traditional Chinese", "big5", NULL, MADB_CS_EXACT},
1257 {"52936", "Chinese Simplified (HZ)", NULL, NULL, MADB_CS_UNSUPPORTED},
1258 {"54936", "Chinese Simplified (GB18030)", NULL, NULL, MADB_CS_UNSUPPORTED},
1259 {"57002", "ISCII Devanagari", NULL, NULL, MADB_CS_UNSUPPORTED},
1260 {"57003", "ISCII Bengali", NULL, NULL, MADB_CS_UNSUPPORTED},
1261 {"57004", "ISCII Tamil", NULL, NULL, MADB_CS_UNSUPPORTED},
1262 {"57005", "ISCII Telugu", NULL, NULL, MADB_CS_UNSUPPORTED},
1263 {"57006", "ISCII Assamese", NULL, NULL, MADB_CS_UNSUPPORTED},
1264 {"57007", "ISCII Oriya", NULL, NULL, MADB_CS_UNSUPPORTED},
1265 {"57008", "ISCII Kannada", NULL, NULL, MADB_CS_UNSUPPORTED},
1266 {"57009", "ISCII Malayalam", NULL, NULL, MADB_CS_UNSUPPORTED},
1267 {"57010", "ISCII Gujarati", NULL, NULL, MADB_CS_UNSUPPORTED},
1268 {"57011", "ISCII Punjabi", NULL, NULL, MADB_CS_UNSUPPORTED},
1269 {"65000", "utf-7 Unicode (UTF-7)", NULL, NULL, MADB_CS_UNSUPPORTED},
1270 {"65001", "utf-8 Unicode (UTF-8)", "utf8", NULL, MADB_CS_EXACT},
1271 /* non Windows */
1272#else
1273 /* iconv encodings */
1274 {"ASCII", "US-ASCII", "ascii", "ASCII", MADB_CS_APPROX},
1275 {"US-ASCII", "US-ASCII", "ascii", "ASCII", MADB_CS_APPROX},
1276 {"Big5", "Chinese for Taiwan Multi-byte set", "big5", "BIG5", MADB_CS_EXACT},
1277 {"CP866", "IBM 866", "cp866", "CP866", MADB_CS_EXACT},
1278 {"IBM-1252", "Catalan Spain", "cp1252", "CP1252", MADB_CS_EXACT},
1279 {"ISCII-DEV", "Hindi", NULL, NULL, MADB_CS_UNSUPPORTED},
1280 {"ISO-8859-1", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1281 {"ISO8859-1", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1282 {"ISO_8859-1", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1283 {"ISO88591", "ISO-8859-1", "latin1", "ISO_8859-1", MADB_CS_APPROX},
1284 {"ISO-8859-13", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1285 {"ISO8859-13", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1286 {"ISO_8859-13", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1287 {"ISO885913", "ISO-8859-13", "latin7", "ISO_8859-13", MADB_CS_EXACT},
1288 {"ISO-8859-15", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1289 {"ISO8859-15", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1290 {"ISO_8859-15", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1291 {"ISO885915", "ISO-8859-15", "latin9", "ISO_8859-15", MADB_CS_UNSUPPORTED},
1292 {"ISO-8859-2", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1293 {"ISO8859-2", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1294 {"ISO_8859-2", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1295 {"ISO88592", "ISO-8859-2", "latin2", "ISO_8859-2", MADB_CS_EXACT},
1296 {"ISO-8859-7", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1297 {"ISO8859-7", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1298 {"ISO_8859-7", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1299 {"ISO88597", "ISO-8859-7", "greek", "ISO_8859-7", MADB_CS_EXACT},
1300 {"ISO-8859-8", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1301 {"ISO8859-8", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1302 {"ISO_8859-8", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1303 {"ISO88598", "ISO-8859-8", "hebrew", "ISO_8859-8", MADB_CS_EXACT},
1304 {"ISO-8859-9", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1305 {"ISO8859-9", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1306 {"ISO_8859-9", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1307 {"ISO88599", "ISO-8859-9", "latin5", "ISO_8859-9", MADB_CS_EXACT},
1308 {"ISO-8859-4", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1309 {"ISO8859-4", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1310 {"ISO_8859-4", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1311 {"ISO88594", "ISO-8859-4", NULL, "ISO_8859-4", MADB_CS_UNSUPPORTED},
1312 {"ISO-8859-5", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1313 {"ISO8859-5", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1314 {"ISO_8859-5", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1315 {"ISO88595", "ISO-8859-5", NULL, "ISO_8859-5", MADB_CS_UNSUPPORTED},
1316 {"KOI8-R", "KOI8-R", "koi8r", "KOI8R", MADB_CS_EXACT},
1317 {"koi8r", "KOI8-R", "koi8r", "KOI8R", MADB_CS_EXACT},
1318 {"KOI8-U", "KOI8-U", "koi8u", "KOI8U", MADB_CS_EXACT},
1319 {"koi8u", "KOI8-U", "koi8u", "KOI8U", MADB_CS_EXACT},
1320 {"koi8t", "KOI8-T", NULL, "KOI8-T", MADB_CS_UNSUPPORTED},
1321 {"KOI8-T", "KOI8-T", NULL, "KOI8-T", MADB_CS_UNSUPPORTED},
1322 {"SJIS", "SHIFT_JIS", "sjis", "SJIS", MADB_CS_EXACT},
1323 {"Shift-JIS", "SHIFT_JIS", "sjis", "SJIS", MADB_CS_EXACT},
1324 {"ansi1251", "Cyrillic", "cp1251", "CP1251", MADB_CS_EXACT},
1325 {"cp1251", "Cyrillic", "cp1251", "CP1251", MADB_CS_EXACT},
1326 {"armscii8", "Armenian", "armscii8", "ASMSCII-8", MADB_CS_EXACT},
1327 {"armscii-8", "Armenian", "armscii8", "ASMSCII-8", MADB_CS_EXACT},
1328 {"big5hkscs", "Big5-HKSCS", NULL, NULL, MADB_CS_UNSUPPORTED},
1329 {"cp1255", "Hebrew", "cp1255", "CP1255", MADB_CS_EXACT},
1330 {"eucCN", "GB-2312", "gb2312", "GB2312", MADB_CS_EXACT},
1331 {"eucJP", "UJIS", "ujis", "UJIS", MADB_CS_EXACT},
1332 {"eucKR", "EUC-KR", "euckr", "EUCKR", MADB_CS_EXACT},
1333 {"euctw", "EUC-TW", NULL, NULL, MADB_CS_UNSUPPORTED},
1334 {"gb18030", "GB 18030-2000", "gb18030", "GB18030", MADB_CS_UNSUPPORTED},
1335 {"gb2312", "GB2312", "gb2312", "GB2312", MADB_CS_EXACT},
1336 {"gbk", "GBK", "gbk", "GBK", MADB_CS_EXACT},
1337 {"georgianps", "Georgian", "geostd8", "GEORGIAN-PS", MADB_CS_EXACT},
1338 {"utf8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT},
1339 {"utf-8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT},
1340#endif
1341 {NULL, NULL, NULL, NULL, 0}
1342};
1343/* }}} */
1344
1345/* {{{ madb_get_os_character_set */
1346const char *madb_get_os_character_set()
1347{
1348 unsigned int i= 0;
1349 char *p= NULL;
1350#ifdef _WIN32
1351 char codepage[FN_REFLEN];
1352 snprintf(codepage, FN_REFLEN, "%u", GetACP());
1353 p= codepage;
1354#elif defined(HAVE_NL_LANGINFO) && defined(HAVE_SETLOCALE)
1355 if (setlocale(LC_CTYPE, ""))
1356 p= nl_langinfo(CODESET);
1357#endif
1358 if (!p)
1359 return MADB_DEFAULT_CHARSET_NAME;
1360 while (MADB_OS_CHARSET[i].identifier)
1361 {
1362 if (MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED &&
1363 strcasecmp(MADB_OS_CHARSET[i].identifier, p) == 0)
1364 return MADB_OS_CHARSET[i].charset;
1365 i++;
1366 }
1367 return MADB_DEFAULT_CHARSET_NAME;
1368}
1369/* }}} */
1370
/* {{{ madb_get_windows_cp
   Looks up the Windows code page number for a charset name (Windows only).

   MADB_OS_CHARSET is scanned from the beginning; the first entry whose
   "supported" value is greater than MADB_CS_UNSUPPORTED and whose charset
   equals the given name (case-sensitive strcmp) wins.

   @param[in]  charset   charset name to look up; passed to strcmp() as is,
                         so it must not be NULL

   @return identifier of the matching entry converted with atoi(),
           -1 if no supported entry matches
 */
#ifdef _WIN32
int madb_get_windows_cp(const char *charset)
{
  unsigned int pos;

  for (pos= 0; MADB_OS_CHARSET[pos].identifier; pos++)
  {
    /* the support level is tested first, the charset of such an entry
       is not looked at */
    if (MADB_OS_CHARSET[pos].supported <= MADB_CS_UNSUPPORTED)
      continue;
    if (!strcmp(MADB_OS_CHARSET[pos].charset, charset))
      return atoi(MADB_OS_CHARSET[pos].identifier);
  }
  return -1;
}
#endif
1386/* }}} */
1387
1388
1389/* {{{ map_charset_name
1390 Changing charset name into something iconv understands, if necessary.
1391 Another purpose it to avoid BOMs in result string, adding BE if necessary
1392 e.g.UTF16 does not work form iconv, while UTF-16 does.
1393 */
1394static void map_charset_name(const char *cs_name, my_bool target_cs, char *buffer, size_t buff_len)
1395{
1396 char digits[3], endianness[3]= "BE";
1397
1398 if (sscanf(cs_name, "UTF%2[0-9]%2[LBE]", digits, endianness))
1399 {
1400 /* We should have at least digits. Endianness we write either default(BE), or what we found in the string */
1401 snprintf(buffer, buff_len, "UTF-%s%s", digits, endianness);
1402 }
1403 else
1404 {
1405 /* Not our client - copy as is*/
1406 strncpy(buffer, cs_name, buff_len);
1407 }
1408
1409 if (target_cs)
1410 {
1411 strncat(buffer, "//TRANSLIT", buff_len);
1412 }
1413}
1414/* }}} */
1415
1416/* {{{ mariadb_convert_string
1417 Converts string from one charset to another, and writes converted string to given buffer
1418 @param[in] from
1419 @param[in/out] from_len
1420 @param[in] from_cs
1421 @param[out] to
1422 @param[in/out] to_len
1423 @param[in] to_cs
1424 @param[out] errorcode
1425
1426 @return -1 in case of error, bytes used in the "to" buffer, otherwise
1427 */
1428size_t STDCALL mariadb_convert_string(const char *from, size_t *from_len, MARIADB_CHARSET_INFO *from_cs,
1429 char *to, size_t *to_len, MARIADB_CHARSET_INFO *to_cs, int *errorcode)
1430{
1431 iconv_t conv= 0;
1432 size_t rc= -1;
1433 size_t save_len= *to_len;
1434 char to_encoding[128], from_encoding[128];
1435
1436 *errorcode= 0;
1437
1438 /* check if conversion is supported */
1439 if (!from_cs || !from_cs->encoding || !from_cs->encoding[0] ||
1440 !to_cs || !to_cs->encoding || !to_cs->encoding[0])
1441 {
1442 *errorcode= EINVAL;
1443 return rc;
1444 }
1445
1446 map_charset_name(to_cs->encoding, 1, to_encoding, sizeof(to_encoding));
1447 map_charset_name(from_cs->encoding, 0, from_encoding, sizeof(from_encoding));
1448
1449 if ((conv= iconv_open(to_encoding, from_encoding)) == (iconv_t)-1)
1450 {
1451 *errorcode= errno;
1452 goto error;
1453 }
1454 if ((rc= iconv(conv, IF_WIN(,(char **))&from, from_len, &to, to_len)) == (size_t)-1)
1455 {
1456 *errorcode= errno;
1457 goto error;
1458 }
1459 rc= save_len - *to_len;
1460error:
1461 if (conv != (iconv_t)-1)
1462 iconv_close(conv);
1463 return rc;
1464}
1465/* }}} */
1466
1467