1/* Copyright (c) 2000-2003, 2005-2007 MySQL AB, 2009 Sun Microsystems, Inc.
2 Copyright (c) 2009-2011, Monty Program Ab
3 Use is subject to license terms.
4 Copyright (c) 2009-2011, Monty Program Ab
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
18
19#include "strings_def.h"
20#include <m_ctype.h>
21#include <fcntl.h>
22#include <my_xml.h>
23
/* Row width (in values) print_array() uses when wrapping its output. */
24#define ROW_LEN 16
/* Row width (in values) print_array16() uses when wrapping its output. */
25#define ROW16_LEN 8
/* Size of the stack buffer my_read_charset_file() reads an XML file into. */
26#define MAX_BUF (64*1024)
27
28
/* Number of slots in the all_charsets[] and refids[] tables below. */
29#define MY_ALL_CHARSETS_SIZE 2048
30
/*
  Collations collected from the XML files. add_collation() copies each
  parsed collation into the slot whose index equals the collation number.
*/
31static struct charset_info_st all_charsets[MY_ALL_CHARSETS_SIZE];
/*
  refids[N] is the number of the collation that collation N shares its
  array data with, as recorded by main(); 0 when nothing was recorded.
*/
32static uint refids[MY_ALL_CHARSETS_SIZE];
33
34static CHARSET_INFO *inheritance_source(uint id)
35{
36 return &all_charsets[refids[id]];
37}
38
39
40void
41print_array(FILE *f, const char *set, const char *name, const uchar *a, int n)
42{
43 int i;
44
45 fprintf(f,"static const uchar %s_%s[] = {\n", name, set);
46
47 for (i=0 ;i<n ; i++)
48 {
49 fprintf(f,"0x%02X",a[i]);
50 fprintf(f, (i+1<n) ? "," :"" );
51 fprintf(f, ((i+1) % ROW_LEN == n % ROW_LEN) ? "\n" : "" );
52 }
53 fprintf(f,"};\n\n");
54}
55
56
57void
58print_array16(FILE *f, const char *set, const char *name, const uint16 *a, int n)
59{
60 int i;
61
62 fprintf(f,"static const uint16 %s_%s[] = {\n", name, set);
63
64 for (i=0 ;i<n ; i++)
65 {
66 fprintf(f,"0x%04X",a[i]);
67 fprintf(f, (i+1<n) ? "," :"" );
68 fprintf(f, ((i+1) % ROW16_LEN == n % ROW16_LEN) ? "\n" : "" );
69 }
70 fprintf(f,"};\n\n");
71}
72
73
74static uint get_collation_number(const char *name)
75{
76 CHARSET_INFO *cs;
77 for (cs= all_charsets;
78 cs < all_charsets + array_elements(all_charsets);
79 cs++)
80 {
81 if (cs->name && !strcmp(cs->name, name))
82 return cs->number;
83 }
84 return 0;
85}
86
87
88static uint
89get_charset_number_internal(const char *charset_name, uint cs_flags)
90{
91 CHARSET_INFO *cs;
92 for (cs= all_charsets;
93 cs < all_charsets + array_elements(all_charsets);
94 cs++)
95 {
96 if (cs->csname && (cs->state & cs_flags) &&
97 !strcmp(cs->csname, charset_name))
98 return cs->number;
99 }
100 return 0;
101}
102
103char *mdup(const char *src, uint len)
104{
105 char *dst=(char*)malloc(len);
106 if (!dst)
107 exit(1);
108 memcpy(dst,src,len);
109 return dst;
110}
111
112static void simple_cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from)
113{
114 to->number= from->number ? from->number : to->number;
115 to->state|= from->state;
116
117 if (from->csname)
118 to->csname= strdup(from->csname);
119
120 if (from->name)
121 to->name= strdup(from->name);
122
123 if (from->tailoring)
124 to->tailoring= strdup(from->tailoring);
125
126 if (from->ctype)
127 to->ctype= (uchar*) mdup((char*) from->ctype, MY_CS_CTYPE_TABLE_SIZE);
128 if (from->to_lower)
129 to->to_lower= (uchar*) mdup((char*) from->to_lower, MY_CS_TO_LOWER_TABLE_SIZE);
130 if (from->to_upper)
131 to->to_upper= (uchar*) mdup((char*) from->to_upper, MY_CS_TO_UPPER_TABLE_SIZE);
132 if (from->sort_order)
133 {
134 to->sort_order= (uchar*) mdup((char*) from->sort_order, MY_CS_SORT_ORDER_TABLE_SIZE);
135 /*
136 set_max_sort_char(to);
137 */
138 }
139 if (from->tab_to_uni)
140 {
141 uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);
142 to->tab_to_uni= (uint16*) mdup((char*)from->tab_to_uni, sz);
143 /*
144 create_fromuni(to);
145 */
146 }
147}
148
149
150/*
151 cs->xxx arrays can be NULL in case when a collation has an entry only
152 in Index.xml and has no entry in csname.xml (e.g. in case of a binary
153 collation or a collation using <import> command).
154
155 refcs->xxx arrays can be NULL if <import> refers to a collation
156 which is not defined in csname.xml, e.g. an always compiled collation
157 such as latin1_swedish_ci.
158*/
159static void inherit_charset_data(struct charset_info_st *cs,
160 CHARSET_INFO *refcs)
161{
162 cs->state|= (refcs->state & (MY_CS_PUREASCII|MY_CS_NONASCII));
163 if (refcs->ctype && cs->ctype &&
164 !memcmp(cs->ctype, refcs->ctype, MY_CS_CTYPE_TABLE_SIZE))
165 cs->ctype= NULL;
166 if (refcs->to_lower && cs->to_lower &&
167 !memcmp(cs->to_lower, refcs->to_lower, MY_CS_TO_LOWER_TABLE_SIZE))
168 cs->to_lower= NULL;
169 if (refcs->to_upper && cs->to_upper &&
170 !memcmp(cs->to_upper, refcs->to_upper, MY_CS_TO_LOWER_TABLE_SIZE))
171 cs->to_upper= NULL;
172 if (refcs->tab_to_uni && cs->tab_to_uni &&
173 !memcmp(cs->tab_to_uni, refcs->tab_to_uni,
174 MY_CS_TO_UNI_TABLE_SIZE * sizeof(uint16)))
175 cs->tab_to_uni= NULL;
176}
177
178
179static CHARSET_INFO *find_charset_data_inheritance_source(CHARSET_INFO *cs)
180{
181 CHARSET_INFO *refcs;
182 uint refid= get_charset_number_internal(cs->csname, MY_CS_PRIMARY);
183 return refid && refid != cs->number &&
184 (refcs= &all_charsets[refid]) &&
185 (refcs->state & MY_CS_LOADED) ? refcs : NULL;
186}
187
188
189/**
190 Detect if "cs" needs further loading from csname.xml
191 @param cs - the character set pointer
192 @retval FALSE - if the current data (e.g. loaded from from Index.xml)
193 is not enough to dump the character set and requires
194 further reading from the csname.xml file.
195 @retval TRUE - if the current data is enough to dump,
196 no reading of csname.xml is needed.
197*/
198static my_bool simple_cs_is_full(CHARSET_INFO *cs)
199{
200 return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
201 cs->to_lower) &&
202 (cs->number && cs->name &&
203 (cs->sort_order || cs->tailoring || (cs->state & MY_CS_BINSORT))));
204}
205
206static int add_collation(struct charset_info_st *cs)
207{
208 if (cs->name &&
209 (cs->number || (cs->number= get_collation_number(cs->name))))
210 {
211 if (!(all_charsets[cs->number].state & MY_CS_COMPILED))
212 {
213 simple_cs_copy_data(&all_charsets[cs->number],cs);
214
215 }
216
217 cs->number= 0;
218 cs->name= NULL;
219 cs->tailoring= NULL;
220 cs->state= 0;
221 cs->sort_order= NULL;
222 cs->state= 0;
223 }
224 return MY_XML_OK;
225}
226
227
228static void
229default_reporter(enum loglevel level __attribute__ ((unused)),
230 const char *format __attribute__ ((unused)),
231 ...)
232{
233}
234
235
236static void
237my_charset_loader_init(MY_CHARSET_LOADER *loader)
238{
239 loader->error[0]= '\0';
240 loader->once_alloc= malloc;
241 loader->malloc= malloc;
242 loader->realloc= realloc;
243 loader->free= free;
244 loader->reporter= default_reporter;
245 loader->add_collation= add_collation;
246}
247
248
249static int my_read_charset_file(const char *filename)
250{
251 char buf[MAX_BUF];
252 int fd;
253 uint len;
254 MY_CHARSET_LOADER loader;
255
256 my_charset_loader_init(&loader);
257 if ((fd=open(filename,O_RDONLY)) < 0)
258 {
259 fprintf(stderr,"Can't open '%s'\n",filename);
260 return 1;
261 }
262
263 len=read(fd,buf,MAX_BUF);
264 DBUG_ASSERT(len < MAX_BUF);
265 close(fd);
266
267 if (my_parse_charset_xml(&loader, buf, len))
268 {
269 fprintf(stderr, "Error while parsing '%s': %s\n", filename, loader.error);
270 exit(1);
271 }
272
273 return FALSE;
274}
275
276
277void print_arrays(FILE *f, CHARSET_INFO *cs)
278{
279 if (cs->ctype)
280 print_array(f, cs->name, "ctype", cs->ctype, MY_CS_CTYPE_TABLE_SIZE);
281 if (cs->to_lower)
282 print_array(f, cs->name, "to_lower", cs->to_lower, MY_CS_TO_LOWER_TABLE_SIZE);
283 if (cs->to_upper)
284 print_array(f, cs->name, "to_upper", cs->to_upper, MY_CS_TO_UPPER_TABLE_SIZE);
285 if (cs->sort_order)
286 print_array(f, cs->name, "sort_order", cs->sort_order, MY_CS_SORT_ORDER_TABLE_SIZE);
287 if (cs->tab_to_uni)
288 print_array16(f, cs->name, "to_uni", cs->tab_to_uni, MY_CS_TO_UNI_TABLE_SIZE);
289}
290
291
292/**
293 Print an array member of a CHARSET_INFO.
294 @param f - the file to print into
295 @param cs0 - reference to the CHARSET_INFO to print
296 @param array0 - pointer to the array data (can be NULL)
297 @param cs1 - reference to the CHARSET_INFO that the data
298 can be inherited from (e.g. primary collation)
299 @param array1 - pointer to the array data in cs1 (can be NULL)
300 @param name - name of the member
301
302 If array0 is not null, then the CHARSET_INFO being dumped has its
303 own array (e.g. the default collation for the character set).
304 We print the name of this array using cs0->name and return.
305
306 If array1 is not null, then the CHARSET_INFO being dumpled reuses
307 the array from another collation. We print the name of the array of
308 the referenced collation using cs1->name and return.
309
310 Otherwise (if both array0 and array1 are NULL), we have a collation
311 of a character set whose primary collation is not available now,
312 and which does not have its own entry in csname.xml file.
313
314 For example, Index.xml has this entry:
315 <collation name="latin1_swedish_ci_copy">
316 <rules>
317 <import source="latin1_swedish_ci"/>
318 </rules>
319 </collation>
320 and latin1.xml does not have entries for latin1_swedish_ci_copy.
321
322 In such cases we print NULL as a pointer to the array.
323 It will be set to a not-null data during the first initialization
324 by the inherit_charset_data() call (see mysys/charset.c for details).
325*/
326static void
327print_array_ref(FILE *f,
328 CHARSET_INFO *cs0, const void *array0,
329 CHARSET_INFO *cs1, const void *array1,
330 const char *name)
331{
332 CHARSET_INFO *cs= array0 ? cs0 : array1 ? cs1 : NULL;
333 if (cs)
334 fprintf(f," %s_%s, /* %s */\n",
335 name, cs->name, name);
336 else
337 fprintf(f," NULL, /* %s */\n", name);
338}
339
340
341static const char *nopad_infix(CHARSET_INFO *cs)
342{
343 return (cs->state & MY_CS_NOPAD) ? "_nopad" : "";
344}
345
346
347void dispcset(FILE *f,CHARSET_INFO *cs)
348{
349 fprintf(f,"{\n");
350 fprintf(f," %d,%d,%d,\n",cs->number,0,0);
351 fprintf(f," MY_CS_COMPILED%s%s%s%s%s%s,\n",
352 cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
353 cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
354 cs->state & MY_CS_CSSORT ? "|MY_CS_CSSORT" : "",
355 cs->state & MY_CS_PUREASCII ? "|MY_CS_PUREASCII" : "",
356 cs->state & MY_CS_NONASCII ? "|MY_CS_NONASCII" : "",
357 cs->state & MY_CS_NOPAD ? "|MY_CS_NOPAD" : "");
358
359 if (cs->name)
360 {
361 CHARSET_INFO *srccs= inheritance_source(cs->number);
362 fprintf(f," \"%s\", /* cset name */\n",cs->csname);
363 fprintf(f," \"%s\", /* coll name */\n",cs->name);
364 fprintf(f," \"\", /* comment */\n");
365 if (cs->tailoring)
366 fprintf(f, " \"%s\", /* tailoring */\n", cs->tailoring);
367 else
368 fprintf(f," NULL, /* tailoring */\n");
369
370 print_array_ref(f, cs, cs->ctype, srccs, srccs->ctype, "ctype");
371 print_array_ref(f, cs, cs->to_lower, srccs, srccs->to_lower, "to_lower");
372 print_array_ref(f, cs, cs->to_upper, srccs, srccs->to_upper, "to_upper");
373
374 if (cs->sort_order)
375 fprintf(f," sort_order_%s, /* sort_order */\n",cs->name);
376 else
377 fprintf(f," NULL, /* sort_order */\n");
378
379 fprintf(f," NULL, /* uca */\n");
380
381 print_array_ref(f, cs, cs->tab_to_uni, srccs, srccs->tab_to_uni, "to_uni");
382 }
383 else
384 {
385 fprintf(f," NULL, /* cset name */\n");
386 fprintf(f," NULL, /* coll name */\n");
387 fprintf(f," NULL, /* comment */\n");
388 fprintf(f," NULL, /* tailoging */\n");
389 fprintf(f," NULL, /* ctype */\n");
390 fprintf(f," NULL, /* lower */\n");
391 fprintf(f," NULL, /* upper */\n");
392 fprintf(f," NULL, /* sort order */\n");
393 fprintf(f," NULL, /* uca */\n");
394 fprintf(f," NULL, /* to_uni */\n");
395 }
396
397 fprintf(f," NULL, /* from_uni */\n");
398 fprintf(f," &my_unicase_default, /* caseinfo */\n");
399 fprintf(f," NULL, /* state map */\n");
400 fprintf(f," NULL, /* ident map */\n");
401 fprintf(f," 1, /* strxfrm_multiply*/\n");
402 fprintf(f," 1, /* caseup_multiply*/\n");
403 fprintf(f," 1, /* casedn_multiply*/\n");
404 fprintf(f," 1, /* mbminlen */\n");
405 fprintf(f," 1, /* mbmaxlen */\n");
406 fprintf(f," 0, /* min_sort_char */\n");
407 fprintf(f," 255, /* max_sort_char */\n");
408 fprintf(f," ' ', /* pad_char */\n");
409 fprintf(f," 0, /* escape_with_backslash_is_dangerous */\n");
410 fprintf(f," 1, /* levels_for_order */\n");
411 fprintf(f," &my_charset_8bit_handler,\n");
412
413 if (cs->state & MY_CS_BINSORT)
414 fprintf(f," &my_collation_8bit%s_bin_handler,\n", nopad_infix(cs));
415 else
416 fprintf(f," &my_collation_8bit_simple%s_ci_handler,\n", nopad_infix(cs));
417 fprintf(f,"}\n");
418}
419
420
/*
  Write the copyright and license comment that starts the generated
  source file, followed by one empty line.
*/
static void
fprint_copyright(FILE *file)
{
  static const char notice[]=
"/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems, Inc.\n"
" Copyright (c) 2000, 2011, Oracle and/or its affiliates.\n"
" Copyright 2008-2016 MariaDB Corporation\n"
"\n"
" This program is free software; you can redistribute it and/or modify\n"
" it under the terms of the GNU General Public License as published by\n"
" the Free Software Foundation; version 2 of the License.\n"
"\n"
" This program is distributed in the hope that it will be useful,\n"
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
" GNU General Public License for more details.\n"
"\n"
" You should have received a copy of the GNU General Public License\n"
" along with this program; if not, write to the Free Software\n"
" Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */\n"
"\n";

  /* The text has no conversion specifications, so fputs() is enough */
  fputs(notice, file);
}
443
444
445int
446main(int argc, char **argv __attribute__((unused)))
447{
448 struct charset_info_st ncs, *cs;
449 char filename[256];
450 FILE *f= stdout;
451
452 if (argc < 2)
453 {
454 fprintf(stderr, "usage: %s source-dir\n", argv[0]);
455 exit(EXIT_FAILURE);
456 }
457
458 bzero((void*)&ncs,sizeof(ncs));
459 bzero((void*)&all_charsets,sizeof(all_charsets));
460 bzero((void*) refids, sizeof(refids));
461
462 sprintf(filename,"%s/%s",argv[1],"Index.xml");
463 my_read_charset_file(filename);
464
465 for (cs= all_charsets;
466 cs < all_charsets + array_elements(all_charsets);
467 cs++)
468 {
469 if (cs->number && !(cs->state & MY_CS_COMPILED))
470 {
471 if ( (!simple_cs_is_full(cs)) && (cs->csname))
472 {
473 sprintf(filename,"%s/%s.xml",argv[1],cs->csname);
474 my_read_charset_file(filename);
475 }
476 cs->state|= MY_CS_LOADED;
477 }
478 }
479
480 fprintf(f, "/*\n");
481 fprintf(f, " This file was generated by the conf_to_src utility. "
482 "Do not edit it directly,\n");
483 fprintf(f, " edit the XML definitions in sql/share/charsets/ instead.\n\n");
484 fprintf(f, " To re-generate, run the following in the strings/ "
485 "directory:\n");
486 fprintf(f, " ./conf_to_src ../sql/share/charsets/ > FILE\n");
487 fprintf(f, "*/\n\n");
488 fprint_copyright(f);
489 fprintf(f,"#include \"strings_def.h\"\n");
490 fprintf(f,"#include <m_ctype.h>\n\n");
491
492
493 for (cs= all_charsets;
494 cs < all_charsets + array_elements(all_charsets);
495 cs++)
496 {
497 if (cs->state & MY_CS_LOADED)
498 {
499 CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs);
500 cs->state|= my_8bit_charset_flags_from_data(cs) |
501 my_8bit_collation_flags_from_data(cs);
502 if (refcs)
503 {
504 refids[cs->number]= refcs->number;
505 inherit_charset_data(cs, refcs);
506 }
507 fprintf(f,"#ifdef HAVE_CHARSET_%s\n",cs->csname);
508 print_arrays(f, cs);
509 fprintf(f,"#endif\n");
510 fprintf(f,"\n");
511 }
512 }
513
514 fprintf(f,"struct charset_info_st compiled_charsets[] = {\n");
515 for (cs= all_charsets;
516 cs < all_charsets + array_elements(all_charsets);
517 cs++)
518 {
519 if (cs->state & MY_CS_LOADED)
520 {
521 fprintf(f,"#ifdef HAVE_CHARSET_%s\n",cs->csname);
522 dispcset(f,cs);
523 fprintf(f,",\n");
524 fprintf(f,"#endif\n");
525 }
526 }
527
528 dispcset(f,&ncs);
529 fprintf(f,"};\n");
530
531 return 0;
532}
533