conf_to_src.c source code [MariaDB/strings/conf_to_src.c]

/* Copyright (c) 2000-2003, 2005-2007 MySQL AB, 2009 Sun Microsystems, Inc.
   Copyright (c) 2009-2011, Monty Program Ab
   Use is subject to license terms.
   Copyright (c) 2009-2011, Monty Program Ab

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
18
#include "strings_def.h"
#include <m_ctype.h>
#include <fcntl.h>
#include <my_xml.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
23
24	#define ROW_LEN 16
25	#define ROW16_LEN 8
26	#define MAX_BUF (64*1024)
27
28
29	#define MY_ALL_CHARSETS_SIZE 2048
30
31	static struct charset_info_st all_charsets[MY_ALL_CHARSETS_SIZE];
32	static uint refids[MY_ALL_CHARSETS_SIZE];
33
34	static CHARSET_INFO *inheritance_source(uint id)
35	{
36	return &all_charsets[refids[id]];
37	}
38
39
40	void
41	print_array(FILE f, const* char set, const* char name, const* uchar a, int* n)
42	{
43	int i;
44
45	fprintf(f,"static const uchar %s_%s[] = {\n", name, set);
46
47	for (i=`0` ;i<n ; i++)
48	{
49	fprintf(f,"0x%02X",a[i]);
50	fprintf(f, (i+`1`<n) ? "," :"" );
51	fprintf(f, ((i+`1`) % ROW_LEN == n % ROW_LEN) ? "\n" : "" );
52	}
53	fprintf(f,"};\n\n");
54	}
55
56
57	void
58	print_array16(FILE f, const* char set, const* char name, const* uint16 a, int* n)
59	{
60	int i;
61
62	fprintf(f,"static const uint16 %s_%s[] = {\n", name, set);
63
64	for (i=`0` ;i<n ; i++)
65	{
66	fprintf(f,"0x%04X",a[i]);
67	fprintf(f, (i+`1`<n) ? "," :"" );
68	fprintf(f, ((i+`1`) % ROW16_LEN == n % ROW16_LEN) ? "\n" : "" );
69	}
70	fprintf(f,"};\n\n");
71	}
72
73
74	static uint get_collation_number(const char *name)
75	{
76	CHARSET_INFO *cs;
77	for (cs= all_charsets;
78	cs < all_charsets + array_elements(all_charsets);
79	cs++)
80	{
81	if (cs->name && !strcmp(cs->name, name))
82	return cs->number;
83	}
84	return `0`;
85	}
86
87
88	static uint
89	get_charset_number_internal(const char *charset_name, uint cs_flags)
90	{
91	CHARSET_INFO *cs;
92	for (cs= all_charsets;
93	cs < all_charsets + array_elements(all_charsets);
94	cs++)
95	{
96	if (cs->csname && (cs->state & cs_flags) &&
97	!strcmp(cs->csname, charset_name))
98	return cs->number;
99	}
100	return `0`;
101	}
102
103	char mdup(const* char *src, uint len)
104	{
105	char dst=(char**)malloc(len);
106	if (!dst)
107	exit(`1`);
108	memcpy(dst,src,len);
109	return dst;
110	}
111
112	static void simple_cs_copy_data(struct charset_info_st to, CHARSET_INFO from)
113	{
114	to->number= from->number ? from->number : to->number;
115	to->state\|= from->state;
116
117	if (from->csname)
118	to->csname= strdup(from->csname);
119
120	if (from->name)
121	to->name= strdup(from->name);
122
123	if (from->tailoring)
124	to->tailoring= strdup(from->tailoring);
125
126	if (from->ctype)
127	to->ctype= (uchar) mdup((char**) from->ctype, MY_CS_CTYPE_TABLE_SIZE);
128	if (from->to_lower)
129	to->to_lower= (uchar) mdup((char**) from->to_lower, MY_CS_TO_LOWER_TABLE_SIZE);
130	if (from->to_upper)
131	to->to_upper= (uchar) mdup((char**) from->to_upper, MY_CS_TO_UPPER_TABLE_SIZE);
132	if (from->sort_order)
133	{
134	to->sort_order= (uchar) mdup((char**) from->sort_order, MY_CS_SORT_ORDER_TABLE_SIZE);
135	/*
136	set_max_sort_char(to);
137	*/
138	}
139	if (from->tab_to_uni)
140	{
141	uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);
142	to->tab_to_uni= (uint16) mdup((char**)from->tab_to_uni, sz);
143	/*
144	create_fromuni(to);
145	*/
146	}
147	}
148
149
150	/*
151	cs->xxx arrays can be NULL in case when a collation has an entry only
152	in Index.xml and has no entry in csname.xml (e.g. in case of a binary
153	collation or a collation using <import> command).
154
155	refcs->xxx arrays can be NULL if <import> refers to a collation
156	which is not defined in csname.xml, e.g. an always compiled collation
157	such as latin1_swedish_ci.
158	*/
159	static void inherit_charset_data(struct charset_info_st *cs,
160	CHARSET_INFO *refcs)
161	{
162	cs->state\|= (refcs->state & (MY_CS_PUREASCII\|MY_CS_NONASCII));
163	if (refcs->ctype && cs->ctype &&
164	!memcmp(cs->ctype, refcs->ctype, MY_CS_CTYPE_TABLE_SIZE))
165	cs->ctype= NULL;
166	if (refcs->to_lower && cs->to_lower &&
167	!memcmp(cs->to_lower, refcs->to_lower, MY_CS_TO_LOWER_TABLE_SIZE))
168	cs->to_lower= NULL;
169	if (refcs->to_upper && cs->to_upper &&
170	!memcmp(cs->to_upper, refcs->to_upper, MY_CS_TO_LOWER_TABLE_SIZE))
171	cs->to_upper= NULL;
172	if (refcs->tab_to_uni && cs->tab_to_uni &&
173	!memcmp(cs->tab_to_uni, refcs->tab_to_uni,
174	MY_CS_TO_UNI_TABLE_SIZE * sizeof(uint16)))
175	cs->tab_to_uni= NULL;
176	}
177
178
179	static CHARSET_INFO find_charset_data_inheritance_source(CHARSET_INFO cs)
180	{
181	CHARSET_INFO *refcs;
182	uint refid= get_charset_number_internal(cs->csname, MY_CS_PRIMARY);
183	return refid && refid != cs->number &&
184	(refcs= &all_charsets[refid]) &&
185	(refcs->state & MY_CS_LOADED) ? refcs : NULL;
186	}
187
188
189	/**
190	Detect if "cs" needs further loading from csname.xml
191	@param cs - the character set pointer
192	@retval FALSE - if the current data (e.g. loaded from from Index.xml)
193	is not enough to dump the character set and requires
194	further reading from the csname.xml file.
195	@retval TRUE - if the current data is enough to dump,
196	no reading of csname.xml is needed.
197	*/
198	static my_bool simple_cs_is_full(CHARSET_INFO *cs)
199	{
200	return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
201	cs->to_lower) &&
202	(cs->number && cs->name &&
203	(cs->sort_order \|\| cs->tailoring \|\| (cs->state & MY_CS_BINSORT))));
204	}
205
206	static int add_collation(struct charset_info_st *cs)
207	{
208	if (cs->name &&
209	(cs->number \|\| (cs->number= get_collation_number(cs->name))))
210	{
211	if (!(all_charsets[cs->number].state & MY_CS_COMPILED))
212	{
213	simple_cs_copy_data(&all_charsets[cs->number],cs);
214
215	}
216
217	cs->number= `0`;
218	cs->name= NULL;
219	cs->tailoring= NULL;
220	cs->state= `0`;
221	cs->sort_order= NULL;
222	cs->state= `0`;
223	}
224	return MY_XML_OK;
225	}
226
227
228	static void
229	default_reporter(enum loglevel level __attribute__ ((unused)),
230	const char format __attribute__* ((unused)),
231	...)
232	{
233	}
234
235
236	static void
237	my_charset_loader_init(MY_CHARSET_LOADER *loader)
238	{
239	loader->error[`0`]= `'\0'`;
240	loader->once_alloc= malloc;
241	loader->malloc= malloc;
242	loader->realloc= realloc;
243	loader->free= free;
244	loader->reporter= default_reporter;
245	loader->add_collation= add_collation;
246	}
247
248
249	static int my_read_charset_file(const char *filename)
250	{
251	char buf[MAX_BUF];
252	int fd;
253	uint len;
254	MY_CHARSET_LOADER loader;
255
256	my_charset_loader_init(&loader);
257	if ((fd=open(filename,O_RDONLY)) < `0`)
258	{
259	fprintf(stderr,"Can't open '%s'\n",filename);
260	return `1`;
261	}
262
263	len=read(fd,buf,MAX_BUF);
264	DBUG_ASSERT(len < MAX_BUF);
265	close(fd);
266
267	if (my_parse_charset_xml(&loader, buf, len))
268	{
269	fprintf(stderr, "Error while parsing '%s': %s\n", filename, loader.error);
270	exit(`1`);
271	}
272
273	return FALSE;
274	}
275
276
277	void print_arrays(FILE f, CHARSET_INFO cs)
278	{
279	if (cs->ctype)
280	print_array(f, cs->name, "ctype", cs->ctype, MY_CS_CTYPE_TABLE_SIZE);
281	if (cs->to_lower)
282	print_array(f, cs->name, "to_lower", cs->to_lower, MY_CS_TO_LOWER_TABLE_SIZE);
283	if (cs->to_upper)
284	print_array(f, cs->name, "to_upper", cs->to_upper, MY_CS_TO_UPPER_TABLE_SIZE);
285	if (cs->sort_order)
286	print_array(f, cs->name, "sort_order", cs->sort_order, MY_CS_SORT_ORDER_TABLE_SIZE);
287	if (cs->tab_to_uni)
288	print_array16(f, cs->name, "to_uni", cs->tab_to_uni, MY_CS_TO_UNI_TABLE_SIZE);
289	}
290
291
292	/**
293	Print an array member of a CHARSET_INFO.
294	@param f - the file to print into
295	@param cs0 - reference to the CHARSET_INFO to print
296	@param array0 - pointer to the array data (can be NULL)
297	@param cs1 - reference to the CHARSET_INFO that the data
298	can be inherited from (e.g. primary collation)
299	@param array1 - pointer to the array data in cs1 (can be NULL)
300	@param name - name of the member
301
302	If array0 is not null, then the CHARSET_INFO being dumped has its
303	own array (e.g. the default collation for the character set).
304	We print the name of this array using cs0->name and return.
305
306	If array1 is not null, then the CHARSET_INFO being dumpled reuses
307	the array from another collation. We print the name of the array of
308	the referenced collation using cs1->name and return.
309
310	Otherwise (if both array0 and array1 are NULL), we have a collation
311	of a character set whose primary collation is not available now,
312	and which does not have its own entry in csname.xml file.
313
314	For example, Index.xml has this entry:
315	<collation name="latin1_swedish_ci_copy">
316	<rules>
317	<import source="latin1_swedish_ci"/>
318	</rules>
319	</collation>
320	and latin1.xml does not have entries for latin1_swedish_ci_copy.
321
322	In such cases we print NULL as a pointer to the array.
323	It will be set to a not-null data during the first initialization
324	by the inherit_charset_data() call (see mysys/charset.c for details).
325	*/
326	static void
327	print_array_ref(FILE *f,
328	CHARSET_INFO cs0, const* void *array0,
329	CHARSET_INFO cs1, const* void *array1,
330	const char *name)
331	{
332	CHARSET_INFO *cs= array0 ? cs0 : array1 ? cs1 : NULL;
333	if (cs)
334	fprintf(f," %s_%s, /* %s */\n",
335	name, cs->name, name);
336	else
337	fprintf(f," NULL, /* %s */\n", name);
338	}
339
340
341	static const char nopad_infix(CHARSET_INFO cs)
342	{
343	return (cs->state & MY_CS_NOPAD) ? "_nopad" : "";
344	}
345
346
347	void dispcset(FILE f,CHARSET_INFO cs)
348	{
349	fprintf(f,"{\n");
350	fprintf(f," %d,%d,%d,\n",cs->number,`0`,`0`);
351	fprintf(f," MY_CS_COMPILED%s%s%s%s%s%s,\n",
352	cs->state & MY_CS_BINSORT ? "\|MY_CS_BINSORT" : "",
353	cs->state & MY_CS_PRIMARY ? "\|MY_CS_PRIMARY" : "",
354	cs->state & MY_CS_CSSORT ? "\|MY_CS_CSSORT" : "",
355	cs->state & MY_CS_PUREASCII ? "\|MY_CS_PUREASCII" : "",
356	cs->state & MY_CS_NONASCII ? "\|MY_CS_NONASCII" : "",
357	cs->state & MY_CS_NOPAD ? "\|MY_CS_NOPAD" : "");
358
359	if (cs->name)
360	{
361	CHARSET_INFO *srccs= inheritance_source(cs->number);
362	fprintf(f," \"%s\", /* cset name */\n",cs->csname);
363	fprintf(f," \"%s\", /* coll name */\n",cs->name);
364	fprintf(f," \"\", /* comment */\n");
365	if (cs->tailoring)
366	fprintf(f, " \"%s\", /* tailoring */\n", cs->tailoring);
367	else
368	fprintf(f," NULL, /* tailoring */\n");
369
370	print_array_ref(f, cs, cs->ctype, srccs, srccs->ctype, "ctype");
371	print_array_ref(f, cs, cs->to_lower, srccs, srccs->to_lower, "to_lower");
372	print_array_ref(f, cs, cs->to_upper, srccs, srccs->to_upper, "to_upper");
373
374	if (cs->sort_order)
375	fprintf(f," sort_order_%s, /* sort_order */\n",cs->name);
376	else
377	fprintf(f," NULL, /* sort_order */\n");
378
379	fprintf(f," NULL, /* uca */\n");
380
381	print_array_ref(f, cs, cs->tab_to_uni, srccs, srccs->tab_to_uni, "to_uni");
382	}
383	else
384	{
385	fprintf(f," NULL, /* cset name */\n");
386	fprintf(f," NULL, /* coll name */\n");
387	fprintf(f," NULL, /* comment */\n");
388	fprintf(f," NULL, /* tailoging */\n");
389	fprintf(f," NULL, /* ctype */\n");
390	fprintf(f," NULL, /* lower */\n");
391	fprintf(f," NULL, /* upper */\n");
392	fprintf(f," NULL, /* sort order */\n");
393	fprintf(f," NULL, /* uca */\n");
394	fprintf(f," NULL, /* to_uni */\n");
395	}
396
397	fprintf(f," NULL, /* from_uni */\n");
398	fprintf(f," &my_unicase_default, /* caseinfo */\n");
399	fprintf(f," NULL, /* state map */\n");
400	fprintf(f," NULL, /* ident map */\n");
401	fprintf(f," 1, /* strxfrm_multiply*/\n");
402	fprintf(f," 1, /* caseup_multiply*/\n");
403	fprintf(f," 1, /* casedn_multiply*/\n");
404	fprintf(f," 1, /* mbminlen */\n");
405	fprintf(f," 1, /* mbmaxlen */\n");
406	fprintf(f," 0, /* min_sort_char */\n");
407	fprintf(f," 255, /* max_sort_char */\n");
408	fprintf(f," ' ', /* pad_char */\n");
409	fprintf(f," 0, /* escape_with_backslash_is_dangerous */\n");
410	fprintf(f," 1, /* levels_for_order */\n");
411	fprintf(f," &my_charset_8bit_handler,\n");
412
413	if (cs->state & MY_CS_BINSORT)
414	fprintf(f," &my_collation_8bit%s_bin_handler,\n", nopad_infix(cs));
415	else
416	fprintf(f," &my_collation_8bit_simple%s_ci_handler,\n", nopad_infix(cs));
417	fprintf(f,"}\n");
418	}
419
420
/**
  Write the copyright and license comment of the generated file.

  @param file - the stream to write to
*/
static void
fprint_copyright(FILE *file)
{
  /* The text has no conversions, so it is written verbatim. */
  fputs(
"/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems, Inc.\n"
" Copyright (c) 2000, 2011, Oracle and/or its affiliates.\n"
" Copyright 2008-2016 MariaDB Corporation\n"
"\n"
" This program is free software; you can redistribute it and/or modify\n"
" it under the terms of the GNU General Public License as published by\n"
" the Free Software Foundation; version 2 of the License.\n"
"\n"
" This program is distributed in the hope that it will be useful,\n"
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
" GNU General Public License for more details.\n"
"\n"
" You should have received a copy of the GNU General Public License\n"
" along with this program; if not, write to the Free Software\n"
" Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */\n"
"\n", file);
}
443
444
445	int
446	main(int argc, char argv __attribute__**((unused)))
447	{
448	struct charset_info_st ncs, *cs;
449	char filename[`256`];
450	FILE *f= stdout;
451
452	if (argc < `2`)
453	{
454	fprintf(stderr, "usage: %s source-dir\n", argv[`0`]);
455	exit(EXIT_FAILURE);
456	}
457
458	bzero((void)&ncs,sizeof*(ncs));
459	bzero((void)&all_charsets,sizeof*(all_charsets));
460	bzero((void) refids, sizeof*(refids));
461
462	sprintf(filename,"%s/%s",argv[`1`],"Index.xml");
463	my_read_charset_file(filename);
464
465	for (cs= all_charsets;
466	cs < all_charsets + array_elements(all_charsets);
467	cs++)
468	{
469	if (cs->number && !(cs->state & MY_CS_COMPILED))
470	{
471	if ( (!simple_cs_is_full(cs)) && (cs->csname))
472	{
473	sprintf(filename,"%s/%s.xml",argv[`1`],cs->csname);
474	my_read_charset_file(filename);
475	}
476	cs->state\|= MY_CS_LOADED;
477	}
478	}
479
480	fprintf(f, "/*\n");
481	fprintf(f, " This file was generated by the conf_to_src utility. "
482	"Do not edit it directly,\n");
483	fprintf(f, " edit the XML definitions in sql/share/charsets/ instead.\n\n");
484	fprintf(f, " To re-generate, run the following in the strings/ "
485	"directory:\n");
486	fprintf(f, " ./conf_to_src ../sql/share/charsets/ > FILE\n");
487	fprintf(f, "*/\n\n");
488	fprint_copyright(f);
489	fprintf(f,"#include \"strings_def.h\"\n");
490	fprintf(f,"#include <m_ctype.h>\n\n");
491
492
493	for (cs= all_charsets;
494	cs < all_charsets + array_elements(all_charsets);
495	cs++)
496	{
497	if (cs->state & MY_CS_LOADED)
498	{
499	CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs);
500	cs->state\|= my_8bit_charset_flags_from_data(cs) \|
501	my_8bit_collation_flags_from_data(cs);
502	if (refcs)
503	{
504	refids[cs->number]= refcs->number;
505	inherit_charset_data(cs, refcs);
506	}
507	fprintf(f,"#ifdef HAVE_CHARSET_%s\n",cs->csname);
508	print_arrays(f, cs);
509	fprintf(f,"#endif\n");
510	fprintf(f,"\n");
511	}
512	}
513
514	fprintf(f,"struct charset_info_st compiled_charsets[] = {\n");
515	for (cs= all_charsets;
516	cs < all_charsets + array_elements(all_charsets);
517	cs++)
518	{
519	if (cs->state & MY_CS_LOADED)
520	{
521	fprintf(f,"#ifdef HAVE_CHARSET_%s\n",cs->csname);
522	dispcset(f,cs);
523	fprintf(f,",\n");
524	fprintf(f,"#endif\n");
525	}
526	}
527
528	dispcset(f,&ncs);
529	fprintf(f,"};\n");
530
531	return `0`;
532	}
533

Browse the source code of MariaDB/strings/conf_to_src.c