ucnv_io.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/ucnv_io.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	******************************************************************************
5	*
6	* Copyright (C) 1999-2015, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	******************************************************************************
10	*
11	*
12	* ucnv_io.cpp:
13	* initializes global variables and defines functions pertaining to converter
14	* name resolution aspect of the conversion code.
15	*
16	* new implementation:
17	*
18	* created on: 1999nov22
19	* created by: Markus W. Scherer
20	*
21	* Use the binary cnvalias.icu (created from convrtrs.txt) to work
22	* with aliases for converter names.
23	*
24	* Date Name Description
25	* 11/22/1999 markus Created
26	* 06/28/2002 grhoten Major overhaul of the converter alias design.
27	* Now an alias can map to different converters
28	* depending on the specified standard.
29	*******************************************************************************
30	*/
31
32	#include "unicode/utypes.h"
33
34	#if !UCONFIG_NO_CONVERSION
35
36	#include "unicode/ucnv.h"
37	#include "unicode/udata.h"
38
39	#include "umutex.h"
40	#include "uarrsort.h"
41	#include "uassert.h"
42	#include "udataswp.h"
43	#include "cstring.h"
44	#include "cmemory.h"
45	#include "ucnv_io.h"
46	#include "uenumimp.h"
47	#include "ucln_cmn.h"
48
/* Format of cnvalias.icu -----------------------------------------------------*
50	*
51	* cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
52	* This binary form contains several tables. All indexes are to uint16_t
53	* units, and not to the bytes (uint8_t units). Addressing everything on
54	* 16-bit boundaries allows us to store more information with small index
55	* numbers, which are also 16-bit in size. The majority of the table (except
56	* the string table) are 16-bit numbers.
57	*
58	* First there is the size of the Table of Contents (TOC). The TOC
59	* entries contain the size of each section. In order to find the offset
60	* you just need to sum up the previous offsets.
61	* The TOC length and entries are an array of uint32_t values.
62	* The first section after the TOC starts immediately after the TOC.
63	*
64	* 1) This section contains a list of converters. This list contains indexes
65	* into the string table for the converter name. The index of this list is
66	* also used by other sections, which are mentioned later on.
67	* This list is not sorted.
68	*
69	* 2) This section contains a list of tags. This list contains indexes
70	* into the string table for the tag name. The index of this list is
71	* also used by other sections, which are mentioned later on.
72	* This list is in priority order of standards.
73	*
74	* 3) This section contains a list of sorted unique aliases. This
75	* list contains indexes into the string table for the alias name. The
76	* index of this list is also used by other sections, like the 4th section.
77	* The index for the 3rd and 4th section is used to get the
78	* alias -> converter name mapping. Section 3 and 4 form a two column table.
79	* Some of the most significant bits of each index may contain other
80	* information (see findConverter for details).
81	*
82	* 4) This section contains a list of mapped converter names. Consider this
83	* as a table that maps the 3rd section to the 1st section. This list contains
84	* indexes into the 1st section. The index of this list is the same index in
85	* the 3rd section. There is also some extra information in the high bits of
86	* each converter index in this table. Currently it's only used to say that
87	* an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
88	* and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
89	* the predigested form of the 5th section so that an alias lookup can be fast.
90	*
91	* 5) This section contains a 2D array with indexes to the 6th section. This
92	* section is the full form of all alias mappings. The column index is the
93	* index into the converter list (column header). The row index is the index
 * to tag list (row header). This 2D array is the top part of a 3D array. The
95	* third dimension is in the 6th section.
96	*
 * 6) This is a blob of variable length arrays. Each array starts with a size,
98	* and is followed by indexes to alias names in the string table. This is
99	* the third dimension to the section 5. No other section should be referencing
100	* this section.
101	*
102	* 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
103	* presence indicates that a section 9 exists. UConverterAliasOptions specifies
104	* what type of string normalization is used among other potential things in the
105	* future.
106	*
107	* 8) This is the string table. All strings are indexed on an even address.
108	* There are two reasons for this. First many chip architectures locate strings
109	* faster on even address boundaries. Second, since all indexes are 16-bit
110	* numbers, this string table can be 128KB in size instead of 64KB when we
111	* only have strings starting on an even address.
112	*
113	* 9) When present this is a set of prenormalized strings from section 8. This
114	* table contains normalized strings with the dashes and spaces stripped out,
115	* and all strings lowercased. In the future, the options in section 7 may state
116	* other types of normalization.
117	*
118	* Here is the concept of section 5 and 6. It's a 3D cube. Each tag
119	* has a unique alias among all converters. That same alias can
120	* be mentioned in other standards on different converters,
121	* but only one alias per tag can be unique.
122	*
123	*
124	* Converter Names (Usually in TR22 form)
125	* -------------------------------------------.
126	* T / /\|
127	* a / / \|
128	* g / / \|
129	* s / / \|
130	* / / \|
131	* ------------------------------------------/ \|
132	* A \| \| \|
133	* l \| \| \|
134	* i \| \| /
135	* a \| \| /
136	* s \| \| /
137	* e \| \| /
138	* s \| \|/
139	* -------------------------------------------
140	*
141	*
142	*
143	* Here is what it really looks like. It's like swiss cheese.
144	* There are holes. Some converters aren't recognized by
145	* a standard, or they are really old converters that the
146	* standard doesn't recognize anymore.
147	*
148	* Converter Names (Usually in TR22 form)
149	* -------------------------------------------.
150	* T /##########################################/\|
151	* a / # # /#
152	* g / # ## ## ### # ### ### ### #/
153	* s / # ##### #### ## ## #/#
154	* / ### # # ## # # # ### # # #/##
155	* ------------------------------------------/# #
156	* A \|### # # ## # # # ### # # #\|# #
157	* l \|# # # # # ## # #\|# #
158	* i \|# # # # # # #\|#
159	* a \|# #\|#
160	* s \| #\|#
161	* e
162	* s
163	*
164	*/
165
/**
 * Used by the UEnumeration API.
 * Per-enumeration cursor over one alias list in gMainTable.taggedAliasLists.
 */
typedef struct UAliasContext {
    uint32_t listOffset; /* offset of the list in taggedAliasLists; 0 means nothing to enumerate */
    uint32_t listIdx;    /* index of the next alias to hand out from that list */
} UAliasContext;
173
/* Name and type passed to udata_openChoice() to locate the alias data item. */
static const char DATA_NAME[] = "cnvalias";
static const char DATA_TYPE[] = "icu";
176
177	static UDataMemory *gAliasData=NULL;
178	static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
179
/*
 * Indexes into the uint32_t table of contents at the start of cnvalias.icu.
 * Entry 0 holds the TOC length; entries 1..9 hold the size of each section.
 */
enum {
    tocLengthIndex=0,
    converterListIndex=1,
    tagListIndex=2,
    aliasListIndex=3,
    untaggedConvArrayIndex=4,
    taggedAliasArrayIndex=5,
    taggedAliasListsIndex=6,
    tableOptionsIndex=7,
    stringTableIndex=8,
    normalizedStringTableIndex=9,
    offsetsCount,    /* length of the swapper's temporary offsets[] */
    minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
};
194
195	static const UConverterAliasOptions defaultTableOptions = {
196	UCNV_IO_UNNORMALIZED,
197	`0` / containsCnvOptionInfo /
198	};
199	static UConverterAlias gMainTable;
200
/*
 * Resolve a string-table index to a C string. idx counts uint16_t units
 * (the table pointers are const uint16_t *), not bytes.
 * The whole expansion is parenthesized so that the result can be used in
 * postfix expressions such as GET_STRING(i)[0].
 */
#define GET_STRING(idx) ((const char *)(gMainTable.stringTable + (idx)))
#define GET_NORMALIZED_STRING(idx) ((const char *)(gMainTable.normalizedStringTable + (idx)))
203
204	static UBool U_CALLCONV
205	isAcceptable(void * /context/,
206	const char * /type/, const char * /name/,
207	const UDataInfo *pInfo) {
208	return (UBool)(
209	pInfo->size>=`20` &&
210	pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
211	pInfo->charsetFamily==U_CHARSET_FAMILY &&
212	pInfo->dataFormat[`0`]==`0x43` && / dataFormat="CvAl" /
213	pInfo->dataFormat[`1`]==`0x76` &&
214	pInfo->dataFormat[`2`]==`0x41` &&
215	pInfo->dataFormat[`3`]==`0x6c` &&
216	pInfo->formatVersion[`0`]==`3`);
217	}
218
219	static UBool U_CALLCONV ucnv_io_cleanup(void)
220	{
221	if (gAliasData) {
222	udata_close(gAliasData);
223	gAliasData = NULL;
224	}
225	gAliasDataInitOnce.reset();
226
227	uprv_memset(&gMainTable, `0`, sizeof(gMainTable));
228
229	return TRUE; / Everything was cleaned up /
230	}
231
232	static void U_CALLCONV initAliasData(UErrorCode &errCode) {
233	UDataMemory *data;
234	const uint16_t *table;
235	const uint32_t *sectionSizes;
236	uint32_t tableStart;
237	uint32_t currOffset;
238
239	ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
240
241	U_ASSERT(gAliasData == NULL);
242	data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
243	if(U_FAILURE(errCode)) {
244	return;
245	}
246
247	sectionSizes = (const uint32_t *)udata_getMemory(data);
248	table = (const uint16_t *)sectionSizes;
249
250	tableStart = sectionSizes[`0`];
251	if (tableStart < minTocLength) {
252	errCode = U_INVALID_FORMAT_ERROR;
253	udata_close(data);
254	return;
255	}
256	gAliasData = data;
257
258	gMainTable.converterListSize = sectionSizes[`1`];
259	gMainTable.tagListSize = sectionSizes[`2`];
260	gMainTable.aliasListSize = sectionSizes[`3`];
261	gMainTable.untaggedConvArraySize = sectionSizes[`4`];
262	gMainTable.taggedAliasArraySize = sectionSizes[`5`];
263	gMainTable.taggedAliasListsSize = sectionSizes[`6`];
264	gMainTable.optionTableSize = sectionSizes[`7`];
265	gMainTable.stringTableSize = sectionSizes[`8`];
266
267	if (tableStart > `8`) {
268	gMainTable.normalizedStringTableSize = sectionSizes[`9`];
269	}
270
271	currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
272	gMainTable.converterList = table + currOffset;
273
274	currOffset += gMainTable.converterListSize;
275	gMainTable.tagList = table + currOffset;
276
277	currOffset += gMainTable.tagListSize;
278	gMainTable.aliasList = table + currOffset;
279
280	currOffset += gMainTable.aliasListSize;
281	gMainTable.untaggedConvArray = table + currOffset;
282
283	currOffset += gMainTable.untaggedConvArraySize;
284	gMainTable.taggedAliasArray = table + currOffset;
285
286	/ aliasLists is a 1's based array, but it has a padding character /
287	currOffset += gMainTable.taggedAliasArraySize;
288	gMainTable.taggedAliasLists = table + currOffset;
289
290	currOffset += gMainTable.taggedAliasListsSize;
291	if (gMainTable.optionTableSize > `0`
292	&& ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
293	{
294	/ Faster table /
295	gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
296	}
297	else {
298	/ Smaller table, or I can't handle this normalization mode!*
299	Use the original slower table lookup. /*
300	gMainTable.optionTable = &defaultTableOptions;
301	}
302
303	currOffset += gMainTable.optionTableSize;
304	gMainTable.stringTable = table + currOffset;
305
306	currOffset += gMainTable.stringTableSize;
307	gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
308	? gMainTable.stringTable : (table + currOffset));
309	}
310
311
312	static UBool
313	haveAliasData(UErrorCode *pErrorCode) {
314	umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
315	return U_SUCCESS(*pErrorCode);
316	}
317
318	static inline UBool
319	isAlias(const char alias, UErrorCode pErrorCode) {
320	if(alias==NULL) {
321	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
322	return FALSE;
323	}
324	return (UBool)(*alias!=`0`);
325	}
326
327	static uint32_t getTagNumber(const char *tagname) {
328	if (gMainTable.tagList) {
329	uint32_t tagNum;
330	for (tagNum = `0`; tagNum < gMainTable.tagListSize; tagNum++) {
331	if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
332	return tagNum;
333	}
334	}
335	}
336
337	return UINT32_MAX;
338	}
339
/* character types relevant for ucnv_compareNames() */
enum {
    UIGNORE,
    ZERO,
    NONZERO,
    MINLETTER /* any values from here on are lowercase letter mappings */
};

/*
 * character types for ASCII 00..7F
 * Digits map to ZERO/NONZERO, letters of either case map to the lowercase
 * letter's code, everything else maps to 0 (UIGNORE).
 */
static const uint8_t asciiTypes[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Type of a char under ASCII rules; bytes with the high bit set are ignored.
 * The argument is evaluated twice, so it must not have side effects.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)
361
362	/ character types for EBCDIC 80..FF /
363	static const uint8_t ebcdicTypes[`128`] = {
364	`0`, `0x81`, `0x82`, `0x83`, `0x84`, `0x85`, `0x86`, `0x87`, `0x88`, `0x89`, `0`, `0`, `0`, `0`, `0`, `0`,
365	`0`, `0x91`, `0x92`, `0x93`, `0x94`, `0x95`, `0x96`, `0x97`, `0x98`, `0x99`, `0`, `0`, `0`, `0`, `0`, `0`,
366	`0`, `0`, `0xa2`, `0xa3`, `0xa4`, `0xa5`, `0xa6`, `0xa7`, `0xa8`, `0xa9`, `0`, `0`, `0`, `0`, `0`, `0`,
367	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
368	`0`, `0x81`, `0x82`, `0x83`, `0x84`, `0x85`, `0x86`, `0x87`, `0x88`, `0x89`, `0`, `0`, `0`, `0`, `0`, `0`,
369	`0`, `0x91`, `0x92`, `0x93`, `0x94`, `0x95`, `0x96`, `0x97`, `0x98`, `0x99`, `0`, `0`, `0`, `0`, `0`, `0`,
370	`0`, `0`, `0xa2`, `0xa3`, `0xa4`, `0xa5`, `0xa6`, `0xa7`, `0xa8`, `0xa9`, `0`, `0`, `0`, `0`, `0`, `0`,
371	ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, `0`, `0`, `0`, `0`, `0`, `0`
372	};
373
374	#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
375
/* Pick the character classifier that matches the platform charset family. */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
#else
#   error U_CHARSET_FAMILY is not valid
#endif
383
384
385	/ @see ucnv_compareNames /
386	U_CAPI char * U_CALLCONV
387	ucnv_io_stripASCIIForCompare(char dst, const* char *name) {
388	char *dstItr = dst;
389	uint8_t type, nextType;
390	char c1;
391	UBool afterDigit = FALSE;
392
393	while ((c1 = *name++) != `0`) {
394	type = GET_ASCII_TYPE(c1);
395	switch (type) {
396	case UIGNORE:
397	afterDigit = FALSE;
398	continue; / ignore all but letters and digits /
399	case ZERO:
400	if (!afterDigit) {
401	nextType = GET_ASCII_TYPE(*name);
402	if (nextType == ZERO \|\| nextType == NONZERO) {
403	continue; / ignore leading zero before another digit /
404	}
405	}
406	break;
407	case NONZERO:
408	afterDigit = TRUE;
409	break;
410	default:
411	c1 = (char)type; / lowercased letter /
412	afterDigit = FALSE;
413	break;
414	}
415	*dstItr++ = c1;
416	}
417	*dstItr = `0`;
418	return dst;
419	}
420
421	U_CAPI char * U_CALLCONV
422	ucnv_io_stripEBCDICForCompare(char dst, const* char *name) {
423	char *dstItr = dst;
424	uint8_t type, nextType;
425	char c1;
426	UBool afterDigit = FALSE;
427
428	while ((c1 = *name++) != `0`) {
429	type = GET_EBCDIC_TYPE(c1);
430	switch (type) {
431	case UIGNORE:
432	afterDigit = FALSE;
433	continue; / ignore all but letters and digits /
434	case ZERO:
435	if (!afterDigit) {
436	nextType = GET_EBCDIC_TYPE(*name);
437	if (nextType == ZERO \|\| nextType == NONZERO) {
438	continue; / ignore leading zero before another digit /
439	}
440	}
441	break;
442	case NONZERO:
443	afterDigit = TRUE;
444	break;
445	default:
446	c1 = (char)type; / lowercased letter /
447	afterDigit = FALSE;
448	break;
449	}
450	*dstItr++ = c1;
451	}
452	*dstItr = `0`;
453	return dst;
454	}
455
456	/**
457	* Do a fuzzy compare of two converter/alias names.
458	* The comparison is case-insensitive, ignores leading zeroes if they are not
459	* followed by further digits, and ignores all but letters and digits.
460	* Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
461	* See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
462	* at http://www.unicode.org/reports/tr22/
463	*
464	* This is a symmetrical (commutative) operation; order of arguments
465	* is insignificant. This is an important property for sorting the
466	* list (when the list is preprocessed into binary form) and for
467	* performing binary searches on it at run time.
468	*
469	* @param name1 a converter name or alias, zero-terminated
470	* @param name2 a converter name or alias, zero-terminated
471	* @return 0 if the names match, or a negative value if the name1
472	* lexically precedes name2, or a positive value if the name1
473	* lexically follows name2.
474	*
475	* @see ucnv_io_stripForCompare
476	*/
477	U_CAPI int U_EXPORT2
478	ucnv_compareNames(const char name1, const* char *name2) {
479	int rc;
480	uint8_t type, nextType;
481	char c1, c2;
482	UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
483
484	for (;;) {
485	while ((c1 = *name1++) != `0`) {
486	type = GET_CHAR_TYPE(c1);
487	switch (type) {
488	case UIGNORE:
489	afterDigit1 = FALSE;
490	continue; / ignore all but letters and digits /
491	case ZERO:
492	if (!afterDigit1) {
493	nextType = GET_CHAR_TYPE(*name1);
494	if (nextType == ZERO \|\| nextType == NONZERO) {
495	continue; / ignore leading zero before another digit /
496	}
497	}
498	break;
499	case NONZERO:
500	afterDigit1 = TRUE;
501	break;
502	default:
503	c1 = (char)type; / lowercased letter /
504	afterDigit1 = FALSE;
505	break;
506	}
507	break; / deliver c1 /
508	}
509	while ((c2 = *name2++) != `0`) {
510	type = GET_CHAR_TYPE(c2);
511	switch (type) {
512	case UIGNORE:
513	afterDigit2 = FALSE;
514	continue; / ignore all but letters and digits /
515	case ZERO:
516	if (!afterDigit2) {
517	nextType = GET_CHAR_TYPE(*name2);
518	if (nextType == ZERO \|\| nextType == NONZERO) {
519	continue; / ignore leading zero before another digit /
520	}
521	}
522	break;
523	case NONZERO:
524	afterDigit2 = TRUE;
525	break;
526	default:
527	c2 = (char)type; / lowercased letter /
528	afterDigit2 = FALSE;
529	break;
530	}
531	break; / deliver c2 /
532	}
533
534	/ If we reach the ends of both strings then they match /
535	if ((c1\|c2)==`0`) {
536	return `0`;
537	}
538
539	/ Case-insensitive comparison /
540	rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
541	if (rc != `0`) {
542	return rc;
543	}
544	}
545	}
546
547	/*
548	* search for an alias
549	* return the converter number index for gConverterList
550	*/
551	static inline uint32_t
552	findConverter(const char alias, UBool containsOption, UErrorCode *pErrorCode) {
553	uint32_t mid, start, limit;
554	uint32_t lastMid;
555	int result;
556	int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
557	char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
558
559	if (!isUnnormalized) {
560	if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
561	*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
562	return UINT32_MAX;
563	}
564
565	/ Lower case and remove ignoreable characters. /
566	ucnv_io_stripForCompare(strippedName, alias);
567	alias = strippedName;
568	}
569
570	/ do a binary search for the alias /
571	start = `0`;
572	limit = gMainTable.untaggedConvArraySize;
573	mid = limit;
574	lastMid = UINT32_MAX;
575
576	for (;;) {
577	mid = (uint32_t)((start + limit) / `2`);
578	if (lastMid == mid) { / Have we moved? /
579	break; / We haven't moved, and it wasn't found. /
580	}
581	lastMid = mid;
582	if (isUnnormalized) {
583	result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
584	}
585	else {
586	result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
587	}
588
589	if (result < `0`) {
590	limit = mid;
591	} else if (result > `0`) {
592	start = mid;
593	} else {
594	/ Since the gencnval tool folds duplicates into one entry,*
595	* this alias in gAliasList is unique, but different standards
596	* may map an alias to different converters.
597	*/
598	if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
599	*pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
600	}
601	/ State whether the canonical converter name contains an option.*
602	This information is contained in this list in order to maintain backward & forward compatibility. /*
603	if (containsOption) {
604	UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
605	*containsOption = (UBool)((containsCnvOptionInfo
606	&& ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != `0`))
607	\|\| !containsCnvOptionInfo);
608	}
609	return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
610	}
611	}
612
613	return UINT32_MAX;
614	}
615
616	/*
617	* Is this alias in this list?
618	* alias and listOffset should be non-NULL.
619	*/
620	static inline UBool
621	isAliasInList(const char *alias, uint32_t listOffset) {
622	if (listOffset) {
623	uint32_t currAlias;
624	uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
625	/ +1 to skip listCount /
626	const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + `1`;
627	for (currAlias = `0`; currAlias < listCount; currAlias++) {
628	if (currList[currAlias]
629	&& ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==`0`)
630	{
631	return TRUE;
632	}
633	}
634	}
635	return FALSE;
636	}
637
638	/*
639	* Search for an standard name of an alias (what is the default name
640	* that this standard uses?)
641	* return the listOffset for gTaggedAliasLists. If it's 0,
642	* the it couldn't be found, but the parameters are valid.
643	*/
644	static uint32_t
645	findTaggedAliasListsOffset(const char alias, const* char standard, UErrorCode pErrorCode) {
646	uint32_t idx;
647	uint32_t listOffset;
648	uint32_t convNum;
649	UErrorCode myErr = U_ZERO_ERROR;
650	uint32_t tagNum = getTagNumber(standard);
651
652	/ Make a quick guess. Hopefully they used a TR22 canonical alias. /
653	convNum = findConverter(alias, NULL, &myErr);
654	if (myErr != U_ZERO_ERROR) {
655	*pErrorCode = myErr;
656	}
657
658	if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
659	listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
660	if (listOffset && gMainTable.taggedAliasLists[listOffset + `1`]) {
661	return listOffset;
662	}
663	if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
664	/ Uh Oh! They used an ambiguous alias.*
665	We have to search the whole swiss cheese starting
666	at the highest standard affinity.
667	This may take a while.
668	*/
669	for (idx = `0`; idx < gMainTable.taggedAliasArraySize; idx++) {
670	listOffset = gMainTable.taggedAliasArray[idx];
671	if (listOffset && isAliasInList(alias, listOffset)) {
672	uint32_t currTagNum = idx/gMainTable.converterListSize;
673	uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
674	uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
675	if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + `1`]) {
676	return tempListOffset;
677	}
678	/ else keep on looking /
679	/ We could speed this up by starting on the next row*
680	because an alias is unique per row, right now.
681	This would change if alias versioning appears. /*
682	}
683	}
684	/ The standard doesn't know about the alias /
685	}
686	/ else no default name /
687	return `0`;
688	}
689	/ else converter or tag not found /
690
691	return UINT32_MAX;
692	}
693
694	/ Return the canonical name /
695	static uint32_t
696	findTaggedConverterNum(const char alias, const* char standard, UErrorCode pErrorCode) {
697	uint32_t idx;
698	uint32_t listOffset;
699	uint32_t convNum;
700	UErrorCode myErr = U_ZERO_ERROR;
701	uint32_t tagNum = getTagNumber(standard);
702
703	/ Make a quick guess. Hopefully they used a TR22 canonical alias. /
704	convNum = findConverter(alias, NULL, &myErr);
705	if (myErr != U_ZERO_ERROR) {
706	*pErrorCode = myErr;
707	}
708
709	if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
710	listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
711	if (listOffset && isAliasInList(alias, listOffset)) {
712	return convNum;
713	}
714	if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
715	/ Uh Oh! They used an ambiguous alias.*
716	We have to search one slice of the swiss cheese.
717	We search only in the requested tag, not the whole thing.
718	This may take a while.
719	*/
720	uint32_t convStart = (tagNum)*gMainTable.converterListSize;
721	uint32_t convLimit = (tagNum+`1`)*gMainTable.converterListSize;
722	for (idx = convStart; idx < convLimit; idx++) {
723	listOffset = gMainTable.taggedAliasArray[idx];
724	if (listOffset && isAliasInList(alias, listOffset)) {
725	return idx-convStart;
726	}
727	}
728	/ The standard doesn't know about the alias /
729	}
730	/ else no canonical name /
731	}
732	/ else converter or tag not found /
733
734	return UINT32_MAX;
735	}
736
737	U_CAPI const char *
738	ucnv_io_getConverterName(const char alias, UBool containsOption, UErrorCode *pErrorCode) {
739	const char *aliasTmp = alias;
740	int32_t i = `0`;
741	for (i = `0`; i < `2`; i++) {
742	if (i == `1`) {
743	/*
744	* After the first unsuccess converter lookup, check to see if
745	* the name begins with 'x-'. If it does, strip it off and try
746	* again. This behaviour is similar to how ICU4J does it.
747	*/
748	if (aliasTmp[`0`] == `'x'` && aliasTmp[`1`] == `'-'`) {
749	aliasTmp = aliasTmp+`2`;
750	} else {
751	break;
752	}
753	}
754	if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
755	uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
756	if (convNum < gMainTable.converterListSize) {
757	return GET_STRING(gMainTable.converterList[convNum]);
758	}
759	/ else converter not found /
760	} else {
761	break;
762	}
763	}
764
765	return NULL;
766	}
767
768	U_CDECL_BEGIN
769
770
771	static int32_t U_CALLCONV
772	ucnv_io_countStandardAliases(UEnumeration enumerator, UErrorCode /pErrorCode/) {
773	int32_t value = `0`;
774	UAliasContext myContext = (UAliasContext )(enumerator->context);
775	uint32_t listOffset = myContext->listOffset;
776
777	if (listOffset) {
778	value = gMainTable.taggedAliasLists[listOffset];
779	}
780	return value;
781	}
782
783	static const char * U_CALLCONV
784	ucnv_io_nextStandardAliases(UEnumeration *enumerator,
785	int32_t* resultLength,
786	UErrorCode * /pErrorCode/)
787	{
788	UAliasContext myContext = (UAliasContext )(enumerator->context);
789	uint32_t listOffset = myContext->listOffset;
790
791	if (listOffset) {
792	uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
793	const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + `1`;
794
795	if (myContext->listIdx < listCount) {
796	const char *myStr = GET_STRING(currList[myContext->listIdx++]);
797	if (resultLength) {
798	*resultLength = (int32_t)uprv_strlen(myStr);
799	}
800	return myStr;
801	}
802	}
803	/ Either we accessed a zero length list, or we enumerated too far. /
804	if (resultLength) {
805	*resultLength = `0`;
806	}
807	return NULL;
808	}
809
810	static void U_CALLCONV
811	ucnv_io_resetStandardAliases(UEnumeration enumerator, UErrorCode /pErrorCode/) {
812	((UAliasContext *)(enumerator->context))->listIdx = `0`;
813	}
814
815	static void U_CALLCONV
816	ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
817	uprv_free(enumerator->context);
818	uprv_free(enumerator);
819	}
820
821	U_CDECL_END
822
823	/ Enumerate the aliases for the specified converter and standard tag /
824	static const UEnumeration gEnumAliases = {
825	NULL,
826	NULL,
827	ucnv_io_closeUEnumeration,
828	ucnv_io_countStandardAliases,
829	uenum_unextDefault,
830	ucnv_io_nextStandardAliases,
831	ucnv_io_resetStandardAliases
832	};
833
834	U_CAPI UEnumeration * U_EXPORT2
835	ucnv_openStandardNames(const char *convName,
836	const char *standard,
837	UErrorCode *pErrorCode)
838	{
839	UEnumeration *myEnum = NULL;
840	if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
841	uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
842
843	/ When listOffset == 0, we want to acknowledge that the*
844	converter name and standard are okay, but there
845	is nothing to enumerate. /*
846	if (listOffset < gMainTable.taggedAliasListsSize) {
847	UAliasContext *myContext;
848
849	myEnum = static_cast<UEnumeration >(uprv_malloc(sizeof*(UEnumeration)));
850	if (myEnum == NULL) {
851	*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
852	return NULL;
853	}
854	uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
855	myContext = static_cast<UAliasContext >(uprv_malloc(sizeof*(UAliasContext)));
856	if (myContext == NULL) {
857	*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
858	uprv_free(myEnum);
859	return NULL;
860	}
861	myContext->listOffset = listOffset;
862	myContext->listIdx = `0`;
863	myEnum->context = myContext;
864	}
865	/ else converter or tag not found /
866	}
867	return myEnum;
868	}
869
870	static uint16_t
871	ucnv_io_countAliases(const char alias, UErrorCode pErrorCode) {
872	if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
873	uint32_t convNum = findConverter(alias, NULL, pErrorCode);
874	if (convNum < gMainTable.converterListSize) {
875	/ tagListNum - 1 is the ALL tag /
876	int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - `1`)*gMainTable.converterListSize + convNum];
877
878	if (listOffset) {
879	return gMainTable.taggedAliasLists[listOffset];
880	}
881	/ else this shouldn't happen. internal program error /
882	}
883	/ else converter not found /
884	}
885	return `0`;
886	}
887
888	static uint16_t
889	ucnv_io_getAliases(const char alias, uint16_t start, const* char *aliases, UErrorCode pErrorCode) {
890	if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
891	uint32_t currAlias;
892	uint32_t convNum = findConverter(alias, NULL, pErrorCode);
893	if (convNum < gMainTable.converterListSize) {
894	/ tagListNum - 1 is the ALL tag /
895	int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - `1`)*gMainTable.converterListSize + convNum];
896
897	if (listOffset) {
898	uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
899	/ +1 to skip listCount /
900	const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + `1`;
901
902	for (currAlias = start; currAlias < listCount; currAlias++) {
903	aliases[currAlias] = GET_STRING(currList[currAlias]);
904	}
905	}
906	/ else this shouldn't happen. internal program error /
907	}
908	/ else converter not found /
909	}
910	return `0`;
911	}
912
913	static const char *
914	ucnv_io_getAlias(const char alias, uint16_t n, UErrorCode pErrorCode) {
915	if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
916	uint32_t convNum = findConverter(alias, NULL, pErrorCode);
917	if (convNum < gMainTable.converterListSize) {
918	/ tagListNum - 1 is the ALL tag /
919	int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - `1`)*gMainTable.converterListSize + convNum];
920
921	if (listOffset) {
922	uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
923	/ +1 to skip listCount /
924	const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + `1`;
925
926	if (n < listCount) {
927	return GET_STRING(currList[n]);
928	}
929	*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
930	}
931	/ else this shouldn't happen. internal program error /
932	}
933	/ else converter not found /
934	}
935	return NULL;
936	}
937
938	static uint16_t
939	ucnv_io_countStandards(UErrorCode *pErrorCode) {
940	if (haveAliasData(pErrorCode)) {
941	/ Don't include the empty list /
942	return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
943	}
944
945	return `0`;
946	}
947
948	U_CAPI const char * U_EXPORT2
949	ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
950	if (haveAliasData(pErrorCode)) {
951	if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
952	return GET_STRING(gMainTable.tagList[n]);
953	}
954	*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
955	}
956
957	return NULL;
958	}
959
960	U_CAPI const char * U_EXPORT2
961	ucnv_getStandardName(const char alias, const* char standard, UErrorCode pErrorCode) {
962	if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
963	uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
964
965	if (`0` < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
966	const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + `1`;
967
968	/ Get the preferred name from this list /
969	if (currList[`0`]) {
970	return GET_STRING(currList[`0`]);
971	}
972	/ else someone screwed up the alias table. /
973	/ pErrorCode = U_INVALID_FORMAT_ERROR /*
974	}
975	}
976
977	return NULL;
978	}
979
980	U_CAPI uint16_t U_EXPORT2
981	ucnv_countAliases(const char alias, UErrorCode pErrorCode)
982	{
983	return ucnv_io_countAliases(alias, pErrorCode);
984	}
985
986
987	U_CAPI const char* U_EXPORT2
988	ucnv_getAlias(const char alias, uint16_t n, UErrorCode pErrorCode)
989	{
990	return ucnv_io_getAlias(alias, n, pErrorCode);
991	}
992
993	U_CAPI void U_EXPORT2
994	ucnv_getAliases(const char alias, const* char *aliases, UErrorCode pErrorCode)
995	{
996	ucnv_io_getAliases(alias, `0`, aliases, pErrorCode);
997	}
998
999	U_CAPI uint16_t U_EXPORT2
1000	ucnv_countStandards(void)
1001	{
1002	UErrorCode err = U_ZERO_ERROR;
1003	return ucnv_io_countStandards(&err);
1004	}
1005
1006	U_CAPI const char * U_EXPORT2
1007	ucnv_getCanonicalName(const char alias, const* char standard, UErrorCode pErrorCode) {
1008	if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
1009	uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
1010
1011	if (convNum < gMainTable.converterListSize) {
1012	return GET_STRING(gMainTable.converterList[convNum]);
1013	}
1014	}
1015
1016	return NULL;
1017	}
1018
1019	U_CDECL_BEGIN
1020
1021
1022	static int32_t U_CALLCONV
1023	ucnv_io_countAllConverters(UEnumeration * /enumerator/, UErrorCode * /pErrorCode/) {
1024	return gMainTable.converterListSize;
1025	}
1026
1027	static const char * U_CALLCONV
1028	ucnv_io_nextAllConverters(UEnumeration *enumerator,
1029	int32_t* resultLength,
1030	UErrorCode * /pErrorCode/)
1031	{
1032	uint16_t myContext = (uint16_t )(enumerator->context);
1033
1034	if (*myContext < gMainTable.converterListSize) {
1035	const char myStr = GET_STRING(gMainTable.converterList[(myContext)++]);
1036	if (resultLength) {
1037	*resultLength = (int32_t)uprv_strlen(myStr);
1038	}
1039	return myStr;
1040	}
1041	/ Either we accessed a zero length list, or we enumerated too far. /
1042	if (resultLength) {
1043	*resultLength = `0`;
1044	}
1045	return NULL;
1046	}
1047
1048	static void U_CALLCONV
1049	ucnv_io_resetAllConverters(UEnumeration enumerator, UErrorCode /pErrorCode/) {
1050	((uint16_t )(enumerator->context)) = `0`;
1051	}
1052	U_CDECL_END
/*
 * Template enumeration over all converter names.
 * ucnv_openAllNames() copies this struct into a newly allocated UEnumeration
 * and then sets that copy's `context` to its own uint16_t position counter.
 * NOTE(review): which slot each initializer fills depends on the UEnumeration
 * field order declared in uenumimp.h -- confirm against that header.
 */
static const UEnumeration gEnumAllConverters = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,
    ucnv_io_countAllConverters,
    uenum_unextDefault,
    ucnv_io_nextAllConverters,
    ucnv_io_resetAllConverters
};
1062
1063	U_CAPI UEnumeration * U_EXPORT2
1064	ucnv_openAllNames(UErrorCode *pErrorCode) {
1065	UEnumeration *myEnum = NULL;
1066	if (haveAliasData(pErrorCode)) {
1067	uint16_t *myContext;
1068
1069	myEnum = static_cast<UEnumeration >(uprv_malloc(sizeof*(UEnumeration)));
1070	if (myEnum == NULL) {
1071	*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1072	return NULL;
1073	}
1074	uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
1075	myContext = static_cast<uint16_t >(uprv_malloc(sizeof*(uint16_t)));
1076	if (myContext == NULL) {
1077	*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1078	uprv_free(myEnum);
1079	return NULL;
1080	}
1081	*myContext = `0`;
1082	myEnum->context = myContext;
1083	}
1084	return myEnum;
1085	}
1086
1087	U_CAPI uint16_t
1088	ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
1089	if (haveAliasData(pErrorCode)) {
1090	return (uint16_t)gMainTable.converterListSize;
1091	}
1092	return `0`;
1093	}
1094
/* alias table swapping ----------------------------------------------------- */
1096
1097	U_CDECL_BEGIN
1098
1099	typedef char * U_CALLCONV StripForCompareFn(char dst, const* char *name);
1100	U_CDECL_END
1101
1102
/*
 * One row of the temporary array used while re-sorting the alias list.
 *
 * Each row holds a platform-endian string index together with the row's
 * original position. After the rows are sorted by string, the real arrays
 * are permuted according to the recorded positions.
 */
typedef struct TempRow {
    uint16_t strIndex;   /* string index read from the alias list */
    uint16_t sortIndex;  /* position of this row before sorting */
} TempRow;
1113
/*
 * Sorting context passed to io_compareRows() through uprv_sortArray() while
 * ucnv_swapAliases() re-sorts the alias list for the output charset.
 */
typedef struct TempAliasTable {
    const char *chars;                  /* base of the string table; rows address it as chars+2*strIndex */
    TempRow *rows;                      /* rows to sort: a stack array or a heap block for large tables */
    uint16_t *resort;                   /* scratch array for permuting when swapping in place */
    StripForCompareFn *stripForCompare; /* stripping function chosen by output charset family */
} TempAliasTable;
1120
/* Largest alias count that ucnv_swapAliases() sorts using stack arrays instead of the heap. */
enum {
    STACK_ROW_CAPACITY=500
};
1124
1125	static int32_t U_CALLCONV
1126	io_compareRows(const void context, const* void left, const* void *right) {
1127	char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1128	strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1129
1130	TempAliasTable tempTable=(TempAliasTable )context;
1131	const char *chars=tempTable->chars;
1132
1133	return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+`2`((const* TempRow *)left)->strIndex),
1134	tempTable->stripForCompare(strippedRight, chars+`2`((const* TempRow *)right)->strIndex));
1135	}
1136
1137	U_CAPI int32_t U_EXPORT2
1138	ucnv_swapAliases(const UDataSwapper *ds,
1139	const void inData, int32_t length, void* *outData,
1140	UErrorCode *pErrorCode) {
1141	const UDataInfo *pInfo;
1142	int32_t headerSize;
1143
1144	const uint16_t *inTable;
1145	const uint32_t *inSectionSizes;
1146	uint32_t toc[offsetsCount];
1147	uint32_t offsets[offsetsCount]; / 16-bit-addressed offsets from inTable/outTable /
1148	uint32_t i, count, tocLength, topOffset;
1149
1150	TempRow rows[STACK_ROW_CAPACITY];
1151	uint16_t resort[STACK_ROW_CAPACITY];
1152	TempAliasTable tempTable;
1153
1154	/ udata_swapDataHeader checks the arguments /
1155	headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1156	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
1157	return `0`;
1158	}
1159
1160	/ check data format and format version /
1161	pInfo=(const UDataInfo )((const* char *)inData+`4`);
1162	if(!(
1163	pInfo->dataFormat[`0`]==`0x43` && / dataFormat="CvAl" /
1164	pInfo->dataFormat[`1`]==`0x76` &&
1165	pInfo->dataFormat[`2`]==`0x41` &&
1166	pInfo->dataFormat[`3`]==`0x6c` &&
1167	pInfo->formatVersion[`0`]==`3`
1168	)) {
1169	udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1170	pInfo->dataFormat[`0`], pInfo->dataFormat[`1`],
1171	pInfo->dataFormat[`2`], pInfo->dataFormat[`3`],
1172	pInfo->formatVersion[`0`]);
1173	*pErrorCode=U_UNSUPPORTED_ERROR;
1174	return `0`;
1175	}
1176
1177	/ an alias table must contain at least the table of contents array /
1178	if(length>=`0` && (length-headerSize)<`4`*(`1`+minTocLength)) {
1179	udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1180	length-headerSize);
1181	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1182	return `0`;
1183	}
1184
1185	inSectionSizes=(const uint32_t )((const* char *)inData+headerSize);
1186	inTable=(const uint16_t *)inSectionSizes;
1187	uprv_memset(toc, `0`, sizeof(toc));
1188	toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
1189	if(tocLength<minTocLength \|\| offsetsCount<=tocLength) {
1190	udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
1191	*pErrorCode=U_INVALID_FORMAT_ERROR;
1192	return `0`;
1193	}
1194
1195	/ read the known part of the table of contents /
1196	for(i=converterListIndex; i<=tocLength; ++i) {
1197	toc[i]=ds->readUInt32(inSectionSizes[i]);
1198	}
1199
1200	/ compute offsets /
1201	uprv_memset(offsets, `0`, sizeof(offsets));
1202	offsets[converterListIndex]=`2`(`1`+tocLength); /* count two 16-bit units per toc entry /
1203	for(i=tagListIndex; i<=tocLength; ++i) {
1204	offsets[i]=offsets[i-`1`]+toc[i-`1`];
1205	}
1206
1207	/ compute the overall size of the after-header data, in numbers of 16-bit units /
1208	topOffset=offsets[i-`1`]+toc[i-`1`];
1209
1210	if(length>=`0`) {
1211	uint16_t *outTable;
1212	const uint16_t p, p2;
1213	uint16_t q, q2;
1214	uint16_t oldIndex;
1215
1216	if((length-headerSize)<(`2`*(int32_t)topOffset)) {
1217	udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1218	length-headerSize);
1219	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1220	return `0`;
1221	}
1222
1223	outTable=(uint16_t )((char* *)outData+headerSize);
1224
1225	/ swap the entire table of contents /
1226	ds->swapArray32(ds, inTable, `4`*(`1`+tocLength), outTable, pErrorCode);
1227
1228	/ swap unormalized strings & normalized strings /
1229	ds->swapInvChars(ds, inTable+offsets[stringTableIndex], `2`*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
1230	outTable+offsets[stringTableIndex], pErrorCode);
1231	if(U_FAILURE(*pErrorCode)) {
1232	udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
1233	return `0`;
1234	}
1235
1236	if(ds->inCharset==ds->outCharset) {
1237	/ no need to sort, just swap all 16-bit values together /
1238	ds->swapArray16(ds,
1239	inTable+offsets[converterListIndex],
1240	`2`*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1241	outTable+offsets[converterListIndex],
1242	pErrorCode);
1243	} else {
1244	/ allocate the temporary table for sorting /
1245	count=toc[aliasListIndex];
1246
1247	tempTable.chars=(const char )(outTable+offsets[stringTableIndex]); /* sort by outCharset /
1248
1249	if(count<=STACK_ROW_CAPACITY) {
1250	tempTable.rows=rows;
1251	tempTable.resort=resort;
1252	} else {
1253	tempTable.rows=(TempRow )uprv_malloc(countsizeof(TempRow)+count*`2`);
1254	if(tempTable.rows==NULL) {
1255	udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1256	count);
1257	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1258	return `0`;
1259	}
1260	tempTable.resort=(uint16_t *)(tempTable.rows+count);
1261	}
1262
1263	if(ds->outCharset==U_ASCII_FAMILY) {
1264	tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1265	} else / U_EBCDIC_FAMILY / {
1266	tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1267	}
1268
1269	/*
1270	* Sort unique aliases+mapped names.
1271	*
1272	* We need to sort the list again by outCharset strings because they
1273	* sort differently for different charset families.
1274	* First we set up a temporary table with the string indexes and
1275	* sorting indexes and sort that.
1276	* Then we permutate and copy/swap the actual values.
1277	*/
1278	p=inTable+offsets[aliasListIndex];
1279	q=outTable+offsets[aliasListIndex];
1280
1281	p2=inTable+offsets[untaggedConvArrayIndex];
1282	q2=outTable+offsets[untaggedConvArrayIndex];
1283
1284	for(i=`0`; i<count; ++i) {
1285	tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1286	tempTable.rows[i].sortIndex=(uint16_t)i;
1287	}
1288
1289	uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1290	io_compareRows, &tempTable,
1291	FALSE, pErrorCode);
1292
1293	if(U_SUCCESS(*pErrorCode)) {
1294	/ copy/swap/permutate items /
1295	if(p!=q) {
1296	for(i=`0`; i<count; ++i) {
1297	oldIndex=tempTable.rows[i].sortIndex;
1298	ds->swapArray16(ds, p+oldIndex, `2`, q+i, pErrorCode);
1299	ds->swapArray16(ds, p2+oldIndex, `2`, q2+i, pErrorCode);
1300	}
1301	} else {
1302	/*
1303	* If we swap in-place, then the permutation must use another
1304	* temporary array (tempTable.resort)
1305	* before the results are copied to the outBundle.
1306	*/
1307	uint16_t *r=tempTable.resort;
1308
1309	for(i=`0`; i<count; ++i) {
1310	oldIndex=tempTable.rows[i].sortIndex;
1311	ds->swapArray16(ds, p+oldIndex, `2`, r+i, pErrorCode);
1312	}
1313	uprv_memcpy(q, r, `2`*(size_t)count);
1314
1315	for(i=`0`; i<count; ++i) {
1316	oldIndex=tempTable.rows[i].sortIndex;
1317	ds->swapArray16(ds, p2+oldIndex, `2`, r+i, pErrorCode);
1318	}
1319	uprv_memcpy(q2, r, `2`*(size_t)count);
1320	}
1321	}
1322
1323	if(tempTable.rows!=rows) {
1324	uprv_free(tempTable.rows);
1325	}
1326
1327	if(U_FAILURE(*pErrorCode)) {
1328	udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1329	count);
1330	return `0`;
1331	}
1332
1333	/ swap remaining 16-bit values /
1334	ds->swapArray16(ds,
1335	inTable+offsets[converterListIndex],
1336	`2`*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1337	outTable+offsets[converterListIndex],
1338	pErrorCode);
1339	ds->swapArray16(ds,
1340	inTable+offsets[taggedAliasArrayIndex],
1341	`2`*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1342	outTable+offsets[taggedAliasArrayIndex],
1343	pErrorCode);
1344	}
1345	}
1346
1347	return headerSize+`2`*(int32_t)topOffset;
1348	}
1349
1350	#endif
1351
1352
1353	/*
1354	* Hey, Emacs, please set the following:
1355	*
1356	* Local Variables:
1357	* indent-tabs-mode: nil
1358	* End:
1359	*
1360	*/
1361

/* Source: ClickHouse/contrib/icu/icu4c/source/common/ucnv_io.cpp */