unames.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/unames.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	******************************************************************************
5	*
6	* Copyright (C) 1999-2014, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	******************************************************************************
10	* file name: unames.c
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 1999oct04
16	* created by: Markus W. Scherer
17	*/
18
19	#include "unicode/utypes.h"
20	#include "unicode/putil.h"
21	#include "unicode/uchar.h"
22	#include "unicode/udata.h"
23	#include "unicode/utf.h"
24	#include "unicode/utf16.h"
25	#include "uassert.h"
26	#include "ustr_imp.h"
27	#include "umutex.h"
28	#include "cmemory.h"
29	#include "cstring.h"
30	#include "ucln_cmn.h"
31	#include "udataswp.h"
32	#include "uprops.h"
33
34	U_NAMESPACE_BEGIN
35
/* prototypes ------------------------------------------------------------- */
37
/* Name and type of the data item that loadCharNames() passes to udata_openChoice(). */
static const char DATA_NAME[] = "unames";
static const char DATA_TYPE[] = "icu";

/* Names are stored in groups of 32 (1<<GROUP_SHIFT) consecutive code points. */
#define GROUP_SHIFT 5
#define LINES_PER_GROUP (1L<<GROUP_SHIFT)
#define GROUP_MASK (LINES_PER_GROUP-1)
44
/*
 * This struct was replaced by explicitly accessing equivalent
 * fields from triples of uint16_t.
 * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
 * which broke the assumption that sizeof(Group)==6 and that the ++ operator
 * would advance by 6 bytes (3 uint16_t).
 *
 * We can't just change the data structure because it's loaded from a data file,
 * and we don't want to make it less compact, so we changed the access code.
 *
 * For details see ICU tickets 6331 and 6008.
typedef struct {
    uint16_t groupMSB,
             offsetHigh, offsetLow; / * avoid padding * /
} Group;
 */
/* Indexes of the fields within one group triple; GROUP_LENGTH is the triple size. */
enum {
    GROUP_MSB,
    GROUP_OFFSET_HIGH,
    GROUP_OFFSET_LOW,
    GROUP_LENGTH
};

/*
 * Get the 32-bit group offset.
 * @param group (const uint16_t *) pointer to a Group triple of uint16_t
 * @return group offset (int32_t)
 */
#define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])

/* Step forward/backward by one group triple. */
#define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
#define PREV_GROUP(group) ((group)-GROUP_LENGTH)
77
/*
 * Header of one range of algorithmically named code points.
 * start/end: first and last code point of the range (inclusive; see findAlgName()).
 * type:      0 = name is prefix + hex digits, 1 = name is prefix + factorized elements.
 * variant:   number of hex digits (type 0) or number of factors (type 1).
 * size:      not read by the functions in this part of the file
 *            (presumably the byte size of the whole range record - TODO confirm).
 * The type-specific data (prefix string, factors, element strings) directly
 * follows this header in memory and is reached via (range+1).
 */
typedef struct {
    uint32_t start, end;
    uint8_t type, variant;
    uint16_t size;
} AlgorithmicRange;
83
/*
 * Index header at the start of the loaded names data.
 * Each field is a byte offset that is added to the address of this struct:
 * tokenStringOffset -> token strings (see expandName()),
 * groupsOffset      -> groups table (see GET_GROUPS()),
 * groupStringOffset -> compressed group strings (see expandGroupName()),
 * algNamesOffset    -> not used in this part of the file
 *                      (presumably the algorithmic ranges - TODO confirm).
 * The token table starts right behind this 16-byte header, at (uint16_t *)names+8.
 */
typedef struct {
    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
} UCharNames;
87
/*
 * Get the groups table from a UCharNames struct.
 * The groups table consists of one uint16_t groupCount followed by
 * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
 * and the comment for the old struct Group above.
 *
 * @param names (const UCharNames *) pointer to the UCharNames indexes
 * @return (const uint16_t *) pointer to the groups table
 */
#define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
98
99	typedef struct {
100	const char *otherName;
101	UChar32 code;
102	} FindName;
103
104	#define DO_FIND_NAME NULL
105
106	static UDataMemory *uCharNamesData=NULL;
107	static UCharNames *uCharNames=NULL;
108	static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;
109
110	/*
111	* Maximum length of character names (regular & 1.0).
112	*/
113	static int32_t gMaxNameLength=`0`;
114
115	/*
116	* Set of chars used in character names (regular & 1.0).
117	* Chars are platform-dependent (can be EBCDIC).
118	*/
119	static uint32_t gNameSet[`8`]={ `0` };
120
121	#define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
122	#define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
123	#define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
124
125	#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
126
127	static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
128	"unassigned",
129	"uppercase letter",
130	"lowercase letter",
131	"titlecase letter",
132	"modifier letter",
133	"other letter",
134	"non spacing mark",
135	"enclosing mark",
136	"combining spacing mark",
137	"decimal digit number",
138	"letter number",
139	"other number",
140	"space separator",
141	"line separator",
142	"paragraph separator",
143	"control",
144	"format",
145	"private use area",
146	"surrogate",
147	"dash punctuation",
148	"start punctuation",
149	"end punctuation",
150	"connector punctuation",
151	"other punctuation",
152	"math symbol",
153	"currency symbol",
154	"modifier symbol",
155	"other symbol",
156	"initial punctuation",
157	"final punctuation",
158	"noncharacter",
159	"lead surrogate",
160	"trail surrogate"
161	};
162
/* implementation ----------------------------------------------------------- */
164
165	static UBool U_CALLCONV unames_cleanup(void)
166	{
167	if(uCharNamesData) {
168	udata_close(uCharNamesData);
169	uCharNamesData = NULL;
170	}
171	if(uCharNames) {
172	uCharNames = NULL;
173	}
174	gCharNamesInitOnce.reset();
175	gMaxNameLength=`0`;
176	return TRUE;
177	}
178
179	static UBool U_CALLCONV
180	isAcceptable(void * /context/,
181	const char * /type/, const char * /name/,
182	const UDataInfo *pInfo) {
183	return (UBool)(
184	pInfo->size>=`20` &&
185	pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
186	pInfo->charsetFamily==U_CHARSET_FAMILY &&
187	pInfo->dataFormat[`0`]==`0x75` && / dataFormat="unam" /
188	pInfo->dataFormat[`1`]==`0x6e` &&
189	pInfo->dataFormat[`2`]==`0x61` &&
190	pInfo->dataFormat[`3`]==`0x6d` &&
191	pInfo->formatVersion[`0`]==`1`);
192	}
193
194	static void U_CALLCONV
195	loadCharNames(UErrorCode &status) {
196	U_ASSERT(uCharNamesData == NULL);
197	U_ASSERT(uCharNames == NULL);
198
199	uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
200	if(U_FAILURE(status)) {
201	uCharNamesData = NULL;
202	} else {
203	uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
204	}
205	ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
206	}
207
208
209	static UBool
210	isDataLoaded(UErrorCode *pErrorCode) {
211	umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);
212	return U_SUCCESS(*pErrorCode);
213	}
214
/*
 * Append c to the output if there is room left, and always count it.
 * buffer and bufferLength are advanced/decremented only while capacity remains;
 * bufferPos is incremented unconditionally, so it ends up holding the full
 * length of the output even when the buffer was too small.
 */
#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((bufferLength)>0) { \
        *(buffer)++=c; \
        --(bufferLength); \
    } \
    ++(bufferPos); \
} UPRV_BLOCK_MACRO_END
222
223	#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
224
225	/*
226	* Important: expandName() and compareName() are almost the same -
227	* apply fixes to both.
228	*
229	* UnicodeData.txt uses ';' as a field separator, so no
230	* field can contain ';' as part of its contents.
231	* In unames.dat, it is marked as token[';']==-1 only if the
232	* semicolon is used in the data file - which is iff we
233	* have Unicode 1.0 names or ISO comments or aliases.
234	* So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
235	* although we know that it will never be part of a name.
236	*/
237	static uint16_t
238	expandName(UCharNames *names,
239	const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
240	char *buffer, uint16_t bufferLength) {
241	uint16_t tokens=(uint16_t )names+`8`;
242	uint16_t token, tokenCount=*tokens++, bufferPos=`0`;
243	uint8_t tokenStrings=(uint8_t )names+names->tokenStringOffset;
244	uint8_t c;
245
246	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
247	/*
248	* skip the modern name if it is not requested _and_
249	* if the semicolon byte value is a character, not a token number
250	*/
251	if((uint8_t)`';'`>=tokenCount \|\| tokens[(uint8_t)`';'`]==(uint16_t)(-`1`)) {
252	int fieldIndex= nameChoice==U_ISO_COMMENT ? `2` : nameChoice;
253	do {
254	while(nameLength>`0`) {
255	--nameLength;
256	if(*name++==`';'`) {
257	break;
258	}
259	}
260	} while(--fieldIndex>`0`);
261	} else {
262	/*
263	* the semicolon byte value is a token number, therefore
264	* only modern names are stored in unames.dat and there is no
265	* such requested alternate name here
266	*/
267	nameLength=`0`;
268	}
269	}
270
271	/ write each letter directly, and write a token word per token /
272	while(nameLength>`0`) {
273	--nameLength;
274	c=*name++;
275
276	if(c>=tokenCount) {
277	if(c!=`';'`) {
278	/ implicit letter /
279	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
280	} else {
281	/ finished /
282	break;
283	}
284	} else {
285	token=tokens[c];
286	if(token==(uint16_t)(-`2`)) {
287	/ this is a lead byte for a double-byte token /
288	token=tokens[c<<`8`\|*name++];
289	--nameLength;
290	}
291	if(token==(uint16_t)(-`1`)) {
292	if(c!=`';'`) {
293	/ explicit letter /
294	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
295	} else {
296	/ stop, but skip the semicolon if we are seeking*
297	extended names and there was no 2.0 name but there
298	is a 1.0 name. /*
299	if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
300	if ((uint8_t)`';'`>=tokenCount \|\| tokens[(uint8_t)`';'`]==(uint16_t)(-`1`)) {
301	continue;
302	}
303	}
304	/ finished /
305	break;
306	}
307	} else {
308	/ write token word /
309	uint8_t *tokenString=tokenStrings+token;
310	while((c=*tokenString++)!=`0`) {
311	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
312	}
313	}
314	}
315	}
316
317	/ zero-terminate /
318	if(bufferLength>`0`) {
319	*buffer=`0`;
320	}
321
322	return bufferPos;
323	}
324
325	/*
326	* compareName() is almost the same as expandName() except that it compares
327	* the currently expanded name to an input name.
328	* It returns the match/no match result as soon as possible.
329	*/
330	static UBool
331	compareName(UCharNames *names,
332	const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
333	const char *otherName) {
334	uint16_t tokens=(uint16_t )names+`8`;
335	uint16_t token, tokenCount=*tokens++;
336	uint8_t tokenStrings=(uint8_t )names+names->tokenStringOffset;
337	uint8_t c;
338	const char *origOtherName = otherName;
339
340	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
341	/*
342	* skip the modern name if it is not requested _and_
343	* if the semicolon byte value is a character, not a token number
344	*/
345	if((uint8_t)`';'`>=tokenCount \|\| tokens[(uint8_t)`';'`]==(uint16_t)(-`1`)) {
346	int fieldIndex= nameChoice==U_ISO_COMMENT ? `2` : nameChoice;
347	do {
348	while(nameLength>`0`) {
349	--nameLength;
350	if(*name++==`';'`) {
351	break;
352	}
353	}
354	} while(--fieldIndex>`0`);
355	} else {
356	/*
357	* the semicolon byte value is a token number, therefore
358	* only modern names are stored in unames.dat and there is no
359	* such requested alternate name here
360	*/
361	nameLength=`0`;
362	}
363	}
364
365	/ compare each letter directly, and compare a token word per token /
366	while(nameLength>`0`) {
367	--nameLength;
368	c=*name++;
369
370	if(c>=tokenCount) {
371	if(c!=`';'`) {
372	/ implicit letter /
373	if((char)c!=*otherName++) {
374	return FALSE;
375	}
376	} else {
377	/ finished /
378	break;
379	}
380	} else {
381	token=tokens[c];
382	if(token==(uint16_t)(-`2`)) {
383	/ this is a lead byte for a double-byte token /
384	token=tokens[c<<`8`\|*name++];
385	--nameLength;
386	}
387	if(token==(uint16_t)(-`1`)) {
388	if(c!=`';'`) {
389	/ explicit letter /
390	if((char)c!=*otherName++) {
391	return FALSE;
392	}
393	} else {
394	/ stop, but skip the semicolon if we are seeking*
395	extended names and there was no 2.0 name but there
396	is a 1.0 name. /*
397	if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
398	if ((uint8_t)`';'`>=tokenCount \|\| tokens[(uint8_t)`';'`]==(uint16_t)(-`1`)) {
399	continue;
400	}
401	}
402	/ finished /
403	break;
404	}
405	} else {
406	/ write token word /
407	uint8_t *tokenString=tokenStrings+token;
408	while((c=*tokenString++)!=`0`) {
409	if((char)c!=*otherName++) {
410	return FALSE;
411	}
412	}
413	}
414	}
415	}
416
417	/ complete match? /
418	return (UBool)(*otherName==`0`);
419	}
420
421	static uint8_t getCharCat(UChar32 cp) {
422	uint8_t cat;
423
424	if (U_IS_UNICODE_NONCHAR(cp)) {
425	return U_NONCHARACTER_CODE_POINT;
426	}
427
428	if ((cat = u_charType(cp)) == U_SURROGATE) {
429	cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
430	}
431
432	return cat;
433	}
434
435	static const char *getCharCatName(UChar32 cp) {
436	uint8_t cat = getCharCat(cp);
437
438	/ Return unknown if the table of names above is not up to*
439	date. /*
440
441	if (cat >= UPRV_LENGTHOF(charCatNames)) {
442	return "unknown";
443	} else {
444	return charCatNames[cat];
445	}
446	}
447
448	static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
449	const char *catname = getCharCatName(code);
450	uint16_t length = `0`;
451
452	UChar32 cp;
453	int ndigits, i;
454
455	WRITE_CHAR(buffer, bufferLength, length, `'<'`);
456	while (catname[length - `1`]) {
457	WRITE_CHAR(buffer, bufferLength, length, catname[length - `1`]);
458	}
459	WRITE_CHAR(buffer, bufferLength, length, `'-'`);
460	for (cp = code, ndigits = `0`; cp; ++ndigits, cp >>= `4`)
461	;
462	if (ndigits < `4`)
463	ndigits = `4`;
464	for (cp = code, i = ndigits; (cp \|\| i > `0`) && bufferLength; cp >>= `4`, bufferLength--) {
465	uint8_t v = (uint8_t)(cp & `0xf`);
466	buffer[--i] = (v < `10` ? `'0'` + v : `'A'` + v - `10`);
467	}
468	buffer += ndigits;
469	length += static_cast<uint16_t>(ndigits);
470	WRITE_CHAR(buffer, bufferLength, length, `'>'`);
471
472	return length;
473	}
474
475	/*
476	* getGroup() does a binary search for the group that contains the
477	* Unicode code point "code".
478	* The return value is always a valid Group* that may contain "code"
479	* or else is the highest group before "code".
480	* If the lowest group is after "code", then that one is returned.
481	*/
482	static const uint16_t *
483	getGroup(UCharNames *names, uint32_t code) {
484	const uint16_t *groups=GET_GROUPS(names);
485	uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
486	start=`0`,
487	limit=*groups++,
488	number;
489
490	/ binary search for the group of names that contains the one for code /
491	while(start<limit-`1`) {
492	number=(uint16_t)((start+limit)/`2`);
493	if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
494	limit=number;
495	} else {
496	start=number;
497	}
498	}
499
500	/ return this regardless of whether it is an exact match /
501	return groups+start*GROUP_LENGTH;
502	}
503
504	/*
505	* expandGroupLengths() reads a block of compressed lengths of 32 strings and
506	* expands them into offsets and lengths for each string.
507	* Lengths are stored with a variable-width encoding in consecutive nibbles:
508	* If a nibble<0xc, then it is the length itself (0=empty string).
509	* If a nibble>=0xc, then it forms a length value with the following nibble.
510	* Calculation see below.
511	* The offsets and lengths arrays must be at least 33 (one more) long because
512	* there is no check here at the end if the last nibble is still used.
513	*/
514	static const uint8_t *
515	expandGroupLengths(const uint8_t *s,
516	uint16_t offsets[LINES_PER_GROUP+`1`], uint16_t lengths[LINES_PER_GROUP+`1`]) {
517	/ read the lengths of the 32 strings in this group and get each string's offset /
518	uint16_t i=`0`, offset=`0`, length=`0`;
519	uint8_t lengthByte;
520
521	/ all 32 lengths must be read to get the offset of the first group string /
522	while(i<LINES_PER_GROUP) {
523	lengthByte=*s++;
524
525	/ read even nibble - MSBs of lengthByte /
526	if(length>=`12`) {
527	/ double-nibble length spread across two bytes /
528	length=(uint16_t)(((length&`0x3`)<<`4`\|lengthByte>>`4`)+`12`);
529	lengthByte&=`0xf`;
530	} else if((lengthByte / &0xf0 /)>=`0xc0`) {
531	/ double-nibble length spread across this one byte /
532	length=(uint16_t)((lengthByte&`0x3f`)+`12`);
533	} else {
534	/ single-nibble length in MSBs /
535	length=(uint16_t)(lengthByte>>`4`);
536	lengthByte&=`0xf`;
537	}
538
539	*offsets++=offset;
540	*lengths++=length;
541
542	offset+=length;
543	++i;
544
545	/ read odd nibble - LSBs of lengthByte /
546	if((lengthByte&`0xf0`)==`0`) {
547	/ this nibble was not consumed for a double-nibble length above /
548	length=lengthByte;
549	if(length<`12`) {
550	/ single-nibble length in LSBs /
551	*offsets++=offset;
552	*lengths++=length;
553
554	offset+=length;
555	++i;
556	}
557	} else {
558	length=`0`; / prevent double-nibble detection in the next iteration /
559	}
560	}
561
562	/ now, s is at the first group string /
563	return s;
564	}
565
566	static uint16_t
567	expandGroupName(UCharNames names, const* uint16_t *group,
568	uint16_t lineNumber, UCharNameChoice nameChoice,
569	char *buffer, uint16_t bufferLength) {
570	uint16_t offsets[LINES_PER_GROUP+`2`], lengths[LINES_PER_GROUP+`2`];
571	const uint8_t s=(uint8_t )names+names->groupStringOffset+GET_GROUP_OFFSET(group);
572	s=expandGroupLengths(s, offsets, lengths);
573	return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
574	buffer, bufferLength);
575	}
576
577	static uint16_t
578	getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
579	char *buffer, uint16_t bufferLength) {
580	const uint16_t *group=getGroup(names, code);
581	if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
582	return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
583	buffer, bufferLength);
584	} else {
585	/ group not found /
586	/ zero-terminate /
587	if(bufferLength>`0`) {
588	*buffer=`0`;
589	}
590	return `0`;
591	}
592	}
593
594	/*
595	* enumGroupNames() enumerates all the names in a 32-group
596	* and either calls the enumerator function or finds a given input name.
597	*/
598	static UBool
599	enumGroupNames(UCharNames names, const* uint16_t *group,
600	UChar32 start, UChar32 end,
601	UEnumCharNamesFn fn, void* *context,
602	UCharNameChoice nameChoice) {
603	uint16_t offsets[LINES_PER_GROUP+`2`], lengths[LINES_PER_GROUP+`2`];
604	const uint8_t s=(uint8_t )names+names->groupStringOffset+GET_GROUP_OFFSET(group);
605
606	s=expandGroupLengths(s, offsets, lengths);
607	if(fn!=DO_FIND_NAME) {
608	char buffer[`200`];
609	uint16_t length;
610
611	while(start<=end) {
612	length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
613	if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
614	buffer[length = getExtName(start, buffer, sizeof(buffer))] = `0`;
615	}
616	/ here, we assume that the buffer is large enough /
617	if(length>`0`) {
618	if(!fn(context, start, nameChoice, buffer, length)) {
619	return FALSE;
620	}
621	}
622	++start;
623	}
624	} else {
625	const char otherName=((FindName )context)->otherName;
626	while(start<=end) {
627	if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
628	((FindName *)context)->code=start;
629	return FALSE;
630	}
631	++start;
632	}
633	}
634	return TRUE;
635	}
636
637	/*
638	* enumExtNames enumerate extended names.
639	* It only needs to do it if it is called with a real function and not
640	* with the dummy DO_FIND_NAME, because u_charFromName() does a check
641	* for extended names by itself.
642	*/
643	static UBool
644	enumExtNames(UChar32 start, UChar32 end,
645	UEnumCharNamesFn fn, void* *context)
646	{
647	if(fn!=DO_FIND_NAME) {
648	char buffer[`200`];
649	uint16_t length;
650
651	while(start<=end) {
652	buffer[length = getExtName(start, buffer, sizeof(buffer))] = `0`;
653	/ here, we assume that the buffer is large enough /
654	if(length>`0`) {
655	if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
656	return FALSE;
657	}
658	}
659	++start;
660	}
661	}
662
663	return TRUE;
664	}
665
666	static UBool
667	enumNames(UCharNames *names,
668	UChar32 start, UChar32 limit,
669	UEnumCharNamesFn fn, void* *context,
670	UCharNameChoice nameChoice) {
671	uint16_t startGroupMSB, endGroupMSB, groupCount;
672	const uint16_t group, groupLimit;
673
674	startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
675	endGroupMSB=(uint16_t)((limit-`1`)>>GROUP_SHIFT);
676
677	/ find the group that contains start, or the highest before it /
678	group=getGroup(names, start);
679
680	if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
681	/ enumerate synthetic names between start and the group start /
682	UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
683	if(extLimit>limit) {
684	extLimit=limit;
685	}
686	if(!enumExtNames(start, extLimit-`1`, fn, context)) {
687	return FALSE;
688	}
689	start=extLimit;
690	}
691
692	if(startGroupMSB==endGroupMSB) {
693	if(startGroupMSB==group[GROUP_MSB]) {
694	/ if start and limit-1 are in the same group, then enumerate only in that one /
695	return enumGroupNames(names, group, start, limit-`1`, fn, context, nameChoice);
696	}
697	} else {
698	const uint16_t *groups=GET_GROUPS(names);
699	groupCount=*groups++;
700	groupLimit=groups+groupCount*GROUP_LENGTH;
701
702	if(startGroupMSB==group[GROUP_MSB]) {
703	/ enumerate characters in the partial start group /
704	if((start&GROUP_MASK)!=`0`) {
705	if(!enumGroupNames(names, group,
706	start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-`1`,
707	fn, context, nameChoice)) {
708	return FALSE;
709	}
710	group=NEXT_GROUP(group); / continue with the next group /
711	}
712	} else if(startGroupMSB>group[GROUP_MSB]) {
713	/ make sure that we start enumerating with the first group after start /
714	const uint16_t *nextGroup=NEXT_GROUP(group);
715	if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
716	UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
717	if (end > limit) {
718	end = limit;
719	}
720	if (!enumExtNames(start, end - `1`, fn, context)) {
721	return FALSE;
722	}
723	}
724	group=nextGroup;
725	}
726
727	/ enumerate entire groups between the start- and end-groups /
728	while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
729	const uint16_t *nextGroup;
730	start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
731	if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-`1`, fn, context, nameChoice)) {
732	return FALSE;
733	}
734	nextGroup=NEXT_GROUP(group);
735	if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + `1` && nameChoice == U_EXTENDED_CHAR_NAME) {
736	UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
737	if (end > limit) {
738	end = limit;
739	}
740	if (!enumExtNames((group[GROUP_MSB] + `1`) << GROUP_SHIFT, end - `1`, fn, context)) {
741	return FALSE;
742	}
743	}
744	group=nextGroup;
745	}
746
747	/ enumerate within the end group (group[GROUP_MSB]==endGroupMSB) /
748	if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
749	return enumGroupNames(names, group, (limit-`1`)&~GROUP_MASK, limit-`1`, fn, context, nameChoice);
750	} else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
751	UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + `1`) << GROUP_SHIFT;
752	if (next > start) {
753	start = next;
754	}
755	} else {
756	return TRUE;
757	}
758	}
759
760	/ we have not found a group, which means everything is made of*
761	extended names. /*
762	if (nameChoice == U_EXTENDED_CHAR_NAME) {
763	if (limit > UCHAR_MAX_VALUE + `1`) {
764	limit = UCHAR_MAX_VALUE + `1`;
765	}
766	return enumExtNames(start, limit - `1`, fn, context);
767	}
768
769	return TRUE;
770	}
771
772	static uint16_t
773	writeFactorSuffix(const uint16_t *factors, uint16_t count,
774	const char s, /* suffix elements /
775	uint32_t code,
776	uint16_t indexes[`8`], / output fields from here /
777	const char elementBases[`8`], const* char *elements[`8`],
778	char *buffer, uint16_t bufferLength) {
779	uint16_t i, factor, bufferPos=`0`;
780	char c;
781
782	/ write elements according to the factors /
783
784	/*
785	* the factorized elements are determined by modulo arithmetic
786	* with the factors of this algorithm
787	*
788	* note that for fewer operations, count is decremented here
789	*/
790	--count;
791	for(i=count; i>`0`; --i) {
792	factor=factors[i];
793	indexes[i]=(uint16_t)(code%factor);
794	code/=factor;
795	}
796	/*
797	* we don't need to calculate the last modulus because start<=code<=end
798	* guarantees here that code<=factors[0]
799	*/
800	indexes[`0`]=(uint16_t)code;
801
802	/ write each element /
803	for(;;) {
804	if(elementBases!=NULL) {
805	*elementBases++=s;
806	}
807
808	/ skip indexes[i] strings /
809	factor=indexes[i];
810	while(factor>`0`) {
811	while(*s++!=`0`) {}
812	--factor;
813	}
814	if(elements!=NULL) {
815	*elements++=s;
816	}
817
818	/ write element /
819	while((c=*s++)!=`0`) {
820	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
821	}
822
823	/ we do not need to perform the rest of this loop for i==count - break here /
824	if(i>=count) {
825	break;
826	}
827
828	/ skip the rest of the strings for this factors[i] /
829	factor=(uint16_t)(factors[i]-indexes[i]-`1`);
830	while(factor>`0`) {
831	while(*s++!=`0`) {}
832	--factor;
833	}
834
835	++i;
836	}
837
838	/ zero-terminate /
839	if(bufferLength>`0`) {
840	*buffer=`0`;
841	}
842
843	return bufferPos;
844	}
845
846	/*
847	* Important:
848	* Parts of findAlgName() are almost the same as some of getAlgName().
849	* Fixes must be applied to both.
850	*/
851	static uint16_t
852	getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
853	char *buffer, uint16_t bufferLength) {
854	uint16_t bufferPos=`0`;
855
856	/ Only the normative character name can be algorithmic. /
857	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
858	/ zero-terminate /
859	if(bufferLength>`0`) {
860	*buffer=`0`;
861	}
862	return `0`;
863	}
864
865	switch(range->type) {
866	case `0`: {
867	/ name = prefix hex-digits /
868	const char s=(const* char *)(range+`1`);
869	char c;
870
871	uint16_t i, count;
872
873	/ copy prefix /
874	while((c=*s++)!=`0`) {
875	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
876	}
877
878	/ write hexadecimal code point value /
879	count=range->variant;
880
881	/ zero-terminate /
882	if(count<bufferLength) {
883	buffer[count]=`0`;
884	}
885
886	for(i=count; i>`0`;) {
887	if(--i<bufferLength) {
888	c=(char)(code&`0xf`);
889	if(c<`10`) {
890	c+=`'0'`;
891	} else {
892	c+=`'A'`-`10`;
893	}
894	buffer[i]=c;
895	}
896	code>>=`4`;
897	}
898
899	bufferPos+=count;
900	break;
901	}
902	case `1`: {
903	/ name = prefix factorized-elements /
904	uint16_t indexes[`8`];
905	const uint16_t factors=(const* uint16_t *)(range+`1`);
906	uint16_t count=range->variant;
907	const char s=(const* char *)(factors+count);
908	char c;
909
910	/ copy prefix /
911	while((c=*s++)!=`0`) {
912	WRITE_CHAR(buffer, bufferLength, bufferPos, c);
913	}
914
915	bufferPos+=writeFactorSuffix(factors, count,
916	s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
917	break;
918	}
919	default:
920	/ undefined type /
921	/ zero-terminate /
922	if(bufferLength>`0`) {
923	*buffer=`0`;
924	}
925	break;
926	}
927
928	return bufferPos;
929	}
930
931	/*
932	* Important: enumAlgNames() and findAlgName() are almost the same.
933	* Any fix must be applied to both.
934	*/
935	static UBool
936	enumAlgNames(AlgorithmicRange *range,
937	UChar32 start, UChar32 limit,
938	UEnumCharNamesFn fn, void* *context,
939	UCharNameChoice nameChoice) {
940	char buffer[`200`];
941	uint16_t length;
942
943	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
944	return TRUE;
945	}
946
947	switch(range->type) {
948	case `0`: {
949	char s, end;
950	char c;
951
952	/ get the full name of the start character /
953	length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
954	if(length<=`0`) {
955	return TRUE;
956	}
957
958	/ call the enumerator function with this first character /
959	if(!fn(context, start, nameChoice, buffer, length)) {
960	return FALSE;
961	}
962
963	/ go to the end of the name; all these names have the same length /
964	end=buffer;
965	while(*end!=`0`) {
966	++end;
967	}
968
969	/ enumerate the rest of the names /
970	while(++start<limit) {
971	/ increment the hexadecimal number on a character-basis /
972	s=end;
973	for (;;) {
974	c=*--s;
975	if((`'0'`<=c && c<`'9'`) \|\| (`'A'`<=c && c<`'F'`)) {
976	s=(char*)(c+`1`);
977	break;
978	} else if(c==`'9'`) {
979	*s=`'A'`;
980	break;
981	} else if(c==`'F'`) {
982	*s=`'0'`;
983	}
984	}
985
986	if(!fn(context, start, nameChoice, buffer, length)) {
987	return FALSE;
988	}
989	}
990	break;
991	}
992	case `1`: {
993	uint16_t indexes[`8`];
994	const char elementBases[`8`], elements[`8`];
995	const uint16_t factors=(const* uint16_t *)(range+`1`);
996	uint16_t count=range->variant;
997	const char s=(const* char *)(factors+count);
998	char suffix, t;
999	uint16_t prefixLength, i, idx;
1000
1001	char c;
1002
1003	/ name = prefix factorized-elements /
1004
1005	/ copy prefix /
1006	suffix=buffer;
1007	prefixLength=`0`;
1008	while((c=*s++)!=`0`) {
1009	*suffix++=c;
1010	++prefixLength;
1011	}
1012
1013	/ append the suffix of the start character /
1014	length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
1015	s, (uint32_t)start-range->start,
1016	indexes, elementBases, elements,
1017	suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
1018
1019	/ call the enumerator function with this first character /
1020	if(!fn(context, start, nameChoice, buffer, length)) {
1021	return FALSE;
1022	}
1023
1024	/ enumerate the rest of the names /
1025	while(++start<limit) {
1026	/ increment the indexes in lexical order bound by the factors /
1027	i=count;
1028	for (;;) {
1029	idx=(uint16_t)(indexes[--i]+`1`);
1030	if(idx<factors[i]) {
1031	/ skip one index and its element string /
1032	indexes[i]=idx;
1033	s=elements[i];
1034	while(*s++!=`0`) {
1035	}
1036	elements[i]=s;
1037	break;
1038	} else {
1039	/ reset this index to 0 and its element string to the first one /
1040	indexes[i]=`0`;
1041	elements[i]=elementBases[i];
1042	}
1043	}
1044
1045	/ to make matters a little easier, just append all elements to the suffix /
1046	t=suffix;
1047	length=prefixLength;
1048	for(i=`0`; i<count; ++i) {
1049	s=elements[i];
1050	while((c=*s++)!=`0`) {
1051	*t++=c;
1052	++length;
1053	}
1054	}
1055	/ zero-terminate /
1056	*t=`0`;
1057
1058	if(!fn(context, start, nameChoice, buffer, length)) {
1059	return FALSE;
1060	}
1061	}
1062	break;
1063	}
1064	default:
1065	/ undefined type /
1066	break;
1067	}
1068
1069	return TRUE;
1070	}
1071
1072	/*
1073	* findAlgName() is almost the same as enumAlgNames() except that it
1074	* returns the code point for a name if it fits into the range.
1075	* It returns 0xffff otherwise.
1076	*/
1077	static UChar32
1078	findAlgName(AlgorithmicRange range, UCharNameChoice nameChoice, const* char *otherName) {
1079	UChar32 code;
1080
1081	if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
1082	return `0xffff`;
1083	}
1084
1085	switch(range->type) {
1086	case `0`: {
1087	/ name = prefix hex-digits /
1088	const char s=(const* char *)(range+`1`);
1089	char c;
1090
1091	uint16_t i, count;
1092
1093	/ compare prefix /
1094	while((c=*s++)!=`0`) {
1095	if((char)c!=*otherName++) {
1096	return `0xffff`;
1097	}
1098	}
1099
1100	/ read hexadecimal code point value /
1101	count=range->variant;
1102	code=`0`;
1103	for(i=`0`; i<count; ++i) {
1104	c=*otherName++;
1105	if(`'0'`<=c && c<=`'9'`) {
1106	code=(code<<`4`)\|(c-`'0'`);
1107	} else if(`'A'`<=c && c<=`'F'`) {
1108	code=(code<<`4`)\|(c-`'A'`+`10`);
1109	} else {
1110	return `0xffff`;
1111	}
1112	}
1113
1114	/ does it fit into the range? /
1115	if(*otherName==`0` && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
1116	return code;
1117	}
1118	break;
1119	}
1120	case `1`: {
1121	char buffer[`64`];
1122	uint16_t indexes[`8`];
1123	const char elementBases[`8`], elements[`8`];
1124	const uint16_t factors=(const* uint16_t *)(range+`1`);
1125	uint16_t count=range->variant;
1126	const char s=(const* char )(factors+count), t;
1127	UChar32 start, limit;
1128	uint16_t i, idx;
1129
1130	char c;
1131
1132	/ name = prefix factorized-elements /
1133
1134	/ compare prefix /
1135	while((c=*s++)!=`0`) {
1136	if((char)c!=*otherName++) {
1137	return `0xffff`;
1138	}
1139	}
1140
1141	start=(UChar32)range->start;
1142	limit=(UChar32)(range->end+`1`);
1143
1144	/ initialize the suffix elements for enumeration; indexes should all be set to 0 /
1145	writeFactorSuffix(factors, count, s, `0`,
1146	indexes, elementBases, elements, buffer, sizeof(buffer));
1147
1148	/ compare the first suffix /
1149	if(`0`==uprv_strcmp(otherName, buffer)) {
1150	return start;
1151	}
1152
1153	/ enumerate and compare the rest of the suffixes /
1154	while(++start<limit) {
1155	/ increment the indexes in lexical order bound by the factors /
1156	i=count;
1157	for (;;) {
1158	idx=(uint16_t)(indexes[--i]+`1`);
1159	if(idx<factors[i]) {
1160	/ skip one index and its element string /
1161	indexes[i]=idx;
1162	s=elements[i];
1163	while(*s++!=`0`) {}
1164	elements[i]=s;
1165	break;
1166	} else {
1167	/ reset this index to 0 and its element string to the first one /
1168	indexes[i]=`0`;
1169	elements[i]=elementBases[i];
1170	}
1171	}
1172
1173	/ to make matters a little easier, just compare all elements of the suffix /
1174	t=otherName;
1175	for(i=`0`; i<count; ++i) {
1176	s=elements[i];
1177	while((c=*s++)!=`0`) {
1178	if(c!=*t++) {
1179	s=""; / does not match /
1180	i=`99`;
1181	}
1182	}
1183	}
1184	if(i<`99` && *t==`0`) {
1185	return start;
1186	}
1187	}
1188	break;
1189	}
1190	default:
1191	/ undefined type /
1192	break;
1193	}
1194
1195	return `0xffff`;
1196	}
1197
/* sets of name characters, maximum name lengths ---------------------------- */

/*
 * A set is an array of 8 uint32_t words, i.e. 256 bit flags, one per byte value.
 * The argument c is reduced to uint8_t; bits 7..5 select the word,
 * bits 4..0 select the bit inside the word.
 * c is parenthesized so that expression arguments (e.g. a|b) are cast as a whole.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
1202
1203	static int32_t
1204	calcStringSetLength(uint32_t set[`8`], const char *s) {
1205	int32_t length=`0`;
1206	char c;
1207
1208	while((c=*s++)!=`0`) {
1209	SET_ADD(set, c);
1210	++length;
1211	}
1212	return length;
1213	}
1214
1215	static int32_t
1216	calcAlgNameSetsLengths(int32_t maxNameLength) {
1217	AlgorithmicRange *range;
1218	uint32_t *p;
1219	uint32_t rangeCount;
1220	int32_t length;
1221
1222	/ enumerate algorithmic ranges /
1223	p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);
1224	rangeCount=*p;
1225	range=(AlgorithmicRange *)(p+`1`);
1226	while(rangeCount>`0`) {
1227	switch(range->type) {
1228	case `0`:
1229	/ name = prefix + (range->variant times) hex-digits /
1230	/ prefix /
1231	length=calcStringSetLength(gNameSet, (const char *)(range+`1`))+range->variant;
1232	if(length>maxNameLength) {
1233	maxNameLength=length;
1234	}
1235	break;
1236	case `1`: {
1237	/ name = prefix factorized-elements /
1238	const uint16_t factors=(const* uint16_t *)(range+`1`);
1239	const char *s;
1240	int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
1241
1242	/ prefix length /
1243	s=(const char *)(factors+count);
1244	length=calcStringSetLength(gNameSet, s);
1245	s+=length+`1`; / start of factor suffixes /
1246
1247	/ get the set and maximum factor suffix length for each factor /
1248	for(i=`0`; i<count; ++i) {
1249	maxFactorLength=`0`;
1250	for(factor=factors[i]; factor>`0`; --factor) {
1251	factorLength=calcStringSetLength(gNameSet, s);
1252	s+=factorLength+`1`;
1253	if(factorLength>maxFactorLength) {
1254	maxFactorLength=factorLength;
1255	}
1256	}
1257	length+=maxFactorLength;
1258	}
1259
1260	if(length>maxNameLength) {
1261	maxNameLength=length;
1262	}
1263	break;
1264	}
1265	default:
1266	/ unknown type /
1267	break;
1268	}
1269
1270	range=(AlgorithmicRange )((uint8_t )range+range->size);
1271	--rangeCount;
1272	}
1273	return maxNameLength;
1274	}
1275
1276	static int32_t
1277	calcExtNameSetsLengths(int32_t maxNameLength) {
1278	int32_t i, length;
1279
1280	for(i=`0`; i<UPRV_LENGTHOF(charCatNames); ++i) {
1281	/*
1282	* for each category, count the length of the category name
1283	* plus 9=
1284	* 2 for <>
1285	* 1 for -
1286	* 6 for most hex digits per code point
1287	*/
1288	length=`9`+calcStringSetLength(gNameSet, charCatNames[i]);
1289	if(length>maxNameLength) {
1290	maxNameLength=length;
1291	}
1292	}
1293	return maxNameLength;
1294	}
1295
1296	static int32_t
1297	calcNameSetLength(const uint16_t tokens, uint16_t tokenCount, const* uint8_t tokenStrings, int8_t tokenLengths,
1298	uint32_t set[`8`],
1299	const uint8_t *pLine, const* uint8_t *lineLimit) {
1300	const uint8_t line=pLine;
1301	int32_t length=`0`, tokenLength;
1302	uint16_t c, token;
1303
1304	while(line!=lineLimit && (c=*line++)!=(uint8_t)`';'`) {
1305	if(c>=tokenCount) {
1306	/ implicit letter /
1307	SET_ADD(set, c);
1308	++length;
1309	} else {
1310	token=tokens[c];
1311	if(token==(uint16_t)(-`2`)) {
1312	/ this is a lead byte for a double-byte token /
1313	c=c<<`8`\|*line++;
1314	token=tokens[c];
1315	}
1316	if(token==(uint16_t)(-`1`)) {
1317	/ explicit letter /
1318	SET_ADD(set, c);
1319	++length;
1320	} else {
1321	/ count token word /
1322	if(tokenLengths!=NULL) {
1323	/ use cached token length /
1324	tokenLength=tokenLengths[c];
1325	if(tokenLength==`0`) {
1326	tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
1327	tokenLengths[c]=(int8_t)tokenLength;
1328	}
1329	} else {
1330	tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
1331	}
1332	length+=tokenLength;
1333	}
1334	}
1335	}
1336
1337	*pLine=line;
1338	return length;
1339	}
1340
1341	static void
1342	calcGroupNameSetsLengths(int32_t maxNameLength) {
1343	uint16_t offsets[LINES_PER_GROUP+`2`], lengths[LINES_PER_GROUP+`2`];
1344
1345	uint16_t tokens=(uint16_t )uCharNames+`8`;
1346	uint16_t tokenCount=*tokens++;
1347	uint8_t tokenStrings=(uint8_t )uCharNames+uCharNames->tokenStringOffset;
1348
1349	int8_t *tokenLengths;
1350
1351	const uint16_t *group;
1352	const uint8_t s, line, *lineLimit;
1353
1354	int32_t groupCount, lineNumber, length;
1355
1356	tokenLengths=(int8_t *)uprv_malloc(tokenCount);
1357	if(tokenLengths!=NULL) {
1358	uprv_memset(tokenLengths, `0`, tokenCount);
1359	}
1360
1361	group=GET_GROUPS(uCharNames);
1362	groupCount=*group++;
1363
1364	/ enumerate all groups /
1365	while(groupCount>`0`) {
1366	s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
1367	s=expandGroupLengths(s, offsets, lengths);
1368
1369	/ enumerate all lines in each group /
1370	for(lineNumber=`0`; lineNumber<LINES_PER_GROUP; ++lineNumber) {
1371	line=s+offsets[lineNumber];
1372	length=lengths[lineNumber];
1373	if(length==`0`) {
1374	continue;
1375	}
1376
1377	lineLimit=line+length;
1378
1379	/ read regular name /
1380	length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1381	if(length>maxNameLength) {
1382	maxNameLength=length;
1383	}
1384	if(line==lineLimit) {
1385	continue;
1386	}
1387
1388	/ read Unicode 1.0 name /
1389	length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1390	if(length>maxNameLength) {
1391	maxNameLength=length;
1392	}
1393	if(line==lineLimit) {
1394	continue;
1395	}
1396
1397	/ read ISO comment /
1398	/length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);/
1399	}
1400
1401	group=NEXT_GROUP(group);
1402	--groupCount;
1403	}
1404
1405	if(tokenLengths!=NULL) {
1406	uprv_free(tokenLengths);
1407	}
1408
1409	/ set gMax... - name length last for threading /
1410	gMaxNameLength=maxNameLength;
1411	}
1412
1413	static UBool
1414	calcNameSetsLengths(UErrorCode *pErrorCode) {
1415	static const char extChars[]="0123456789ABCDEF<>-";
1416	int32_t i, maxNameLength;
1417
1418	if(gMaxNameLength!=`0`) {
1419	return TRUE;
1420	}
1421
1422	if(!isDataLoaded(pErrorCode)) {
1423	return FALSE;
1424	}
1425
1426	/ set hex digits, used in various names, and <>-, used in extended names /
1427	for(i=`0`; i<(int32_t)sizeof(extChars)-`1`; ++i) {
1428	SET_ADD(gNameSet, extChars[i]);
1429	}
1430
1431	/ set sets and lengths from algorithmic names /
1432	maxNameLength=calcAlgNameSetsLengths(`0`);
1433
1434	/ set sets and lengths from extended names /
1435	maxNameLength=calcExtNameSetsLengths(maxNameLength);
1436
1437	/ set sets and lengths from group names, set global maximum values /
1438	calcGroupNameSetsLengths(maxNameLength);
1439
1440	return TRUE;
1441	}
1442
1443	U_NAMESPACE_END
1444
/* public API --------------------------------------------------------------- */
1446
1447	U_NAMESPACE_USE
1448
1449	U_CAPI int32_t U_EXPORT2
1450	u_charName(UChar32 code, UCharNameChoice nameChoice,
1451	char *buffer, int32_t bufferLength,
1452	UErrorCode *pErrorCode) {
1453	AlgorithmicRange *algRange;
1454	uint32_t *p;
1455	uint32_t i;
1456	int32_t length;
1457
1458	/ check the argument values /
1459	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
1460	return `0`;
1461	} else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT \|\|
1462	bufferLength<`0` \|\| (bufferLength>`0` && buffer==NULL)
1463	) {
1464	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1465	return `0`;
1466	}
1467
1468	if((uint32_t)code>UCHAR_MAX_VALUE \|\| !isDataLoaded(pErrorCode)) {
1469	return u_terminateChars(buffer, bufferLength, `0`, pErrorCode);
1470	}
1471
1472	length=`0`;
1473
1474	/ try algorithmic names first /
1475	p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);
1476	i=*p;
1477	algRange=(AlgorithmicRange *)(p+`1`);
1478	while(i>`0`) {
1479	if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
1480	length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1481	break;
1482	}
1483	algRange=(AlgorithmicRange )((uint8_t )algRange+algRange->size);
1484	--i;
1485	}
1486
1487	if(i==`0`) {
1488	if (nameChoice == U_EXTENDED_CHAR_NAME) {
1489	length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
1490	if (!length) {
1491	/ extended character name /
1492	length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
1493	}
1494	} else {
1495	/ normal character name /
1496	length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1497	}
1498	}
1499
1500	return u_terminateChars(buffer, bufferLength, length, pErrorCode);
1501	}
1502
1503	U_CAPI int32_t U_EXPORT2
1504	u_getISOComment(UChar32 /c/,
1505	char *dest, int32_t destCapacity,
1506	UErrorCode *pErrorCode) {
1507	/ check the argument values /
1508	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
1509	return `0`;
1510	} else if(destCapacity<`0` \|\| (destCapacity>`0` && dest==NULL)) {
1511	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1512	return `0`;
1513	}
1514
1515	return u_terminateChars(dest, destCapacity, `0`, pErrorCode);
1516	}
1517
1518	U_CAPI UChar32 U_EXPORT2
1519	u_charFromName(UCharNameChoice nameChoice,
1520	const char *name,
1521	UErrorCode *pErrorCode) {
1522	char upper[`120`], lower[`120`];
1523	FindName findName;
1524	AlgorithmicRange *algRange;
1525	uint32_t *p;
1526	uint32_t i;
1527	UChar32 cp = `0`;
1528	char c0;
1529	static constexpr UChar32 error = `0xffff`; / Undefined, but use this for backwards compatibility. /
1530
1531	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
1532	return error;
1533	}
1534
1535	if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT \|\| name==NULL \|\| *name==`0`) {
1536	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1537	return error;
1538	}
1539
1540	if(!isDataLoaded(pErrorCode)) {
1541	return error;
1542	}
1543
1544	/ construct the uppercase and lowercase of the name first /
1545	for(i=`0`; i<sizeof(upper); ++i) {
1546	if((c0=*name++)!=`0`) {
1547	upper[i]=uprv_toupper(c0);
1548	lower[i]=uprv_tolower(c0);
1549	} else {
1550	upper[i]=lower[i]=`0`;
1551	break;
1552	}
1553	}
1554	if(i==sizeof(upper)) {
1555	/ name too long, there is no such character /
1556	*pErrorCode = U_ILLEGAL_CHAR_FOUND;
1557	return error;
1558	}
1559	// i==strlen(name)==strlen(lower)==strlen(upper)
1560
1561	/ try extended names first /
1562	if (lower[`0`] == `'<'`) {
1563	if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == `'>'`) {
1564	// Parse a string like "<category-HHHH>" where HHHH is a hex code point.
1565	uint32_t limit = i;
1566	while (i >= `3` && lower[--i] != `'-'`) {}
1567
1568	// There should be 1 to 8 hex digits.
1569	int32_t hexLength = limit - (i + `1`);
1570	if (i >= `2` && lower[i] == `'-'` && `1` <= hexLength && hexLength <= `8`) {
1571	uint32_t cIdx;
1572
1573	lower[i] = `0`;
1574
1575	for (++i; i < limit; ++i) {
1576	if (lower[i] >= `'0'` && lower[i] <= `'9'`) {
1577	cp = (cp << `4`) + lower[i] - `'0'`;
1578	} else if (lower[i] >= `'a'` && lower[i] <= `'f'`) {
1579	cp = (cp << `4`) + lower[i] - `'a'` + `10`;
1580	} else {
1581	*pErrorCode = U_ILLEGAL_CHAR_FOUND;
1582	return error;
1583	}
1584	// Prevent signed-integer overflow and out-of-range code points.
1585	if (cp > UCHAR_MAX_VALUE) {
1586	*pErrorCode = U_ILLEGAL_CHAR_FOUND;
1587	return error;
1588	}
1589	}
1590
1591	/ Now validate the category name.*
1592	We could use a binary search, or a trie, if
1593	we really wanted to. /*
1594	uint8_t cat = getCharCat(cp);
1595	for (lower[i] = `0`, cIdx = `0`; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
1596
1597	if (!uprv_strcmp(lower + `1`, charCatNames[cIdx])) {
1598	if (cat == cIdx) {
1599	return cp;
1600	}
1601	break;
1602	}
1603	}
1604	}
1605	}
1606
1607	*pErrorCode = U_ILLEGAL_CHAR_FOUND;
1608	return error;
1609	}
1610
1611	/ try algorithmic names now /
1612	p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);
1613	i=*p;
1614	algRange=(AlgorithmicRange *)(p+`1`);
1615	while(i>`0`) {
1616	if((cp=findAlgName(algRange, nameChoice, upper))!=`0xffff`) {
1617	return cp;
1618	}
1619	algRange=(AlgorithmicRange )((uint8_t )algRange+algRange->size);
1620	--i;
1621	}
1622
1623	/ normal character name /
1624	findName.otherName=upper;
1625	findName.code=error;
1626	enumNames(uCharNames, `0`, UCHAR_MAX_VALUE + `1`, DO_FIND_NAME, &findName, nameChoice);
1627	if (findName.code == error) {
1628	*pErrorCode = U_ILLEGAL_CHAR_FOUND;
1629	}
1630	return findName.code;
1631	}
1632
1633	U_CAPI void U_EXPORT2
1634	u_enumCharNames(UChar32 start, UChar32 limit,
1635	UEnumCharNamesFn *fn,
1636	void *context,
1637	UCharNameChoice nameChoice,
1638	UErrorCode *pErrorCode) {
1639	AlgorithmicRange *algRange;
1640	uint32_t *p;
1641	uint32_t i;
1642
1643	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
1644	return;
1645	}
1646
1647	if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT \|\| fn==NULL) {
1648	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1649	return;
1650	}
1651
1652	if((uint32_t) limit > UCHAR_MAX_VALUE + `1`) {
1653	limit = UCHAR_MAX_VALUE + `1`;
1654	}
1655	if((uint32_t)start>=(uint32_t)limit) {
1656	return;
1657	}
1658
1659	if(!isDataLoaded(pErrorCode)) {
1660	return;
1661	}
1662
1663	/ interleave the data-driven ones with the algorithmic ones /
1664	/ iterate over all algorithmic ranges; assume that they are in ascending order /
1665	p=(uint32_t )((uint8_t )uCharNames+uCharNames->algNamesOffset);
1666	i=*p;
1667	algRange=(AlgorithmicRange *)(p+`1`);
1668	while(i>`0`) {
1669	/ enumerate the character names before the current algorithmic range /
1670	/ here: start<limit /
1671	if((uint32_t)start<algRange->start) {
1672	if((uint32_t)limit<=algRange->start) {
1673	enumNames(uCharNames, start, limit, fn, context, nameChoice);
1674	return;
1675	}
1676	if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
1677	return;
1678	}
1679	start=(UChar32)algRange->start;
1680	}
1681	/ enumerate the character names in the current algorithmic range /
1682	/ here: algRange->start<=start<limit /
1683	if((uint32_t)start<=algRange->end) {
1684	if((uint32_t)limit<=(algRange->end+`1`)) {
1685	enumAlgNames(algRange, start, limit, fn, context, nameChoice);
1686	return;
1687	}
1688	if(!enumAlgNames(algRange, start, (UChar32)algRange->end+`1`, fn, context, nameChoice)) {
1689	return;
1690	}
1691	start=(UChar32)algRange->end+`1`;
1692	}
1693	/ continue to the next algorithmic range (here: start<limit) /
1694	algRange=(AlgorithmicRange )((uint8_t )algRange+algRange->size);
1695	--i;
1696	}
1697	/ enumerate the character names after the last algorithmic range /
1698	enumNames(uCharNames, start, limit, fn, context, nameChoice);
1699	}
1700
1701	U_CAPI int32_t U_EXPORT2
1702	uprv_getMaxCharNameLength() {
1703	UErrorCode errorCode=U_ZERO_ERROR;
1704	if(calcNameSetsLengths(&errorCode)) {
1705	return gMaxNameLength;
1706	} else {
1707	return `0`;
1708	}
1709	}
1710
1711	/**
1712	* Converts the char set cset into a Unicode set uset.
1713	* @param cset Set of 256 bit flags corresponding to a set of chars.
1714	* @param uset USet to receive characters. Existing contents are deleted.
1715	*/
1716	static void
1717	charSetToUSet(uint32_t cset[`8`], const USetAdder *sa) {
1718	UChar us[`256`];
1719	char cs[`256`];
1720
1721	int32_t i, length;
1722	UErrorCode errorCode;
1723
1724	errorCode=U_ZERO_ERROR;
1725
1726	if(!calcNameSetsLengths(&errorCode)) {
1727	return;
1728	}
1729
1730	/ build a char string with all chars that are used in character names /
1731	length=`0`;
1732	for(i=`0`; i<`256`; ++i) {
1733	if(SET_CONTAINS(cset, i)) {
1734	cs[length++]=(char)i;
1735	}
1736	}
1737
1738	/ convert the char string to a UChar string /
1739	u_charsToUChars(cs, us, length);
1740
1741	/ add each UChar to the USet /
1742	for(i=`0`; i<length; ++i) {
1743	if(us[i]!=`0` \|\| cs[i]==`0`) { / non-invariant chars become (UChar)0 /
1744	sa->add(sa->set, us[i]);
1745	}
1746	}
1747	}
1748
1749	/**
1750	* Fills set with characters that are used in Unicode character names.
1751	* @param set USet to receive characters.
1752	*/
1753	U_CAPI void U_EXPORT2
1754	uprv_getCharNameCharacters(const USetAdder *sa) {
1755	charSetToUSet(gNameSet, sa);
1756	}
1757
/* data swapping ------------------------------------------------------------ */
1759
1760	/*
1761	* The token table contains non-negative entries for token bytes,
1762	* and -1 for bytes that represent themselves in the data file's charset.
1763	* -2 entries are used for lead bytes.
1764	*
1765	* Direct bytes (-1 entries) must be translated from the input charset family
1766	* to the output charset family.
1767	* makeTokenMap() writes a permutation mapping for this.
1768	* Use it once for single-/lead-byte tokens and once more for all trail byte
1769	* tokens. (';' is an unused trail byte marked with -1.)
1770	*/
1771	static void
1772	makeTokenMap(const UDataSwapper *ds,
1773	int16_t tokens[], uint16_t tokenCount,
1774	uint8_t map[`256`],
1775	UErrorCode *pErrorCode) {
1776	UBool usedOutChar[`256`];
1777	uint16_t i, j;
1778	uint8_t c1, c2;
1779
1780	if(U_FAILURE(*pErrorCode)) {
1781	return;
1782	}
1783
1784	if(ds->inCharset==ds->outCharset) {
1785	/ Same charset family: identity permutation /
1786	for(i=`0`; i<`256`; ++i) {
1787	map[i]=(uint8_t)i;
1788	}
1789	} else {
1790	uprv_memset(map, `0`, `256`);
1791	uprv_memset(usedOutChar, `0`, `256`);
1792
1793	if(tokenCount>`256`) {
1794	tokenCount=`256`;
1795	}
1796
1797	/ set the direct bytes (byte 0 always maps to itself) /
1798	for(i=`1`; i<tokenCount; ++i) {
1799	if(tokens[i]==-`1`) {
1800	/ convert the direct byte character /
1801	c1=(uint8_t)i;
1802	ds->swapInvChars(ds, &c1, `1`, &c2, pErrorCode);
1803	if(U_FAILURE(*pErrorCode)) {
1804	udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
1805	i, ds->inCharset);
1806	return;
1807	}
1808
1809	/ enter the converted character into the map and mark it used /
1810	map[c1]=c2;
1811	usedOutChar[c2]=TRUE;
1812	}
1813	}
1814
1815	/ set the mappings for the rest of the permutation /
1816	for(i=j=`1`; i<tokenCount; ++i) {
1817	/ set mappings that were not set for direct bytes /
1818	if(map[i]==`0`) {
1819	/ set an output byte value that was not used as an output byte above /
1820	while(usedOutChar[j]) {
1821	++j;
1822	}
1823	map[i]=(uint8_t)j++;
1824	}
1825	}
1826
1827	/*
1828	* leave mappings at tokenCount and above unset if tokenCount<256
1829	* because they won't be used
1830	*/
1831	}
1832	}
1833
1834	U_CAPI int32_t U_EXPORT2
1835	uchar_swapNames(const UDataSwapper *ds,
1836	const void inData, int32_t length, void* *outData,
1837	UErrorCode *pErrorCode) {
1838	const UDataInfo *pInfo;
1839	int32_t headerSize;
1840
1841	const uint8_t *inBytes;
1842	uint8_t *outBytes;
1843
1844	uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
1845	offset, i, count, stringsCount;
1846
1847	const AlgorithmicRange *inRange;
1848	AlgorithmicRange *outRange;
1849
1850	/ udata_swapDataHeader checks the arguments /
1851	headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1852	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
1853	return `0`;
1854	}
1855
1856	/ check data format and format version /
1857	pInfo=(const UDataInfo )((const* char *)inData+`4`);
1858	if(!(
1859	pInfo->dataFormat[`0`]==`0x75` && / dataFormat="unam" /
1860	pInfo->dataFormat[`1`]==`0x6e` &&
1861	pInfo->dataFormat[`2`]==`0x61` &&
1862	pInfo->dataFormat[`3`]==`0x6d` &&
1863	pInfo->formatVersion[`0`]==`1`
1864	)) {
1865	udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
1866	pInfo->dataFormat[`0`], pInfo->dataFormat[`1`],
1867	pInfo->dataFormat[`2`], pInfo->dataFormat[`3`],
1868	pInfo->formatVersion[`0`]);
1869	*pErrorCode=U_UNSUPPORTED_ERROR;
1870	return `0`;
1871	}
1872
1873	inBytes=(const uint8_t *)inData+headerSize;
1874	outBytes=(uint8_t *)outData+headerSize;
1875	if(length<`0`) {
1876	algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[`3`]);
1877	} else {
1878	length-=headerSize;
1879	if( length<`20` \|\|
1880	(uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[`3`]))
1881	) {
1882	udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
1883	length);
1884	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1885	return `0`;
1886	}
1887	}
1888
1889	if(length<`0`) {
1890	/ preflighting: iterate through algorithmic ranges /
1891	offset=algNamesOffset;
1892	count=ds->readUInt32(((const* uint32_t *)(inBytes+offset)));
1893	offset+=`4`;
1894
1895	for(i=`0`; i<count; ++i) {
1896	inRange=(const AlgorithmicRange *)(inBytes+offset);
1897	offset+=ds->readUInt16(inRange->size);
1898	}
1899	} else {
1900	/ swap data /
1901	const uint16_t *p;
1902	uint16_t q, temp;
1903
1904	int16_t tokens[`512`];
1905	uint16_t tokenCount;
1906
1907	uint8_t map[`256`], trailMap[`256`];
1908
1909	/ copy the data for inaccessible bytes /
1910	if(inBytes!=outBytes) {
1911	uprv_memcpy(outBytes, inBytes, length);
1912	}
1913
1914	/ the initial 4 offsets first /
1915	tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[`0`]);
1916	groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[`1`]);
1917	groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[`2`]);
1918	ds->swapArray32(ds, inBytes, `16`, outBytes, pErrorCode);
1919
1920	/*
1921	* now the tokens table
1922	* it needs to be permutated along with the compressed name strings
1923	*/
1924	p=(const uint16_t *)(inBytes+`16`);
1925	q=(uint16_t *)(outBytes+`16`);
1926
1927	/ read and swap the tokenCount /
1928	tokenCount=ds->readUInt16(*p);
1929	ds->swapArray16(ds, p, `2`, q, pErrorCode);
1930	++p;
1931	++q;
1932
1933	/ read the first 512 tokens and make the token maps /
1934	if(tokenCount<=`512`) {
1935	count=tokenCount;
1936	} else {
1937	count=`512`;
1938	}
1939	for(i=`0`; i<count; ++i) {
1940	tokens[i]=udata_readInt16(ds, p[i]);
1941	}
1942	for(; i<`512`; ++i) {
1943	tokens[i]=`0`; / fill the rest of the tokens array if tokenCount<512 /
1944	}
1945	makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
1946	makeTokenMap(ds, tokens+`256`, (uint16_t)(tokenCount>`256` ? tokenCount-`256` : `0`), trailMap, pErrorCode);
1947	if(U_FAILURE(*pErrorCode)) {
1948	return `0`;
1949	}
1950
1951	/*
1952	* swap and permutate the tokens
1953	* go through a temporary array to support in-place swapping
1954	*/
1955	temp=(uint16_t )uprv_malloc(tokenCount`2`);
1956	if(temp==NULL) {
1957	udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
1958	tokenCount);
1959	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1960	return `0`;
1961	}
1962
1963	/ swap and permutate single-/lead-byte tokens /
1964	for(i=`0`; i<tokenCount && i<`256`; ++i) {
1965	ds->swapArray16(ds, p+i, `2`, temp+map[i], pErrorCode);
1966	}
1967
1968	/ swap and permutate trail-byte tokens /
1969	for(; i<tokenCount; ++i) {
1970	ds->swapArray16(ds, p+i, `2`, temp+(i&`0xffffff00`)+trailMap[i&`0xff`], pErrorCode);
1971	}
1972
1973	/ copy the result into the output and free the temporary array /
1974	uprv_memcpy(q, temp, tokenCount*`2`);
1975	uprv_free(temp);
1976
1977	/*
1978	* swap the token strings but not a possible padding byte after
1979	* the terminating NUL of the last string
1980	*/
1981	udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
1982	outBytes+tokenStringOffset, pErrorCode);
1983	if(U_FAILURE(*pErrorCode)) {
1984	udata_printError(ds, "uchar_swapNames(token strings) failed\n");
1985	return `0`;
1986	}
1987
1988	/ swap the group table /
1989	count=ds->readUInt16(((const* uint16_t *)(inBytes+groupsOffset)));
1990	ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((`1`+count`3`)`2`),
1991	outBytes+groupsOffset, pErrorCode);
1992
1993	/*
1994	* swap the group strings
1995	* swap the string bytes but not the nibble-encoded string lengths
1996	*/
1997	if(ds->inCharset!=ds->outCharset) {
1998	uint16_t offsets[LINES_PER_GROUP+`1`], lengths[LINES_PER_GROUP+`1`];
1999
2000	const uint8_t inStrings, nextInStrings;
2001	uint8_t *outStrings;
2002
2003	uint8_t c;
2004
2005	inStrings=inBytes+groupStringOffset;
2006	outStrings=outBytes+groupStringOffset;
2007
2008	stringsCount=algNamesOffset-groupStringOffset;
2009
2010	/ iterate through string groups until only a few padding bytes are left /
2011	while(stringsCount>`32`) {
2012	nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
2013
2014	/ move past the length bytes /
2015	stringsCount-=(uint32_t)(nextInStrings-inStrings);
2016	outStrings+=nextInStrings-inStrings;
2017	inStrings=nextInStrings;
2018
2019	count=offsets[`31`]+lengths[`31`]; / total number of string bytes in this group /
2020	stringsCount-=count;
2021
2022	/ swap the string bytes using map[] and trailMap[] /
2023	while(count>`0`) {
2024	c=*inStrings++;
2025	*outStrings++=map[c];
2026	if(tokens[c]!=-`2`) {
2027	--count;
2028	} else {
2029	/ token lead byte: swap the trail byte, too /
2030	outStrings++=trailMap[inStrings++];
2031	count-=`2`;
2032	}
2033	}
2034	}
2035	}
2036
2037	/ swap the algorithmic ranges /
2038	offset=algNamesOffset;
2039	count=ds->readUInt32(((const* uint32_t *)(inBytes+offset)));
2040	ds->swapArray32(ds, inBytes+offset, `4`, outBytes+offset, pErrorCode);
2041	offset+=`4`;
2042
2043	for(i=`0`; i<count; ++i) {
2044	if(offset>(uint32_t)length) {
2045	udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
2046	length, i);
2047	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2048	return `0`;
2049	}
2050
2051	inRange=(const AlgorithmicRange *)(inBytes+offset);
2052	outRange=(AlgorithmicRange *)(outBytes+offset);
2053	offset+=ds->readUInt16(inRange->size);
2054
2055	ds->swapArray32(ds, inRange, `8`, outRange, pErrorCode);
2056	ds->swapArray16(ds, &inRange->size, `2`, &outRange->size, pErrorCode);
2057	switch(inRange->type) {
2058	case `0`:
2059	/ swap prefix string /
2060	ds->swapInvChars(ds, inRange+`1`, (int32_t)uprv_strlen((const char *)(inRange+`1`)),
2061	outRange+`1`, pErrorCode);
2062	if(U_FAILURE(*pErrorCode)) {
2063	udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
2064	i);
2065	return `0`;
2066	}
2067	break;
2068	case `1`:
2069	{
2070	/ swap factors and the prefix and factor strings /
2071	uint32_t factorsCount;
2072
2073	factorsCount=inRange->variant;
2074	p=(const uint16_t *)(inRange+`1`);
2075	q=(uint16_t *)(outRange+`1`);
2076	ds->swapArray16(ds, p, (int32_t)(factorsCount*`2`), q, pErrorCode);
2077
2078	/ swap the strings, up to the last terminating NUL /
2079	p+=factorsCount;
2080	q+=factorsCount;
2081	stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
2082	while(stringsCount>`0` && ((const uint8_t *)p)[stringsCount-`1`]!=`0`) {
2083	--stringsCount;
2084	}
2085	ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
2086	}
2087	break;
2088	default:
2089	udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
2090	inRange->type, i);
2091	*pErrorCode=U_UNSUPPORTED_ERROR;
2092	return `0`;
2093	}
2094	}
2095	}
2096
2097	return headerSize+(int32_t)offset;
2098	}
2099
2100	/*
2101	* Hey, Emacs, please set the following:
2102	*
2103	* Local Variables:
2104	* indent-tabs-mode: nil
2105	* End:
2106	*
2107	*/
2108

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/unames.cpp