ucol_swp.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/ucol_swp.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 2003-2015, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: ucol_swp.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2003sep10
16	* created by: Markus W. Scherer
17	*
18	* Swap collation binaries.
19	*/
20
21	#include "unicode/udata.h" /* UDataInfo */
22	#include "utrie.h"
23	#include "utrie2.h"
24	#include "udataswp.h"
25	#include "cmemory.h"
26	#include "ucol_data.h"
27	#include "ucol_swp.h"
28
/* swapping ----------------------------------------------------------------- */
30
31	#if !UCONFIG_NO_COLLATION
32
33	U_CAPI UBool U_EXPORT2
34	ucol_looksLikeCollationBinary(const UDataSwapper *ds,
35	const void *inData, int32_t length) {
36	if(ds==NULL \|\| inData==NULL \|\| length<-`1`) {
37	return FALSE;
38	}
39
40	// First check for format version 4+ which has a standard data header.
41	UErrorCode errorCode=U_ZERO_ERROR;
42	(void)udata_swapDataHeader(ds, inData, -`1`, NULL, &errorCode);
43	if(U_SUCCESS(errorCode)) {
44	const UDataInfo &info=(const* UDataInfo )((const* char *)inData+`4`);
45	if(info.dataFormat[`0`]==`0x55` && // dataFormat="UCol"
46	info.dataFormat[`1`]==`0x43` &&
47	info.dataFormat[`2`]==`0x6f` &&
48	info.dataFormat[`3`]==`0x6c`) {
49	return TRUE;
50	}
51	}
52
53	// Else check for format version 3.
54	const UCATableHeader inHeader=(const* UCATableHeader *)inData;
55
56	/*
57	* The collation binary must contain at least the UCATableHeader,
58	* starting with its size field.
59	* sizeof(UCATableHeader)==42*4 in ICU 2.8
60	* check the length against the header size before reading the size field
61	*/
62	UCATableHeader header;
63	uprv_memset(&header, `0`, sizeof(header));
64	if(length<`0`) {
65	header.size=udata_readInt32(ds, inHeader->size);
66	} else if((length<(`42`*`4`) \|\| length<(header.size=udata_readInt32(ds, inHeader->size)))) {
67	return FALSE;
68	}
69
70	header.magic=ds->readUInt32(inHeader->magic);
71	if(!(
72	header.magic==UCOL_HEADER_MAGIC &&
73	inHeader->formatVersion[`0`]==`3` /&&*
74	inHeader->formatVersion[1]>=0/*
75	)) {
76	return FALSE;
77	}
78
79	if(inHeader->isBigEndian!=ds->inIsBigEndian \|\| inHeader->charSetFamily!=ds->inCharset) {
80	return FALSE;
81	}
82
83	return TRUE;
84	}
85
86	namespace {
87
88	/ swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu /
89	int32_t
90	swapFormatVersion3(const UDataSwapper *ds,
91	const void inData, int32_t length, void* *outData,
92	UErrorCode *pErrorCode) {
93	const uint8_t *inBytes;
94	uint8_t *outBytes;
95
96	const UCATableHeader *inHeader;
97	UCATableHeader *outHeader;
98	UCATableHeader header;
99
100	uint32_t count;
101
102	/ argument checking in case we were not called from ucol_swap() /
103	if(U_FAILURE(*pErrorCode)) {
104	return `0`;
105	}
106	if(ds==NULL \|\| inData==NULL \|\| length<-`1` \|\| (length>`0` && outData==NULL)) {
107	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
108	return `0`;
109	}
110
111	inBytes=(const uint8_t *)inData;
112	outBytes=(uint8_t *)outData;
113
114	inHeader=(const UCATableHeader *)inData;
115	outHeader=(UCATableHeader *)outData;
116
117	/*
118	* The collation binary must contain at least the UCATableHeader,
119	* starting with its size field.
120	* sizeof(UCATableHeader)==42*4 in ICU 2.8
121	* check the length against the header size before reading the size field
122	*/
123	uprv_memset(&header, `0`, sizeof(header));
124	if(length<`0`) {
125	header.size=udata_readInt32(ds, inHeader->size);
126	} else if((length<(`42`*`4`) \|\| length<(header.size=udata_readInt32(ds, inHeader->size)))) {
127	udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",
128	length);
129	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
130	return `0`;
131	}
132
133	header.magic=ds->readUInt32(inHeader->magic);
134	if(!(
135	header.magic==UCOL_HEADER_MAGIC &&
136	inHeader->formatVersion[`0`]==`3` /&&*
137	inHeader->formatVersion[1]>=0/*
138	)) {
139	udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
140	header.magic,
141	inHeader->formatVersion[`0`], inHeader->formatVersion[`1`]);
142	*pErrorCode=U_UNSUPPORTED_ERROR;
143	return `0`;
144	}
145
146	if(inHeader->isBigEndian!=ds->inIsBigEndian \|\| inHeader->charSetFamily!=ds->inCharset) {
147	udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",
148	inHeader->isBigEndian, inHeader->charSetFamily);
149	*pErrorCode=U_INVALID_FORMAT_ERROR;
150	return `0`;
151	}
152
153	if(length>=`0`) {
154	/ copy everything, takes care of data that needs no swapping /
155	if(inBytes!=outBytes) {
156	uprv_memcpy(outBytes, inBytes, header.size);
157	}
158
159	/ swap the necessary pieces in the order of their occurrence in the data /
160
161	/ read more of the UCATableHeader (the size field was read above) /
162	header.options= ds->readUInt32(inHeader->options);
163	header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);
164	header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos);
165	header.mappingPosition= ds->readUInt32(inHeader->mappingPosition);
166	header.expansion= ds->readUInt32(inHeader->expansion);
167	header.contractionIndex= ds->readUInt32(inHeader->contractionIndex);
168	header.contractionCEs= ds->readUInt32(inHeader->contractionCEs);
169	header.contractionSize= ds->readUInt32(inHeader->contractionSize);
170	header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE);
171	header.expansionCESize= ds->readUInt32(inHeader->expansionCESize);
172	header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount);
173	header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
174	header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte);
175	header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript);
176
177	/ swap the 32-bit integers in the header /
178	ds->swapArray32(ds, inHeader, (int32_t)((const char )&inHeader->jamoSpecial-(const* char *)inHeader),
179	outHeader, pErrorCode);
180	ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
181	&(outHeader->scriptToLeadByte), pErrorCode);
182	/ set the output platform properties /
183	outHeader->isBigEndian=ds->outIsBigEndian;
184	outHeader->charSetFamily=ds->outCharset;
185
186	/ swap the options /
187	if(header.options!=`0`) {
188	ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
189	outBytes+header.options, pErrorCode);
190	}
191
192	/ swap the expansions /
193	if(header.mappingPosition!=`0` && header.expansion!=`0`) {
194	if(header.contractionIndex!=`0`) {
195	/ expansions bounded by contractions /
196	count=header.contractionIndex-header.expansion;
197	} else {
198	/ no contractions: expansions bounded by the main trie /
199	count=header.mappingPosition-header.expansion;
200	}
201	ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
202	outBytes+header.expansion, pErrorCode);
203	}
204
205	/ swap the contractions /
206	if(header.contractionSize!=`0`) {
207	/ contractionIndex: UChar[] /
208	ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*`2`,
209	outBytes+header.contractionIndex, pErrorCode);
210
211	/ contractionCEs: CEs[] /
212	ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*`4`,
213	outBytes+header.contractionCEs, pErrorCode);
214	}
215
216	/ swap the main trie /
217	if(header.mappingPosition!=`0`) {
218	count=header.endExpansionCE-header.mappingPosition;
219	utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
220	outBytes+header.mappingPosition, pErrorCode);
221	}
222
223	/ swap the max expansion table /
224	if(header.endExpansionCECount!=`0`) {
225	ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*`4`,
226	outBytes+header.endExpansionCE, pErrorCode);
227	}
228
229	/ expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap /
230
231	/ swap UCA constants /
232	if(header.UCAConsts!=`0`) {
233	/*
234	* if UCAConsts!=0 then contractionUCACombos because we are swapping
235	* the UCA data file, and we know that the UCA contains contractions
236	*/
237	ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
238	outBytes+header.UCAConsts, pErrorCode);
239	}
240
241	/ swap UCA contractions /
242	if(header.contractionUCACombosSize!=`0`) {
243	count=header.contractionUCACombosSizeinHeader->contractionUCACombosWidthU_SIZEOF_UCHAR;
244	ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
245	outBytes+header.contractionUCACombos, pErrorCode);
246	}
247
248	/ swap the script to lead bytes /
249	if(header.scriptToLeadByte!=`0`) {
250	int indexCount = ds->readUInt16(((uint16_t)(inBytes+header.scriptToLeadByte))); // each entry = 2 uint16*
251	int dataCount = ds->readUInt16(((uint16_t)(inBytes+header.scriptToLeadByte + `2`))); // each entry = uint16
252	ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
253	`4` + (`4` * indexCount) + (`2` * dataCount),
254	outBytes+header.scriptToLeadByte, pErrorCode);
255	}
256
257	/ swap the lead byte to scripts /
258	if(header.leadByteToScript!=`0`) {
259	int indexCount = ds->readUInt16(((uint16_t)(inBytes+header.leadByteToScript))); // each entry = uint16
260	int dataCount = ds->readUInt16(((uint16_t)(inBytes+header.leadByteToScript + `2`))); // each entry = uint16
261	ds->swapArray16(ds, inBytes+header.leadByteToScript,
262	`4` + (`2` * indexCount) + (`2` * dataCount),
263	outBytes+header.leadByteToScript, pErrorCode);
264	}
265	}
266
267	return header.size;
268	}
269
270	// swap formatVersion 4 or 5 ----------------------------------------------- ***
271
272	// The following are copied from CollationDataReader, trading an awkward copy of constants
273	// for an awkward relocation of the i18n collationdatareader.h file into the common library.
274	// Keep them in sync!
275
/*
 * Slots of the int32_t indexes[] array at the start of formatVersion 4/5 collation data.
 * Slot 0 holds the number of indexes. Each *_OFFSET slot is the byte offset of a data part;
 * a part ends where the next slot's offset begins, so the swapper computes its byte length
 * as indexes[i+1]-indexes[i].
 */
enum {
    IX_INDEXES_LENGTH,  // 0
    IX_OPTIONS,
    IX_RESERVED2,
    IX_RESERVED3,

    IX_JAMO_CE32S_START,  // 4
    IX_REORDER_CODES_OFFSET,
    IX_REORDER_TABLE_OFFSET,
    IX_TRIE_OFFSET,

    IX_RESERVED8_OFFSET,  // 8
    IX_CES_OFFSET,
    IX_RESERVED10_OFFSET,
    IX_CE32S_OFFSET,

    IX_ROOT_ELEMENTS_OFFSET,  // 12
    IX_CONTEXTS_OFFSET,
    IX_UNSAFE_BWD_OFFSET,
    IX_FAST_LATIN_TABLE_OFFSET,

    IX_SCRIPTS_OFFSET,  // 16
    IX_COMPRESSIBLE_BYTES_OFFSET,
    IX_RESERVED18_OFFSET,
    IX_TOTAL_SIZE  // 19: end offset of the last part, used as the total data size
};
302
303	int32_t
304	swapFormatVersion4(const UDataSwapper *ds,
305	const void inData, int32_t length, void* *outData,
306	UErrorCode &errorCode) {
307	if(U_FAILURE(errorCode)) { return `0`; }
308
309	const uint8_t inBytes=(const* uint8_t *)inData;
310	uint8_t outBytes=(uint8_t )outData;
311
312	const int32_t inIndexes=(const* int32_t *)inBytes;
313	int32_t indexes[IX_TOTAL_SIZE+`1`];
314
315	// Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
316	if(`0`<=length && length<`8`) {
317	udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
318	"(%d after header) for collation data\n",
319	length);
320	errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
321	return `0`;
322	}
323
324	int32_t indexesLength=indexes[`0`]=udata_readInt32(ds, inIndexes[`0`]);
325	if(`0`<=length && length<(indexesLength*`4`)) {
326	udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
327	"(%d after header) for collation data\n",
328	length);
329	errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
330	return `0`;
331	}
332
333	for(int32_t i=`1`; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
334	indexes[i]=udata_readInt32(ds, inIndexes[i]);
335	}
336	for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
337	indexes[i]=-`1`;
338	}
339	inIndexes=NULL; // Make sure we do not accidentally use these instead of indexes[].
340
341	// Get the total length of the data.
342	int32_t size;
343	if(indexesLength>IX_TOTAL_SIZE) {
344	size=indexes[IX_TOTAL_SIZE];
345	} else if(indexesLength>IX_REORDER_CODES_OFFSET) {
346	size=indexes[indexesLength-`1`];
347	} else {
348	size=indexesLength*`4`;
349	}
350	if(length<`0`) { return size; }
351
352	if(length<size) {
353	udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
354	"(%d after header) for collation data\n",
355	length);
356	errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
357	return `0`;
358	}
359
360	// Copy the data for inaccessible bytes and arrays of bytes.
361	if(inBytes!=outBytes) {
362	uprv_memcpy(outBytes, inBytes, size);
363	}
364
365	// Swap the int32_t indexes[].
366	ds->swapArray32(ds, inBytes, indexesLength * `4`, outBytes, &errorCode);
367
368	// The following is a modified version of CollationDataReader::read().
369	// Here we use indexes[] not inIndexes[] because
370	// the inIndexes[] may not be in this machine's endianness.
371	int32_t index; // one of the indexes[] slots
372	int32_t offset; // byte offset for the index part
373	// int32_t length; // number of bytes in the index part
374
375	index = IX_REORDER_CODES_OFFSET;
376	offset = indexes[index];
377	length = indexes[index + `1`] - offset;
378	if(length > `0`) {
379	ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
380	}
381
382	// Skip the IX_REORDER_TABLE_OFFSET byte array.
383
384	index = IX_TRIE_OFFSET;
385	offset = indexes[index];
386	length = indexes[index + `1`] - offset;
387	if(length > `0`) {
388	utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode);
389	}
390
391	index = IX_RESERVED8_OFFSET;
392	offset = indexes[index];
393	length = indexes[index + `1`] - offset;
394	if(length > `0`) {
395	udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n", length);
396	errorCode = U_UNSUPPORTED_ERROR;
397	return `0`;
398	}
399
400	index = IX_CES_OFFSET;
401	offset = indexes[index];
402	length = indexes[index + `1`] - offset;
403	if(length > `0`) {
404	ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode);
405	}
406
407	index = IX_RESERVED10_OFFSET;
408	offset = indexes[index];
409	length = indexes[index + `1`] - offset;
410	if(length > `0`) {
411	udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n", length);
412	errorCode = U_UNSUPPORTED_ERROR;
413	return `0`;
414	}
415
416	index = IX_CE32S_OFFSET;
417	offset = indexes[index];
418	length = indexes[index + `1`] - offset;
419	if(length > `0`) {
420	ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
421	}
422
423	index = IX_ROOT_ELEMENTS_OFFSET;
424	offset = indexes[index];
425	length = indexes[index + `1`] - offset;
426	if(length > `0`) {
427	ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
428	}
429
430	index = IX_CONTEXTS_OFFSET;
431	offset = indexes[index];
432	length = indexes[index + `1`] - offset;
433	if(length > `0`) {
434	ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
435	}
436
437	index = IX_UNSAFE_BWD_OFFSET;
438	offset = indexes[index];
439	length = indexes[index + `1`] - offset;
440	if(length > `0`) {
441	ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
442	}
443
444	index = IX_FAST_LATIN_TABLE_OFFSET;
445	offset = indexes[index];
446	length = indexes[index + `1`] - offset;
447	if(length > `0`) {
448	ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
449	}
450
451	index = IX_SCRIPTS_OFFSET;
452	offset = indexes[index];
453	length = indexes[index + `1`] - offset;
454	if(length > `0`) {
455	ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
456	}
457
458	// Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array.
459
460	index = IX_RESERVED18_OFFSET;
461	offset = indexes[index];
462	length = indexes[index + `1`] - offset;
463	if(length > `0`) {
464	udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n", length);
465	errorCode = U_UNSUPPORTED_ERROR;
466	return `0`;
467	}
468
469	return size;
470	}
471
472	} // namespace
473
474	/ swap ICU collation data like ucadata.icu /
475	U_CAPI int32_t U_EXPORT2
476	ucol_swap(const UDataSwapper *ds,
477	const void inData, int32_t length, void* *outData,
478	UErrorCode *pErrorCode) {
479	if(U_FAILURE(pErrorCode)) { return* `0`; }
480
481	/ udata_swapDataHeader checks the arguments /
482	int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
483	if(U_FAILURE(*pErrorCode)) {
484	// Try to swap the old format version which did not have a standard data header.
485	*pErrorCode=U_ZERO_ERROR;
486	return swapFormatVersion3(ds, inData, length, outData, pErrorCode);
487	}
488
489	/ check data format and format version /
490	const UDataInfo &info=(const* UDataInfo )((const* char *)inData+`4`);
491	if(!(
492	info.dataFormat[`0`]==`0x55` && // dataFormat="UCol"
493	info.dataFormat[`1`]==`0x43` &&
494	info.dataFormat[`2`]==`0x6f` &&
495	info.dataFormat[`3`]==`0x6c` &&
496	(`3`<=info.formatVersion[`0`] && info.formatVersion[`0`]<=`5`)
497	)) {
498	udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "
499	"(format version %02x.%02x) is not recognized as collation data\n",
500	info.dataFormat[`0`], info.dataFormat[`1`],
501	info.dataFormat[`2`], info.dataFormat[`3`],
502	info.formatVersion[`0`], info.formatVersion[`1`]);
503	*pErrorCode=U_UNSUPPORTED_ERROR;
504	return `0`;
505	}
506
507	inData=(const char *)inData+headerSize;
508	if(length>=`0`) { length-=headerSize; }
509	outData=(char *)outData+headerSize;
510	int32_t collationSize;
511	if(info.formatVersion[`0`]>=`4`) {
512	collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode);
513	} else {
514	collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode);
515	}
516	if(U_SUCCESS(*pErrorCode)) {
517	return headerSize+collationSize;
518	} else {
519	return `0`;
520	}
521	}
522
523	/ swap inverse UCA collation data (invuca.icu) /
524	U_CAPI int32_t U_EXPORT2
525	ucol_swapInverseUCA(const UDataSwapper *ds,
526	const void inData, int32_t length, void* *outData,
527	UErrorCode *pErrorCode) {
528	const UDataInfo *pInfo;
529	int32_t headerSize;
530
531	const uint8_t *inBytes;
532	uint8_t *outBytes;
533
534	const InverseUCATableHeader *inHeader;
535	InverseUCATableHeader *outHeader;
536	InverseUCATableHeader header={ `0`,`0`,`0`,`0`,`0`,{`0`,`0`,`0`,`0`},{`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`} };
537
538	/ udata_swapDataHeader checks the arguments /
539	headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
540	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
541	return `0`;
542	}
543
544	/ check data format and format version /
545	pInfo=(const UDataInfo )((const* char *)inData+`4`);
546	if(!(
547	pInfo->dataFormat[`0`]==`0x49` && / dataFormat="InvC" /
548	pInfo->dataFormat[`1`]==`0x6e` &&
549	pInfo->dataFormat[`2`]==`0x76` &&
550	pInfo->dataFormat[`3`]==`0x43` &&
551	pInfo->formatVersion[`0`]==`2` &&
552	pInfo->formatVersion[`1`]>=`1`
553	)) {
554	udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
555	pInfo->dataFormat[`0`], pInfo->dataFormat[`1`],
556	pInfo->dataFormat[`2`], pInfo->dataFormat[`3`],
557	pInfo->formatVersion[`0`], pInfo->formatVersion[`1`]);
558	*pErrorCode=U_UNSUPPORTED_ERROR;
559	return `0`;
560	}
561
562	inBytes=(const uint8_t *)inData+headerSize;
563	outBytes=(uint8_t *)outData+headerSize;
564
565	inHeader=(const InverseUCATableHeader *)inBytes;
566	outHeader=(InverseUCATableHeader *)outBytes;
567
568	/*
569	* The inverse UCA collation binary must contain at least the InverseUCATableHeader,
570	* starting with its size field.
571	* sizeof(UCATableHeader)==8*4 in ICU 2.8
572	* check the length against the header size before reading the size field
573	*/
574	if(length<`0`) {
575	header.byteSize=udata_readInt32(ds, inHeader->byteSize);
576	} else if(
577	((length-headerSize)<(`8`*`4`) \|\|
578	(uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
579	) {
580	udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
581	length);
582	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
583	return `0`;
584	}
585
586	if(length>=`0`) {
587	/ copy everything, takes care of data that needs no swapping /
588	if(inBytes!=outBytes) {
589	uprv_memcpy(outBytes, inBytes, header.byteSize);
590	}
591
592	/ swap the necessary pieces in the order of their occurrence in the data /
593
594	/ read more of the InverseUCATableHeader (the byteSize field was read above) /
595	header.tableSize= ds->readUInt32(inHeader->tableSize);
596	header.contsSize= ds->readUInt32(inHeader->contsSize);
597	header.table= ds->readUInt32(inHeader->table);
598	header.conts= ds->readUInt32(inHeader->conts);
599
600	/ swap the 32-bit integers in the header /
601	ds->swapArray32(ds, inHeader, `5`*`4`, outHeader, pErrorCode);
602
603	/ swap the inverse table; tableSize counts uint32_t[3] rows /
604	ds->swapArray32(ds, inBytes+header.table, header.tableSize`3``4`,
605	outBytes+header.table, pErrorCode);
606
607	/ swap the continuation table; contsSize counts UChars /
608	ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
609	outBytes+header.conts, pErrorCode);
610	}
611
612	return headerSize+header.byteSize;
613	}
614
615	#endif /* #if !UCONFIG_NO_COLLATION */
616

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/ucol_swp.cpp