ucnvlat1.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/ucnvlat1.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 2000-2015, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	* file name: ucnvlat1.cpp
9	* encoding: UTF-8
10	* tab size: 8 (not used)
11	* indentation:4
12	*
13	* created on: 2000feb07
14	* created by: Markus W. Scherer
15	*/
16
17	#include "unicode/utypes.h"
18
19	#if !UCONFIG_NO_CONVERSION
20
21	#include "unicode/ucnv.h"
22	#include "unicode/uset.h"
23	#include "unicode/utf8.h"
24	#include "ucnv_bld.h"
25	#include "ucnv_cnv.h"
26	#include "ustr_imp.h"
27
/*
 * Control optimizations according to the platform.
 * When non-zero, the 16-at-a-time unrolled fast path inside
 * _Latin1FromUnicodeWithOffsets() is compiled in (it is guarded by
 * #if LATIN1_UNROLL_FROM_UNICODE).
 */
#define LATIN1_UNROLL_FROM_UNICODE 1
30
/* ISO 8859-1 --------------------------------------------------------------- */
32
/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
34	U_CDECL_BEGIN
35	static void U_CALLCONV
36	_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
37	UErrorCode *pErrorCode) {
38	const uint8_t *source;
39	UChar *target;
40	int32_t targetCapacity, length;
41	int32_t *offsets;
42
43	int32_t sourceIndex;
44
45	/ set up the local pointers /
46	source=(const uint8_t *)pArgs->source;
47	target=pArgs->target;
48	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
49	offsets=pArgs->offsets;
50
51	sourceIndex=`0`;
52
53	/*
54	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
55	* for the minimum of the sourceLength and targetCapacity
56	*/
57	length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
58	if(length<=targetCapacity) {
59	targetCapacity=length;
60	} else {
61	/ target will be full /
62	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
63	length=targetCapacity;
64	}
65
66	if(targetCapacity>=`8`) {
67	/ This loop is unrolled for speed and improved pipelining. /
68	int32_t count, loops;
69
70	loops=count=targetCapacity>>`3`;
71	length=targetCapacity&=`0x7`;
72	do {
73	target[`0`]=source[`0`];
74	target[`1`]=source[`1`];
75	target[`2`]=source[`2`];
76	target[`3`]=source[`3`];
77	target[`4`]=source[`4`];
78	target[`5`]=source[`5`];
79	target[`6`]=source[`6`];
80	target[`7`]=source[`7`];
81	target+=`8`;
82	source+=`8`;
83	} while(--count>`0`);
84
85	if(offsets!=NULL) {
86	do {
87	offsets[`0`]=sourceIndex++;
88	offsets[`1`]=sourceIndex++;
89	offsets[`2`]=sourceIndex++;
90	offsets[`3`]=sourceIndex++;
91	offsets[`4`]=sourceIndex++;
92	offsets[`5`]=sourceIndex++;
93	offsets[`6`]=sourceIndex++;
94	offsets[`7`]=sourceIndex++;
95	offsets+=`8`;
96	} while(--loops>`0`);
97	}
98	}
99
100	/ conversion loop /
101	while(targetCapacity>`0`) {
102	target++=source++;
103	--targetCapacity;
104	}
105
106	/ write back the updated pointers /
107	pArgs->source=(const char *)source;
108	pArgs->target=target;
109
110	/ set offsets /
111	if(offsets!=NULL) {
112	while(length>`0`) {
113	*offsets++=sourceIndex++;
114	--length;
115	}
116	pArgs->offsets=offsets;
117	}
118	}
119
120	/ This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). /
121	static UChar32 U_CALLCONV
122	_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
123	UErrorCode *pErrorCode) {
124	const uint8_t source=(const* uint8_t *)pArgs->source;
125	if(source<(const uint8_t *)pArgs->sourceLimit) {
126	pArgs->source=(const char *)(source+`1`);
127	return *source;
128	}
129
130	/ no output because of empty input /
131	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
132	return `0xffff`;
133	}
134
135	/ This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). /
136	static void U_CALLCONV
137	_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
138	UErrorCode *pErrorCode) {
139	UConverter *cnv;
140	const UChar source, sourceLimit;
141	uint8_t target, oldTarget;
142	int32_t targetCapacity, length;
143	int32_t *offsets;
144
145	UChar32 cp;
146	UChar c, max;
147
148	int32_t sourceIndex;
149
150	/ set up the local pointers /
151	cnv=pArgs->converter;
152	source=pArgs->source;
153	sourceLimit=pArgs->sourceLimit;
154	target=oldTarget=(uint8_t *)pArgs->target;
155	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
156	offsets=pArgs->offsets;
157
158	if(cnv->sharedData==&_Latin1Data) {
159	max=`0xff`; / Latin-1 /
160	} else {
161	max=`0x7f`; / US-ASCII /
162	}
163
164	/ get the converter state from UConverter /
165	cp=cnv->fromUChar32;
166
167	/ sourceIndex=-1 if the current character began in the previous buffer /
168	sourceIndex= cp==`0` ? `0` : -`1`;
169
170	/*
171	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
172	* for the minimum of the sourceLength and targetCapacity
173	*/
174	length=(int32_t)(sourceLimit-source);
175	if(length<targetCapacity) {
176	targetCapacity=length;
177	}
178
179	/ conversion loop /
180	if(cp!=`0` && targetCapacity>`0`) {
181	goto getTrail;
182	}
183
184	#if LATIN1_UNROLL_FROM_UNICODE
185	/ unroll the loop with the most common case /
186	if(targetCapacity>=`16`) {
187	int32_t count, loops;
188	UChar u, oredChars;
189
190	loops=count=targetCapacity>>`4`;
191	do {
192	oredChars=u=*source++;
193	*target++=(uint8_t)u;
194	oredChars\|=u=*source++;
195	*target++=(uint8_t)u;
196	oredChars\|=u=*source++;
197	*target++=(uint8_t)u;
198	oredChars\|=u=*source++;
199	*target++=(uint8_t)u;
200	oredChars\|=u=*source++;
201	*target++=(uint8_t)u;
202	oredChars\|=u=*source++;
203	*target++=(uint8_t)u;
204	oredChars\|=u=*source++;
205	*target++=(uint8_t)u;
206	oredChars\|=u=*source++;
207	*target++=(uint8_t)u;
208	oredChars\|=u=*source++;
209	*target++=(uint8_t)u;
210	oredChars\|=u=*source++;
211	*target++=(uint8_t)u;
212	oredChars\|=u=*source++;
213	*target++=(uint8_t)u;
214	oredChars\|=u=*source++;
215	*target++=(uint8_t)u;
216	oredChars\|=u=*source++;
217	*target++=(uint8_t)u;
218	oredChars\|=u=*source++;
219	*target++=(uint8_t)u;
220	oredChars\|=u=*source++;
221	*target++=(uint8_t)u;
222	oredChars\|=u=*source++;
223	*target++=(uint8_t)u;
224
225	/ were all 16 entries really valid? /
226	if(oredChars>max) {
227	/ no, return to the first of these 16 /
228	source-=`16`;
229	target-=`16`;
230	break;
231	}
232	} while(--count>`0`);
233	count=loops-count;
234	targetCapacity-=`16`*count;
235
236	if(offsets!=NULL) {
237	oldTarget+=`16`*count;
238	while(count>`0`) {
239	*offsets++=sourceIndex++;
240	*offsets++=sourceIndex++;
241	*offsets++=sourceIndex++;
242	*offsets++=sourceIndex++;
243	*offsets++=sourceIndex++;
244	*offsets++=sourceIndex++;
245	*offsets++=sourceIndex++;
246	*offsets++=sourceIndex++;
247	*offsets++=sourceIndex++;
248	*offsets++=sourceIndex++;
249	*offsets++=sourceIndex++;
250	*offsets++=sourceIndex++;
251	*offsets++=sourceIndex++;
252	*offsets++=sourceIndex++;
253	*offsets++=sourceIndex++;
254	*offsets++=sourceIndex++;
255	--count;
256	}
257	}
258	}
259	#endif
260
261	/ conversion loop /
262	c=`0`;
263	while(targetCapacity>`0` && (c=*source++)<=max) {
264	/ convert the Unicode code point /
265	*target++=(uint8_t)c;
266	--targetCapacity;
267	}
268
269	if(c>max) {
270	cp=c;
271	if(!U_IS_SURROGATE(cp)) {
272	/ callback(unassigned) /
273	} else if(U_IS_SURROGATE_LEAD(cp)) {
274	getTrail:
275	if(source<sourceLimit) {
276	/ test the following code unit /
277	UChar trail=*source;
278	if(U16_IS_TRAIL(trail)) {
279	++source;
280	cp=U16_GET_SUPPLEMENTARY(cp, trail);
281	/ this codepage does not map supplementary code points /
282	/ callback(unassigned) /
283	} else {
284	/ this is an unmatched lead code unit (1st surrogate) /
285	/ callback(illegal) /
286	}
287	} else {
288	/ no more input /
289	cnv->fromUChar32=cp;
290	goto noMoreInput;
291	}
292	} else {
293	/ this is an unmatched trail code unit (2nd surrogate) /
294	/ callback(illegal) /
295	}
296
297	*pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
298	cnv->fromUChar32=cp;
299	}
300	noMoreInput:
301
302	/ set offsets since the start /
303	if(offsets!=NULL) {
304	size_t count=target-oldTarget;
305	while(count>`0`) {
306	*offsets++=sourceIndex++;
307	--count;
308	}
309	}
310
311	if(U_SUCCESS(pErrorCode) && source<sourceLimit && target>=(uint8_t )pArgs->targetLimit) {
312	/ target is full /
313	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
314	}
315
316	/ write back the updated pointers /
317	pArgs->source=source;
318	pArgs->target=(char *)target;
319	pArgs->offsets=offsets;
320	}
321
322	/ Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). /
323	static void U_CALLCONV
324	ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
325	UConverterToUnicodeArgs *pToUArgs,
326	UErrorCode *pErrorCode) {
327	UConverter *utf8;
328	const uint8_t source, sourceLimit;
329	uint8_t *target;
330	int32_t targetCapacity;
331
332	UChar32 c;
333	uint8_t b, t1;
334
335	/ set up the local pointers /
336	utf8=pToUArgs->converter;
337	source=(uint8_t *)pToUArgs->source;
338	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
339	target=(uint8_t *)pFromUArgs->target;
340	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
341
342	/ get the converter state from the UTF-8 UConverter /
343	if (utf8->toULength > `0`) {
344	c=(UChar32)utf8->toUnicodeStatus;
345	} else {
346	c = `0`;
347	}
348	if(c!=`0` && source<sourceLimit) {
349	if(targetCapacity==`0`) {
350	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
351	return;
352	} else if(c>=`0xc2` && c<=`0xc3` && (t1=(uint8_t)(*source-`0x80`)) <= `0x3f`) {
353	++source;
354	*target++=(uint8_t)(((c&`3`)<<`6`)\|t1);
355	--targetCapacity;
356
357	utf8->toUnicodeStatus=`0`;
358	utf8->toULength=`0`;
359	} else {
360	/ complicated, illegal or unmappable input: fall back to the pivoting implementation /
361	*pErrorCode=U_USING_DEFAULT_WARNING;
362	return;
363	}
364	}
365
366	/*
367	* Make sure that the last byte sequence before sourceLimit is complete
368	* or runs into a lead byte.
369	* In the conversion loop compare source with sourceLimit only once
370	* per multi-byte character.
371	* For Latin-1, adjust sourceLimit only for 1 trail byte because
372	* the conversion loop handles at most 2-byte sequences.
373	*/
374	if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-`1`))) {
375	--sourceLimit;
376	}
377
378	/ conversion loop /
379	while(source<sourceLimit) {
380	if(targetCapacity>`0`) {
381	b=*source++;
382	if(U8_IS_SINGLE(b)) {
383	/ convert ASCII /
384	*target++=(uint8_t)b;
385	--targetCapacity;
386	} else if( / handle U+0080..U+00FF inline /
387	b>=`0xc2` && b<=`0xc3` &&
388	(t1=(uint8_t)(*source-`0x80`)) <= `0x3f`
389	) {
390	++source;
391	*target++=(uint8_t)(((b&`3`)<<`6`)\|t1);
392	--targetCapacity;
393	} else {
394	/ complicated, illegal or unmappable input: fall back to the pivoting implementation /
395	pToUArgs->source=(char *)(source-`1`);
396	pFromUArgs->target=(char *)target;
397	*pErrorCode=U_USING_DEFAULT_WARNING;
398	return;
399	}
400	} else {
401	/ target is full /
402	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
403	break;
404	}
405	}
406
407	/*
408	* The sourceLimit may have been adjusted before the conversion loop
409	* to stop before a truncated sequence.
410	* If so, then collect the truncated sequence now.
411	* For Latin-1, there is at most exactly one lead byte because of the
412	* smaller sourceLimit adjustment logic.
413	*/
414	if(U_SUCCESS(pErrorCode) && source<(sourceLimit=(uint8_t )pToUArgs->sourceLimit)) {
415	utf8->toUnicodeStatus=utf8->toUBytes[`0`]=b=*source++;
416	utf8->toULength=`1`;
417	utf8->mode=U8_COUNT_BYTES(b);
418	}
419
420	/ write back the updated pointers /
421	pToUArgs->source=(char *)source;
422	pFromUArgs->target=(char *)target;
423	}
424
425	static void U_CALLCONV
426	_Latin1GetUnicodeSet(const UConverter *cnv,
427	const USetAdder *sa,
428	UConverterUnicodeSet which,
429	UErrorCode *pErrorCode) {
430	(void)cnv;
431	(void)which;
432	(void)pErrorCode;
433	sa->addRange(sa->set, `0`, `0xff`);
434	}
435	U_CDECL_END
436
437
438	static const UConverterImpl _Latin1Impl={
439	UCNV_LATIN_1,
440
441	NULL,
442	NULL,
443
444	NULL,
445	NULL,
446	NULL,
447
448	_Latin1ToUnicodeWithOffsets,
449	_Latin1ToUnicodeWithOffsets,
450	_Latin1FromUnicodeWithOffsets,
451	_Latin1FromUnicodeWithOffsets,
452	_Latin1GetNextUChar,
453
454	NULL,
455	NULL,
456	NULL,
457	NULL,
458	_Latin1GetUnicodeSet,
459
460	NULL,
461	ucnv_Latin1FromUTF8
462	};
463
464	static const UConverterStaticData _Latin1StaticData={
465	sizeof(UConverterStaticData),
466	"ISO-8859-1",
467	`819`, UCNV_IBM, UCNV_LATIN_1, `1`, `1`,
468	{ `0x1a`, `0`, `0`, `0` }, `1`, FALSE, FALSE,
469	`0`,
470	`0`,
471	{ `0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0` } / reserved /
472	};
473
474	const UConverterSharedData _Latin1Data=
475	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
476
/* US-ASCII ----------------------------------------------------------------- */
478
479	U_CDECL_BEGIN
480	/ This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). /
481	static void U_CALLCONV
482	_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
483	UErrorCode *pErrorCode) {
484	const uint8_t source, sourceLimit;
485	UChar target, oldTarget;
486	int32_t targetCapacity, length;
487	int32_t *offsets;
488
489	int32_t sourceIndex;
490
491	uint8_t c;
492
493	/ set up the local pointers /
494	source=(const uint8_t *)pArgs->source;
495	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
496	target=oldTarget=pArgs->target;
497	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
498	offsets=pArgs->offsets;
499
500	/ sourceIndex=-1 if the current character began in the previous buffer /
501	sourceIndex=`0`;
502
503	/*
504	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
505	* for the minimum of the sourceLength and targetCapacity
506	*/
507	length=(int32_t)(sourceLimit-source);
508	if(length<targetCapacity) {
509	targetCapacity=length;
510	}
511
512	if(targetCapacity>=`8`) {
513	/ This loop is unrolled for speed and improved pipelining. /
514	int32_t count, loops;
515	UChar oredChars;
516
517	loops=count=targetCapacity>>`3`;
518	do {
519	oredChars=target[`0`]=source[`0`];
520	oredChars\|=target[`1`]=source[`1`];
521	oredChars\|=target[`2`]=source[`2`];
522	oredChars\|=target[`3`]=source[`3`];
523	oredChars\|=target[`4`]=source[`4`];
524	oredChars\|=target[`5`]=source[`5`];
525	oredChars\|=target[`6`]=source[`6`];
526	oredChars\|=target[`7`]=source[`7`];
527
528	/ were all 16 entries really valid? /
529	if(oredChars>`0x7f`) {
530	/ no, return to the first of these 16 /
531	break;
532	}
533	source+=`8`;
534	target+=`8`;
535	} while(--count>`0`);
536	count=loops-count;
537	targetCapacity-=count*`8`;
538
539	if(offsets!=NULL) {
540	oldTarget+=count*`8`;
541	while(count>`0`) {
542	offsets[`0`]=sourceIndex++;
543	offsets[`1`]=sourceIndex++;
544	offsets[`2`]=sourceIndex++;
545	offsets[`3`]=sourceIndex++;
546	offsets[`4`]=sourceIndex++;
547	offsets[`5`]=sourceIndex++;
548	offsets[`6`]=sourceIndex++;
549	offsets[`7`]=sourceIndex++;
550	offsets+=`8`;
551	--count;
552	}
553	}
554	}
555
556	/ conversion loop /
557	c=`0`;
558	while(targetCapacity>`0` && (c=*source++)<=`0x7f`) {
559	*target++=c;
560	--targetCapacity;
561	}
562
563	if(c>`0x7f`) {
564	/ callback(illegal); copy the current bytes to toUBytes[] /
565	UConverter *cnv=pArgs->converter;
566	cnv->toUBytes[`0`]=c;
567	cnv->toULength=`1`;
568	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
569	} else if(source<sourceLimit && target>=pArgs->targetLimit) {
570	/ target is full /
571	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
572	}
573
574	/ set offsets since the start /
575	if(offsets!=NULL) {
576	size_t count=target-oldTarget;
577	while(count>`0`) {
578	*offsets++=sourceIndex++;
579	--count;
580	}
581	}
582
583	/ write back the updated pointers /
584	pArgs->source=(const char *)source;
585	pArgs->target=target;
586	pArgs->offsets=offsets;
587	}
588
589	/ This is a table-less version of ucnv_MBCSSingleGetNextUChar(). /
590	static UChar32 U_CALLCONV
591	_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
592	UErrorCode *pErrorCode) {
593	const uint8_t *source;
594	uint8_t b;
595
596	source=(const uint8_t *)pArgs->source;
597	if(source<(const uint8_t *)pArgs->sourceLimit) {
598	b=*source++;
599	pArgs->source=(const char *)source;
600	if(b<=`0x7f`) {
601	return b;
602	} else {
603	UConverter *cnv=pArgs->converter;
604	cnv->toUBytes[`0`]=b;
605	cnv->toULength=`1`;
606	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
607	return `0xffff`;
608	}
609	}
610
611	/ no output because of empty input /
612	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
613	return `0xffff`;
614	}
615
616	/ "Convert" UTF-8 to US-ASCII: Validate and copy. /
617	static void U_CALLCONV
618	ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
619	UConverterToUnicodeArgs *pToUArgs,
620	UErrorCode *pErrorCode) {
621	const uint8_t source, sourceLimit;
622	uint8_t *target;
623	int32_t targetCapacity, length;
624
625	uint8_t c;
626
627	if(pToUArgs->converter->toULength > `0`) {
628	/ no handling of partial UTF-8 characters here, fall back to pivoting /
629	*pErrorCode=U_USING_DEFAULT_WARNING;
630	return;
631	}
632
633	/ set up the local pointers /
634	source=(const uint8_t *)pToUArgs->source;
635	sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
636	target=(uint8_t *)pFromUArgs->target;
637	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
638
639	/*
640	* since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
641	* for the minimum of the sourceLength and targetCapacity
642	*/
643	length=(int32_t)(sourceLimit-source);
644	if(length<targetCapacity) {
645	targetCapacity=length;
646	}
647
648	/ unroll the loop with the most common case /
649	if(targetCapacity>=`16`) {
650	int32_t count, loops;
651	uint8_t oredChars;
652
653	loops=count=targetCapacity>>`4`;
654	do {
655	oredChars=target++=source++;
656	oredChars\|=target++=source++;
657	oredChars\|=target++=source++;
658	oredChars\|=target++=source++;
659	oredChars\|=target++=source++;
660	oredChars\|=target++=source++;
661	oredChars\|=target++=source++;
662	oredChars\|=target++=source++;
663	oredChars\|=target++=source++;
664	oredChars\|=target++=source++;
665	oredChars\|=target++=source++;
666	oredChars\|=target++=source++;
667	oredChars\|=target++=source++;
668	oredChars\|=target++=source++;
669	oredChars\|=target++=source++;
670	oredChars\|=target++=source++;
671
672	/ were all 16 entries really valid? /
673	if(oredChars>`0x7f`) {
674	/ no, return to the first of these 16 /
675	source-=`16`;
676	target-=`16`;
677	break;
678	}
679	} while(--count>`0`);
680	count=loops-count;
681	targetCapacity-=`16`*count;
682	}
683
684	/ conversion loop /
685	c=`0`;
686	while(targetCapacity>`0` && (c=*source)<=`0x7f`) {
687	++source;
688	*target++=c;
689	--targetCapacity;
690	}
691
692	if(c>`0x7f`) {
693	/ non-ASCII character, handle in standard converter /
694	*pErrorCode=U_USING_DEFAULT_WARNING;
695	} else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
696	/ target is full /
697	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
698	}
699
700	/ write back the updated pointers /
701	pToUArgs->source=(const char *)source;
702	pFromUArgs->target=(char *)target;
703	}
704
705	static void U_CALLCONV
706	_ASCIIGetUnicodeSet(const UConverter *cnv,
707	const USetAdder *sa,
708	UConverterUnicodeSet which,
709	UErrorCode *pErrorCode) {
710	(void)cnv;
711	(void)which;
712	(void)pErrorCode;
713	sa->addRange(sa->set, `0`, `0x7f`);
714	}
715	U_CDECL_END
716
717	static const UConverterImpl _ASCIIImpl={
718	UCNV_US_ASCII,
719
720	NULL,
721	NULL,
722
723	NULL,
724	NULL,
725	NULL,
726
727	_ASCIIToUnicodeWithOffsets,
728	_ASCIIToUnicodeWithOffsets,
729	_Latin1FromUnicodeWithOffsets,
730	_Latin1FromUnicodeWithOffsets,
731	_ASCIIGetNextUChar,
732
733	NULL,
734	NULL,
735	NULL,
736	NULL,
737	_ASCIIGetUnicodeSet,
738
739	NULL,
740	ucnv_ASCIIFromUTF8
741	};
742
743	static const UConverterStaticData _ASCIIStaticData={
744	sizeof(UConverterStaticData),
745	"US-ASCII",
746	`367`, UCNV_IBM, UCNV_US_ASCII, `1`, `1`,
747	{ `0x1a`, `0`, `0`, `0` }, `1`, FALSE, FALSE,
748	`0`,
749	`0`,
750	{ `0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0` } / reserved /
751	};
752
753	const UConverterSharedData _ASCIIData=
754	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
755
756	#endif
757

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/ucnvlat1.cpp