ucnv2022.cpp source code [engine/third_party/icu/source/common/ucnv2022.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 2000-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	* file name: ucnv2022.cpp
9	* encoding: UTF-8
10	* tab size: 8 (not used)
11	* indentation:4
12	*
13	* created on: 2000feb03
14	* created by: Markus W. Scherer
15	*
16	* Change history:
17	*
18	* 06/29/2000 helena Major rewrite of the callback APIs.
19	* 08/08/2000 Ram Included support for ISO-2022-JP-2
20	* Changed implementation of toUnicode
21	* function
22	* 08/21/2000 Ram Added support for ISO-2022-KR
23	* 08/29/2000 Ram Separated implementation of EBCDIC to
24	* ucnvebdc.c
25	* 09/20/2000 Ram Added support for ISO-2022-CN
26	* Added implementations for getNextUChar()
27	* for specific 2022 country variants.
28	* 10/31/2000 Ram Implemented offsets logic functions
29	*/
30
31	#include "unicode/utypes.h"
32
33	#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
34
35	#include "unicode/ucnv.h"
36	#include "unicode/uset.h"
37	#include "unicode/ucnv_err.h"
38	#include "unicode/ucnv_cb.h"
39	#include "unicode/utf16.h"
40	#include "ucnv_imp.h"
41	#include "ucnv_bld.h"
42	#include "ucnv_cnv.h"
43	#include "ucnvmbcs.h"
44	#include "cstring.h"
45	#include "cmemory.h"
46	#include "uassert.h"
47
48	#ifdef U_ENABLE_GENERIC_ISO_2022
49	/*
50	* I am disabling the generic ISO-2022 converter after proposing to do so on
51	* the icu mailing list two days ago.
52	*
53	* Reasons:
54	* 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
55	* its designation sequences, single shifts with return to the previous state,
56	* switch-with-no-return to UTF-16BE or similar, etc.
57	* This is unlike the language-specific variants like ISO-2022-JP which
58	* require a much smaller repertoire of ISO-2022 features.
59	* These variants continue to be supported.
60	* 2. I believe that no one is really using the generic ISO-2022 converter
61	* but rather always one of the language-specific variants.
62	* Note that ICU's generic ISO-2022 converter has always output one escape
63	* sequence followed by UTF-8 for the whole stream.
64	* 3. Switching between subcharsets is extremely slow, because each time
65	* the previous converter is closed and a new one opened,
66	* without any kind of caching, least-recently-used list, etc.
67	* 4. The code is currently buggy, and given the above it does not seem
68	* reasonable to spend the time on maintenance.
69	* 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
70	* This means, for example, that when ISO-8859-7 is designated, the following
71	* ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
72	* The ICU ISO-2022 converter does not handle this - and has no information
73	* about which subconverter would have to be shifted vs. which is designed
74	* for 7-bit ISO-2022.
75	*
76	* Markus Scherer 2003-dec-03
77	*/
78	#endif
79
#if !UCONFIG_ONLY_HTML_CONVERSION
/* Shift In (SI) as a one-byte, NUL-terminated string. */
static const char SHIFT_IN_STR[]  = "\x0F";
// static const char SHIFT_OUT_STR[] = "\x0E";
#endif

/* Code values of the control characters and SPACE referenced by this file. */
#define CR      0x0D
#define LF      0x0A
#define H_TAB   0x09
#define V_TAB   0x0B
#define SPACE   0x20

/* First and last code point of the range named HWKANA (halfwidth Katakana). */
enum {
    HWKANA_START = 0xff61,
    HWKANA_END   = 0xff9f
};
95
/*
 * Native (GR) byte ranges of 94- and 96-character sets.
 *
 * 94-character sets use native bytes A1..FE and appear in ISO 2022 as
 * bytes 21..7E; 96-character sets use native bytes A0..FF and appear as
 * bytes 20..7F. In both cases the ISO 2022 byte is the native byte minus 0x80.
 *
 * C1 control codes (native bytes 80..9F) must not be encoded as
 * bytes 00..1F, which are the C0 control codes.
 */
enum {
    GR94_START = 0xa1,
    GR94_END   = 0xfe,
    GR96_START = 0xa0,
    GR96_END   = 0xff
};
110
/*
 * True when c is one of the ISO 2022 control codes SO (0x0e), SI (0x0f)
 * or ESC (0x1b). These must never be produced from Unicode input because
 * they would corrupt the shift/designation state of the byte stream.
 *
 * 0x0800c000 is a bit set indexed by code value: bits 0xe, 0xf and 0x1b are
 * set. The range test comes first so the shift count is always below 32.
 * Note that c is evaluated twice.
 */
#define IS_2022_CONTROL(c) \
    (((c)<0x20) && ((((uint32_t)0x0800c000)>>(c))&1)!=0)
118
/*
 * Charset/state numbers for the ISO-2022-JP and ISO-2022-CN implementations.
 * The JP and CN groups deliberately reuse the small values 1..3 because each
 * variant indexes its own myConverterArray[] with them.
 */
typedef enum {
    /* values shared by both variants */
    INVALID_STATE = -1,
    ASCII         = 0,

    SS2_STATE     = 0x10,
    SS3_STATE,

    /* ISO-2022-JP */
    ISO8859_1     = 1,
    ISO8859_7     = 2,
    JISX201       = 3,
    JISX208       = 4,
    JISX212       = 5,
    GB2312        = 6,
    KSC5601       = 7,
    HWKANA_7BIT   = 8,      /* Halfwidth Katakana 7 bit */

    /* ISO-2022-CN */
    /* these first constants must keep their values: they index myConverterArray[] */
    GB2312_1      = 1,
    ISO_IR_165    = 2,
    CNS_11643     = 3,

    /*
     * The per-plane values below are stored in StateEnum and ISO2022State
     * variables, but CNS_11643 must be used to index into myConverterArray[].
     */
    CNS_11643_0   = 0x20,
    CNS_11643_1,
    CNS_11643_2,
    CNS_11643_3,
    CNS_11643_4,
    CNS_11643_5,
    CNS_11643_6,
    CNS_11643_7
} StateEnum;
157
/*
 * IS_JP_DBCS(cs): is the StateEnum charset value cs a double-byte charset?
 * In the HTML-only configuration JISX208 is the only one; otherwise it is
 * every value in the contiguous range JISX208..KSC5601.
 */
#if UCONFIG_ONLY_HTML_CONVERSION
#define IS_JP_DBCS(cs) ((cs)==JISX208)
#else
#define IS_JP_DBCS(cs) ((cs)>=JISX208 && KSC5601>=(cs))
#endif

/* CSM(cs): charset mask, i.e. the single bit numbered by the charset value cs. */
#define CSM(cs) ((uint16_t)1<<(cs))
166
167	/*
168	* Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
169	* to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
170	*
171	* Note: The converter uses some leniency:
172	* - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
173	* all versions, not just JIS7 and JIS8.
174	* - ICU does not distinguish between different versions of JIS X 0208.
175	*/
176	#if UCONFIG_ONLY_HTML_CONVERSION
177	enum { MAX_JA_VERSION=`0` };
178	#else
179	enum { MAX_JA_VERSION=`4` };
180	#endif
181	static const uint16_t jpCharsetMasks[MAX_JA_VERSION+`1`]={
182	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT),
183	#if !UCONFIG_ONLY_HTML_CONVERSION
184	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212),
185	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212)\|CSM(GB2312)\|CSM(KSC5601)\|CSM(ISO8859_1)\|CSM(ISO8859_7),
186	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212)\|CSM(GB2312)\|CSM(KSC5601)\|CSM(ISO8859_1)\|CSM(ISO8859_7),
187	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212)\|CSM(GB2312)\|CSM(KSC5601)\|CSM(ISO8859_1)\|CSM(ISO8859_7)
188	#endif
189	};
190
/* Kind of sub-converter currently selected (stored in UConverterDataISO2022::currentType). */
typedef enum {
    ASCII1 = 0,
    LATIN1,
    SBCS,
    DBCS,
    MBCS,
    HWKANA
} Cnv2022Type;
199
/* Designation and shift state for one conversion direction. */
typedef struct ISO2022State {
    int8_t cs[4];       /* charset number designated to G0 (SI), G1 (SO), G2 (SS2), G3 (SS3) */
    int8_t g;           /* currently invoked set: 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    int8_t prevG;       /* value of g before a single shift (SS2 or SS3) */
} ISO2022State;
205
206	#define UCNV_OPTIONS_VERSION_MASK 0xf
207	#define UCNV_2022_MAX_CONVERTERS 10
208
209	typedef struct{
210	UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
211	UConverter *currentConverter;
212	Cnv2022Type currentType;
213	ISO2022State toU2022State, fromU2022State;
214	uint32_t key;
215	uint32_t version;
216	#ifdef U_ENABLE_GENERIC_ISO_2022
217	UBool isFirstBuffer;
218	#endif
219	UBool isEmptySegment;
220	char name[`30`];
221	char locale[`3`];
222	}UConverterDataISO2022;
223
224	/ Protos /
225	/ ISO-2022 ----------------------------------------------------------------- /
226
227	/Forward declaration /
228	U_CFUNC void U_CALLCONV
229	ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
230	UErrorCode * err);
231	U_CFUNC void U_CALLCONV
232	ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
233	UErrorCode * err);
234
#define ESC_2022 0x1B /* ESC */

/* Outcome of looking up an escape-sequence prefix in the state tables. */
typedef enum {
    /* does not correspond to a valid ISO 2022 escape sequence */
    INVALID_2022              = -1,
    /* so far corresponds to a valid ISO 2022 escape sequence */
    VALID_NON_TERMINAL_2022   = 0,
    /* corresponds to a complete, valid ISO 2022 escape sequence */
    VALID_TERMINAL_2022       = 1,
    /* matches one escape sequence so far, but more characters might match another one */
    VALID_MAYBE_TERMINAL_2022 = 2
} UCNV_TableStates_2022;
244
245	/*
246	* The way these state transition arrays work is:
247	* ex : ESC$B is the sequence for JISX208
248	* a) First Iteration: char is ESC
249	* i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
250	* int x = normalize_esq_chars_2022[27] which is equal to 1
251	* ii) Search for this value in escSeqStateTable_Key_2022[]
252	* value of x is stored at escSeqStateTable_Key_2022[0]
253	* iii) Save this index as offset
254	* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
255	* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
256	* b) Switch on this state and continue to next char
257	* i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
258	* which is normalize_esq_chars_2022[36] == 4
259	* ii) x is currently 1(from above)
260	* x<<=5 -- x is now 32
261	* x+=normalize_esq_chars_2022[36]
262	* now x is 36
263	* iii) Search for this value in escSeqStateTable_Key_2022[]
264	* value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
265	* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
266	* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
267	* c) Switch on this state and continue to next char
268	* i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
269	* ii) x is currently 36 (from above)
270	* x<<=5 -- x is now 1152
271	* x+=normalize_esq_chars_2022[66]
272	* now x is 1161
273	* iii) Search for this value in escSeqStateTable_Key_2022[]
274	* value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
275	* iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
276	* escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
277	* v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX208
278	*/
279
280
/* Below are the 3 arrays depicting a state transition table */

/*
 * Maps a byte to its small code (1..29) used to build escape-sequence keys;
 * 0 means the byte cannot occur anywhere in a recognized escape sequence.
 * Laid out 16 entries per row; the row label is the value of the first index.
 */
static const int8_t normalize_esq_chars_2022[256] = {
/*          0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
/* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,   /* 0x1B ESC */
/* 0x20 */  0,  0,  0,  0,  4,  7, 29,  0,  2, 24, 26, 27,  0,  3, 23,  6,   /* $ % & ( ) * + - . / */
/* 0x30 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0x40 */  5,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 25, 28,   /* @ A..M N O */
/* 0x50 */  0,  0, 21,  0,  0,  0,  0,  0,  0,  0, 22,  0,  0,  0,  0,  0,   /* R Z */
/* 0x60 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0x70 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0x80 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0x90 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0xA0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0xB0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0xC0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0xD0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0xE0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0xF0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
};
312
313	#ifdef U_ENABLE_GENERIC_ISO_2022
314	/*
315	* When the generic ISO-2022 converter is completely removed, not just disabled
316	* per #ifdef, then the following state table and the associated tables that are
317	* dimensioned with MAX_STATES_2022 should be trimmed.
318	*
319	* Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
320	* the associated escape sequences starting with ESC ( B should be removed.
321	* This includes the ones with key values 1097 and all of the ones above 1000000.
322	*
323	* For the latter, the tables can simply be truncated.
324	* For the former, since the tables must be kept parallel, it is probably best
325	* to simply duplicate an adjacent table cell, parallel in all tables.
326	*
327	* It may make sense to restructure the tables, especially by using small search
328	* tables for the variants instead of indexing them parallel to the table here.
329	*/
330	#endif
331
/* Number of entries in each of the parallel escape-sequence state tables. */
#define MAX_STATES_2022 74

/*
 * Keys of all recognized escape-sequence prefixes, in strictly ascending order
 * so that getKey_2022() can binary-search them. A key is built one byte at a
 * time as (previous key << 5) + normalize_esq_chars_2022[byte].
 * The comment on each row gives the index of its first entry.
 */
static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    /*  0 */ 1,        34,       36,       39,       55,
    /*  5 */ 57,       60,       61,       1093,     1096,
    /* 10 */ 1097,     1098,     1099,     1100,     1101,
    /* 15 */ 1102,     1103,     1104,     1105,     1106,
    /* 20 */ 1109,     1154,     1157,     1160,     1161,
    /* 25 */ 1176,     1178,     1179,     1254,     1257,
    /* 30 */ 1768,     1773,     1957,     35105,    36933,
    /* 35 */ 36936,    36937,    36938,    36939,    36940,
    /* 40 */ 36942,    36943,    36944,    36945,    36946,
    /* 45 */ 36947,    36948,    37640,    37642,    37644,
    /* 50 */ 37646,    37711,    37744,    37745,    37746,
    /* 55 */ 37747,    37748,    40133,    40136,    40138,
    /* 60 */ 40139,    40140,    40141,    1123363,  35947624,
    /* 65 */ 35947625, 35947626, 35947627, 35947629, 35947630,
    /* 70 */ 35947631, 35947635, 35947636, 35947638
};
345
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Converter name selected by each escape sequence, parallel to
 * escSeqStateTable_Key_2022[]; NULL where no name is associated with the key.
 * Only compiled for the (disabled) generic ISO-2022 converter.
 * The comment on each row gives the index of its first entry.
 */
static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
    /*  0 */ NULL,                   NULL,                   NULL,                   NULL,                   NULL,
    /*  5 */ NULL,                   NULL,                   NULL,                   "latin1",               "latin1",
    /* 10 */ "latin1",               "ibm-865",              "ibm-865",              "ibm-865",              "ibm-865",
    /* 15 */ "ibm-865",              "ibm-865",              "JISX0201",             "JISX0201",             "latin1",
    /* 20 */ "latin1",               NULL,                   "JISX-208",             "ibm-5478",             "JISX-208",
    /* 25 */ NULL,                   NULL,                   NULL,                   NULL,                   "UTF8",
    /* 30 */ "ISO-8859-1",           "ISO-8859-7",           "JIS-X-208",            NULL,                   "ibm-955",
    /* 35 */ "ibm-367",              "ibm-952",              "ibm-949",              "JISX-212",             "ibm-1383",
    /* 40 */ "ibm-952",              "ibm-964",              "ibm-964",              "ibm-964",              "ibm-964",
    /* 45 */ "ibm-964",              "ibm-964",              "ibm-5478",             "ibm-949",              "ISO-IR-165",
    /* 50 */ "CNS-11643-1992,1",     "CNS-11643-1992,2",     "CNS-11643-1992,3",     "CNS-11643-1992,4",     "CNS-11643-1992,5",
    /* 55 */ "CNS-11643-1992,6",     "CNS-11643-1992,7",     "UTF16_PlatformEndian", "UTF16_PlatformEndian", "UTF16_PlatformEndian",
    /* 60 */ "UTF16_PlatformEndian", "UTF16_PlatformEndian", "UTF16_PlatformEndian", NULL,                   "latin1",
    /* 65 */ "ibm-912",              "ibm-913",              "ibm-914",              "ibm-813",              "ibm-1089",
    /* 70 */ "ibm-920",              "ibm-915",              "ibm-915",              "latin1"
};

#endif
362
363	static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
364	/* 0 1 2 3 4 5 6 7 8 9 */
365	VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
366	,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
367	,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
368	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
369	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
370	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
371	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
372	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
373	};
374
/* Identifies the ISO-2022 variant for which changeState_2022() is called. */
typedef enum {
#ifdef U_ENABLE_GENERIC_ISO_2022
    ISO_2022    = 0,
#endif
    ISO_2022_JP = 1,
#if !UCONFIG_ONLY_HTML_CONVERSION
    ISO_2022_KR = 2,
    ISO_2022_CN = 3
#endif
} Variant2022;
386
387	/******** ISO 2022 Converter Protos ********/
388	static void U_CALLCONV
389	_ISO2022Open(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *errorCode);
390
391	static void U_CALLCONV
392	_ISO2022Close(UConverter *converter);
393
394	static void U_CALLCONV
395	_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
396
397	U_CDECL_BEGIN
398	static const char * U_CALLCONV
399	_ISO2022getName(const UConverter* cnv);
400	U_CDECL_END
401
402	static void U_CALLCONV
403	_ISO_2022_WriteSub(UConverterFromUnicodeArgs args, int32_t offsetIndex, UErrorCode err);
404
405	U_CDECL_BEGIN
406	static UConverter * U_CALLCONV
407	_ISO_2022_SafeClone(const UConverter cnv, void* stackBuffer, int32_t pBufferSize, UErrorCode *status);
408
409	U_CDECL_END
410
411	#ifdef U_ENABLE_GENERIC_ISO_2022
412	static void U_CALLCONV
413	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
414	#endif
415
416	namespace {
417
418	/const UConverterSharedData _ISO2022Data;/
419	extern const UConverterSharedData _ISO2022JPData;
420
421	#if !UCONFIG_ONLY_HTML_CONVERSION
422	extern const UConverterSharedData _ISO2022KRData;
423	extern const UConverterSharedData _ISO2022CNData;
424	#endif
425
426	} // namespace
427
428	/************ Converter implementations ***************/
429
430	/ The purpose of this function is to get around gcc compiler warnings. /
431	static inline void
432	fromUWriteUInt8(UConverter *cnv,
433	const char *bytes, int32_t length,
434	uint8_t *target, const* char *targetLimit,
435	int32_t **offsets,
436	int32_t sourceIndex,
437	UErrorCode *pErrorCode)
438	{
439	char targetChars = (char* )target;
440	ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
441	offsets, sourceIndex, pErrorCode);
442	target = (uint8_t)targetChars;
443
444	}
445
446	static inline void
447	setInitialStateToUnicodeKR(UConverter* /converter/, UConverterDataISO2022 *myConverterData){
448	if(myConverterData->version == `1`) {
449	UConverter *cnv = myConverterData->currentConverter;
450
451	cnv->toUnicodeStatus=`0`; / offset /
452	cnv->mode=`0`; / state /
453	cnv->toULength=`0`; / byteIndex /
454	}
455	}
456
457	static inline void
458	setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
459	/ in ISO-2022-KR the designator sequence appears only once*
460	* in a file so we append it only once
461	*/
462	if( converter->charErrorBufferLength==`0`){
463
464	converter->charErrorBufferLength = `4`;
465	converter->charErrorBuffer[`0`] = `0x1b`;
466	converter->charErrorBuffer[`1`] = `0x24`;
467	converter->charErrorBuffer[`2`] = `0x29`;
468	converter->charErrorBuffer[`3`] = `0x43`;
469	}
470	if(myConverterData->version == `1`) {
471	UConverter *cnv = myConverterData->currentConverter;
472
473	cnv->fromUChar32=`0`;
474	cnv->fromUnicodeStatus=`1`; / prevLength /
475	}
476	}
477
478	static void U_CALLCONV
479	_ISO2022Open(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *errorCode){
480
481	char myLocale[`7`]={`' '`,`' '`,`' '`,`' '`,`' '`,`' '`, `'\0'`};
482
483	cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
484	if(cnv->extraInfo != NULL) {
485	UConverterNamePieces stackPieces;
486	UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
487	UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) cnv->extraInfo;
488	uint32_t version;
489
490	stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
491
492	uprv_memset(myConverterData, `0`, sizeof(UConverterDataISO2022));
493	myConverterData->currentType = ASCII1;
494	cnv->fromUnicodeStatus =FALSE;
495	if(pArgs->locale){
496	uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)-`1`);
497	}
498	version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
499	myConverterData->version = version;
500	if(myLocale[`0`]==`'j'` && (myLocale[`1`]==`'a'`\|\| myLocale[`1`]==`'p'`) &&
501	(myLocale[`2`]==`'_'` \|\| myLocale[`2`]==`'\0'`))
502	{
503	/ open the required converters and cache them /
504	if(version>MAX_JA_VERSION) {
505	// ICU 55 fails to open a converter for an unsupported version.
506	// Previously, it fell back to version 0, but that would yield
507	// unexpected behavior.
508	*errorCode = U_MISSING_RESOURCE_ERROR;
509	return;
510	}
511	if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
512	myConverterData->myConverterArray[ISO8859_7] =
513	ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
514	}
515	myConverterData->myConverterArray[JISX208] =
516	ucnv_loadSharedData("EUC-JP", &stackPieces, &stackArgs, errorCode);
517	if(jpCharsetMasks[version]&CSM(JISX212)) {
518	myConverterData->myConverterArray[JISX212] =
519	ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
520	}
521	if(jpCharsetMasks[version]&CSM(GB2312)) {
522	myConverterData->myConverterArray[GB2312] =
523	ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); / gb_2312_80-1 /
524	}
525	if(jpCharsetMasks[version]&CSM(KSC5601)) {
526	myConverterData->myConverterArray[KSC5601] =
527	ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
528	}
529
530	/ set the function pointers to appropriate funtions /
531	cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
532	uprv_strcpy(myConverterData->locale,"ja");
533
534	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
535	size_t len = uprv_strlen(myConverterData->name);
536	myConverterData->name[len]=(char)(myConverterData->version+(int)`'0'`);
537	myConverterData->name[len+`1`]=`'\0'`;
538	}
539	#if !UCONFIG_ONLY_HTML_CONVERSION
540	else if(myLocale[`0`]==`'k'` && (myLocale[`1`]==`'o'`\|\| myLocale[`1`]==`'r'`) &&
541	(myLocale[`2`]==`'_'` \|\| myLocale[`2`]==`'\0'`))
542	{
543	if(version>`1`) {
544	// ICU 55 fails to open a converter for an unsupported version.
545	// Previously, it fell back to version 0, but that would yield
546	// unexpected behavior.
547	*errorCode = U_MISSING_RESOURCE_ERROR;
548	return;
549	}
550	const char *cnvName;
551	if(version==`1`) {
552	cnvName="icu-internal-25546";
553	} else {
554	cnvName="ibm-949";
555	myConverterData->version=version=`0`;
556	}
557	if(pArgs->onlyTestIsLoadable) {
558	ucnv_canCreateConverter(cnvName, errorCode); / errorCode carries result /
559	uprv_free(cnv->extraInfo);
560	cnv->extraInfo=NULL;
561	return;
562	} else {
563	myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
564	if (U_FAILURE(*errorCode)) {
565	_ISO2022Close(cnv);
566	return;
567	}
568
569	if(version==`1`) {
570	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
571	uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, `4`);
572	cnv->subCharLen = myConverterData->currentConverter->subCharLen;
573	}else{
574	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
575	}
576
577	/ initialize the state variables /
578	setInitialStateToUnicodeKR(cnv, myConverterData);
579	setInitialStateFromUnicodeKR(cnv, myConverterData);
580
581	/ set the function pointers to appropriate funtions /
582	cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
583	uprv_strcpy(myConverterData->locale,"ko");
584	}
585	}
586	else if(((myLocale[`0`]==`'z'` && myLocale[`1`]==`'h'`) \|\| (myLocale[`0`]==`'c'`&& myLocale[`1`]==`'n'`))&&
587	(myLocale[`2`]==`'_'` \|\| myLocale[`2`]==`'\0'`))
588	{
589	if(version>`2`) {
590	// ICU 55 fails to open a converter for an unsupported version.
591	// Previously, it fell back to version 0, but that would yield
592	// unexpected behavior.
593	*errorCode = U_MISSING_RESOURCE_ERROR;
594	return;
595	}
596
597	/ open the required converters and cache them /
598	myConverterData->myConverterArray[GB2312_1] =
599	ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
600	if(version==`1`) {
601	myConverterData->myConverterArray[ISO_IR_165] =
602	ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
603	}
604	myConverterData->myConverterArray[CNS_11643] =
605	ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
606
607
608	/ set the function pointers to appropriate funtions /
609	cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
610	uprv_strcpy(myConverterData->locale,"cn");
611
612	if (version==`0`){
613	myConverterData->version = `0`;
614	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
615	}else if (version==`1`){
616	myConverterData->version = `1`;
617	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
618	}else {
619	myConverterData->version = `2`;
620	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
621	}
622	}
623	#endif // !UCONFIG_ONLY_HTML_CONVERSION
624	else{
625	#ifdef U_ENABLE_GENERIC_ISO_2022
626	myConverterData->isFirstBuffer = TRUE;
627
628	/ append the UTF-8 escape sequence /
629	cnv->charErrorBufferLength = `3`;
630	cnv->charErrorBuffer[`0`] = `0x1b`;
631	cnv->charErrorBuffer[`1`] = `0x25`;
632	cnv->charErrorBuffer[`2`] = `0x42`;
633
634	cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
635	/ initialize the state variables /
636	uprv_strcpy(myConverterData->name,"ISO_2022");
637	#else
638	*errorCode = U_MISSING_RESOURCE_ERROR;
639	// Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard
640	// data loading error code.
641	return;
642	#endif
643	}
644
645	cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
646
647	if(U_FAILURE(*errorCode) \|\| pArgs->onlyTestIsLoadable) {
648	_ISO2022Close(cnv);
649	}
650	} else {
651	*errorCode = U_MEMORY_ALLOCATION_ERROR;
652	}
653	}
654
655
656	static void U_CALLCONV
657	_ISO2022Close(UConverter *converter) {
658	UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
659	UConverterSharedData **array = myData->myConverterArray;
660	int32_t i;
661
662	if (converter->extraInfo != NULL) {
663	/close the array of converter pointers and free the memory/
664	for (i=`0`; i<UCNV_2022_MAX_CONVERTERS; i++) {
665	if(array[i]!=NULL) {
666	ucnv_unloadSharedDataIfReady(array[i]);
667	}
668	}
669
670	ucnv_close(myData->currentConverter);
671
672	if(!converter->isExtraLocal){
673	uprv_free (converter->extraInfo);
674	converter->extraInfo = NULL;
675	}
676	}
677	}
678
679	static void U_CALLCONV
680	_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
681	UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) (converter->extraInfo);
682	if(choice<=UCNV_RESET_TO_UNICODE) {
683	uprv_memset(&myConverterData->toU2022State, `0`, sizeof(ISO2022State));
684	myConverterData->key = `0`;
685	myConverterData->isEmptySegment = FALSE;
686	}
687	if(choice!=UCNV_RESET_TO_UNICODE) {
688	uprv_memset(&myConverterData->fromU2022State, `0`, sizeof(ISO2022State));
689	}
690	#ifdef U_ENABLE_GENERIC_ISO_2022
691	if(myConverterData->locale[`0`] == `0`){
692	if(choice<=UCNV_RESET_TO_UNICODE) {
693	myConverterData->isFirstBuffer = TRUE;
694	myConverterData->key = `0`;
695	if (converter->mode == UCNV_SO){
696	ucnv_close (myConverterData->currentConverter);
697	myConverterData->currentConverter=NULL;
698	}
699	converter->mode = UCNV_SI;
700	}
701	if(choice!=UCNV_RESET_TO_UNICODE) {
702	/ re-append UTF-8 escape sequence /
703	converter->charErrorBufferLength = `3`;
704	converter->charErrorBuffer[`0`] = `0x1b`;
705	converter->charErrorBuffer[`1`] = `0x28`;
706	converter->charErrorBuffer[`2`] = `0x42`;
707	}
708	}
709	else
710	#endif
711	{
712	/ reset the state variables /
713	if(myConverterData->locale[`0`] == `'k'`){
714	if(choice<=UCNV_RESET_TO_UNICODE) {
715	setInitialStateToUnicodeKR(converter, myConverterData);
716	}
717	if(choice!=UCNV_RESET_TO_UNICODE) {
718	setInitialStateFromUnicodeKR(converter, myConverterData);
719	}
720	}
721	}
722	}
723
724	U_CDECL_BEGIN
725
726	static const char * U_CALLCONV
727	_ISO2022getName(const UConverter* cnv){
728	if(cnv->extraInfo){
729	UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
730	return myData->name;
731	}
732	return NULL;
733	}
734
735	U_CDECL_END
736
737
738	/************ to unicode ****************/
739	/****************************************************************************
740	* Recognized escape sequences are
741	* <ESC>(B ASCII
742	* <ESC>.A ISO-8859-1
743	* <ESC>.F ISO-8859-7
744	* <ESC>(J JISX-201
745	* <ESC>(I JISX-201
746	* <ESC>$B JISX-208
747	* <ESC>$@ JISX-208
748	* <ESC>$(D JISX-212
749	* <ESC>$A GB2312
750	* <ESC>$(C KSC5601
751	*/
752	static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
753	/ 0 1 2 3 4 5 6 7 8 9 /
754	INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
755	,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE
756	,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
757	,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE
758	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
759	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
760	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
761	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
762	};
763
764	#if !UCONFIG_ONLY_HTML_CONVERSION
765	/************ to unicode ****************/
766	static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
767	/ 0 1 2 3 4 5 6 7 8 9 /
768	INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
769	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
770	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
771	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
772	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
773	,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
774	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
775	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
776	};
777	#endif
778
779
780	static UCNV_TableStates_2022
781	getKey_2022(char c,int32_t* key,int32_t* offset){
782	int32_t togo;
783	int32_t low = `0`;
784	int32_t hi = MAX_STATES_2022;
785	int32_t oldmid=`0`;
786
787	togo = normalize_esq_chars_2022[(uint8_t)c];
788	if(togo == `0`) {
789	/ not a valid character anywhere in an escape sequence /
790	*key = `0`;
791	*offset = `0`;
792	return INVALID_2022;
793	}
794	togo = (*key << `5`) + togo;
795
796	while (hi != low) /binary search/{
797
798	int32_t mid = (hi+low) >> `1`; /Finds median/
799
800	if (mid == oldmid)
801	break;
802
803	if (escSeqStateTable_Key_2022[mid] > togo){
804	hi = mid;
805	}
806	else if (escSeqStateTable_Key_2022[mid] < togo){
807	low = mid;
808	}
809	else /we found it/{
810	*key = togo;
811	*offset = mid;
812	return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
813	}
814	oldmid = mid;
815
816	}
817
818	*key = `0`;
819	*offset = `0`;
820	return INVALID_2022;
821	}
822
823	/runs through a state machine to determine the escape sequence - codepage correspondance*
824	*/
825	static void
826	changeState_2022(UConverter* _this,
827	const char** source,
828	const char* sourceLimit,
829	Variant2022 var,
830	UErrorCode* err){
831	UCNV_TableStates_2022 value;
832	UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
833	uint32_t key = myData2022->key;
834	int32_t offset = `0`;
835	int8_t initialToULength = _this->toULength;
836	char c;
837
838	value = VALID_NON_TERMINAL_2022;
839	while (*source < sourceLimit) {
840	c = (source)++;
841	_this->toUBytes[_this->toULength++]=(uint8_t)c;
842	value = getKey_2022(c,(int32_t *) &key, &offset);
843
844	switch (value){
845
846	case VALID_NON_TERMINAL_2022 :
847	/ continue with the loop /
848	break;
849
850	case VALID_TERMINAL_2022:
851	key = `0`;
852	goto DONE;
853
854	case INVALID_2022:
855	goto DONE;
856
857	case VALID_MAYBE_TERMINAL_2022:
858	#ifdef U_ENABLE_GENERIC_ISO_2022
859	/ ESC ( B is ambiguous only for ISO_2022 itself /
860	if(var == ISO_2022) {
861	/ discard toUBytes[] for ESC ( B because this sequence is correct and complete /
862	_this->toULength = `0`;
863
864	/ TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay /
865
866	/ continue with the loop /
867	value = VALID_NON_TERMINAL_2022;
868	break;
869	} else
870	#endif
871	{
872	/ not ISO_2022 itself, finish here /
873	value = VALID_TERMINAL_2022;
874	key = `0`;
875	goto DONE;
876	}
877	}
878	}
879
880	DONE:
881	myData2022->key = key;
882
883	if (value == VALID_NON_TERMINAL_2022) {
884	/ indicate that the escape sequence is incomplete: key!=0 /
885	return;
886	} else if (value == INVALID_2022 ) {
887	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
888	} else / value == VALID_TERMINAL_2022 / {
889	switch(var){
890	#ifdef U_ENABLE_GENERIC_ISO_2022
891	case ISO_2022:
892	{
893	const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
894	if(chosenConverterName == NULL) {
895	/ SS2 or SS3 /
896	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
897	_this->toUCallbackReason = UCNV_UNASSIGNED;
898	return;
899	}
900
901	_this->mode = UCNV_SI;
902	ucnv_close(myData2022->currentConverter);
903	myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
904	if(U_SUCCESS(*err)) {
905	myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
906	_this->mode = UCNV_SO;
907	}
908	break;
909	}
910	#endif
911	case ISO_2022_JP:
912	{
913	StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
914	switch(tempState) {
915	case INVALID_STATE:
916	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
917	break;
918	case SS2_STATE:
919	if(myData2022->toU2022State.cs[`2`]!=`0`) {
920	if(myData2022->toU2022State.g<`2`) {
921	myData2022->toU2022State.prevG=myData2022->toU2022State.g;
922	}
923	myData2022->toU2022State.g=`2`;
924	} else {
925	/ illegal to have SS2 before a matching designator /
926	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
927	}
928	break;
929	/ case SS3_STATE: not used in ISO-2022-JP-x /
930	case ISO8859_1:
931	case ISO8859_7:
932	if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == `0`) {
933	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
934	} else {
935	/ G2 charset for SS2 /
936	myData2022->toU2022State.cs[`2`]=(int8_t)tempState;
937	}
938	break;
939	default:
940	if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == `0`) {
941	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
942	} else {
943	/ G0 charset /
944	myData2022->toU2022State.cs[`0`]=(int8_t)tempState;
945	}
946	break;
947	}
948	}
949	break;
950	#if !UCONFIG_ONLY_HTML_CONVERSION
951	case ISO_2022_CN:
952	{
953	StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
954	switch(tempState) {
955	case INVALID_STATE:
956	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
957	break;
958	case SS2_STATE:
959	if(myData2022->toU2022State.cs[`2`]!=`0`) {
960	if(myData2022->toU2022State.g<`2`) {
961	myData2022->toU2022State.prevG=myData2022->toU2022State.g;
962	}
963	myData2022->toU2022State.g=`2`;
964	} else {
965	/ illegal to have SS2 before a matching designator /
966	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
967	}
968	break;
969	case SS3_STATE:
970	if(myData2022->toU2022State.cs[`3`]!=`0`) {
971	if(myData2022->toU2022State.g<`2`) {
972	myData2022->toU2022State.prevG=myData2022->toU2022State.g;
973	}
974	myData2022->toU2022State.g=`3`;
975	} else {
976	/ illegal to have SS3 before a matching designator /
977	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
978	}
979	break;
980	case ISO_IR_165:
981	if(myData2022->version==`0`) {
982	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
983	break;
984	}
985	U_FALLTHROUGH;
986	case GB2312_1:
987	U_FALLTHROUGH;
988	case CNS_11643_1:
989	myData2022->toU2022State.cs[`1`]=(int8_t)tempState;
990	break;
991	case CNS_11643_2:
992	myData2022->toU2022State.cs[`2`]=(int8_t)tempState;
993	break;
994	default:
995	/ other CNS 11643 planes /
996	if(myData2022->version==`0`) {
997	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
998	} else {
999	myData2022->toU2022State.cs[`3`]=(int8_t)tempState;
1000	}
1001	break;
1002	}
1003	}
1004	break;
1005	case ISO_2022_KR:
1006	if(offset==`0x30`){
1007	/ nothing to be done, just accept this one escape sequence /
1008	} else {
1009	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
1010	}
1011	break;
1012	#endif // !UCONFIG_ONLY_HTML_CONVERSION
1013
1014	default:
1015	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
1016	break;
1017	}
1018	}
1019	if(U_SUCCESS(*err)) {
1020	_this->toULength = `0`;
1021	} else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
1022	if(_this->toULength>`1`) {
1023	/*
1024	* Ticket 5691: consistent illegal sequences:
1025	* - We include at least the first byte (ESC) in the illegal sequence.
1026	* - If any of the non-initial bytes could be the start of a character,
1027	* we stop the illegal sequence before the first one of those.
1028	* In escape sequences, all following bytes are "printable", that is,
1029	* unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
1030	* they are valid single/lead bytes.
1031	* For simplicity, we always only report the initial ESC byte as the
1032	* illegal sequence and back out all other bytes we looked at.
1033	*/
1034	/ Back out some bytes. /
1035	int8_t backOutDistance=_this->toULength-`1`;
1036	int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
1037	if(backOutDistance<=bytesFromThisBuffer) {
1038	/ same as initialToULength<=1 /
1039	*source-=backOutDistance;
1040	} else {
1041	/ Back out bytes from the previous buffer: Need to replay them. /
1042	_this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
1043	/ same as -(initialToULength-1) /
1044	/ preToULength is negative! /
1045	uprv_memcpy(_this->preToU, _this->toUBytes+`1`, -_this->preToULength);
1046	*source-=bytesFromThisBuffer;
1047	}
1048	_this->toULength=`1`;
1049	}
1050	} else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
1051	_this->toUCallbackReason = UCNV_UNASSIGNED;
1052	}
1053	}
1054
1055	#if !UCONFIG_ONLY_HTML_CONVERSION
1056	/Checks the characters of the buffer against valid 2022 escape sequences*
1057	*if the match we return a pointer to the initial start of the sequence otherwise
1058	*we return sourceLimit
1059	*/
1060	/for 2022 looks ahead in the stream*
1061	*to determine the longest possible convertible
1062	*data stream
1063	*/
1064	static inline const char*
1065	getEndOfBuffer_2022(const char** source,
1066	const char* sourceLimit,
1067	UBool /flush/){
1068
1069	const char* mySource = *source;
1070
1071	#ifdef U_ENABLE_GENERIC_ISO_2022
1072	if (*source >= sourceLimit)
1073	return sourceLimit;
1074
1075	do{
1076
1077	if (*mySource == ESC_2022){
1078	int8_t i;
1079	int32_t key = `0`;
1080	int32_t offset;
1081	UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
1082
1083	/ Kludge: I could not*
1084	* figure out the reason for validating an escape sequence
1085	* twice - once here and once in changeState_2022().
1086	* is it possible to have an ESC character in a ISO2022
1087	* byte stream which is valid in a code page? Is it legal?
1088	*/
1089	for (i=`0`;
1090	(mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
1091	i++) {
1092	value = getKey_2022(*(mySource+i), &key, &offset);
1093	}
1094	if (value > `0` \|\| *mySource==ESC_2022)
1095	return mySource;
1096
1097	if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
1098	return sourceLimit;
1099	}
1100	}while (++mySource < sourceLimit);
1101
1102	return sourceLimit;
1103	#else
1104	while(mySource < sourceLimit && *mySource != ESC_2022) {
1105	++mySource;
1106	}
1107	return mySource;
1108	#endif
1109	}
1110	#endif
1111
1112	/ This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c*
1113	* any future change in _MBCSFromUChar32() function should be reflected here.
1114	* @return number of bytes in *value; negative number if fallback; 0 if no mapping
1115	*/
1116	static inline int32_t
1117	MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
1118	UChar32 c,
1119	uint32_t* value,
1120	UBool useFallback,
1121	int outputType)
1122	{
1123	const int32_t *cx;
1124	const uint16_t *table;
1125	uint32_t stage2Entry;
1126	uint32_t myValue;
1127	int32_t length;
1128	const uint8_t *p;
1129	/*
1130	* TODO(markus): Use and require new, faster MBCS conversion table structures.
1131	* Use internal version of ucnv_open() that verifies that the new structures are available,
1132	* else U_INTERNAL_PROGRAM_ERROR.
1133	*/
1134	/ BMP-only codepages are stored without stage 1 entries for supplementary code points /
1135	if(c<`0x10000` \|\| (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
1136	table=sharedData->mbcs.fromUnicodeTable;
1137	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
1138	/ get the bytes and the length for the output /
1139	if(outputType==MBCS_OUTPUT_2){
1140	myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
1141	if(myValue<=`0xff`) {
1142	length=`1`;
1143	} else {
1144	length=`2`;
1145	}
1146	} else / outputType==MBCS_OUTPUT_3 / {
1147	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
1148	myValue=((uint32_t)*p<<`16`)\|((uint32_t)p[`1`]<<`8`)\|p[`2`];
1149	if(myValue<=`0xff`) {
1150	length=`1`;
1151	} else if(myValue<=`0xffff`) {
1152	length=`2`;
1153	} else {
1154	length=`3`;
1155	}
1156	}
1157	/ is this code point assigned, or do we use fallbacks? /
1158	if((stage2Entry&(`1`<<(`16`+(c&`0xf`))))!=`0`) {
1159	/ assigned /
1160	*value=myValue;
1161	return length;
1162	} else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=`0`) {
1163	/*
1164	* We allow a 0 byte output if the "assigned" bit is set for this entry.
1165	* There is no way with this data structure for fallback output
1166	* to be a zero byte.
1167	*/
1168	*value=myValue;
1169	return -length;
1170	}
1171	}
1172
1173	cx=sharedData->mbcs.extIndexes;
1174	if(cx!=NULL) {
1175	return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
1176	}
1177
1178	/ unassigned /
1179	return `0`;
1180	}
1181
1182	/ This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c*
1183	* any future change in _MBCSSingleFromUChar32() function should be reflected here.
1184	* @param retval pointer to output byte
1185	* @return 1 roundtrip byte 0 no mapping -1 fallback byte
1186	*/
1187	static inline int32_t
1188	MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
1189	UChar32 c,
1190	uint32_t* retval,
1191	UBool useFallback)
1192	{
1193	const uint16_t *table;
1194	int32_t value;
1195	/ BMP-only codepages are stored without stage 1 entries for supplementary code points /
1196	if(c>=`0x10000` && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
1197	return `0`;
1198	}
1199	/ convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) /
1200	table=sharedData->mbcs.fromUnicodeTable;
1201	/ get the byte for the output /
1202	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
1203	/ is this code point assigned, or do we use fallbacks? /
1204	*retval=(uint32_t)(value&`0xff`);
1205	if(value>=`0xf00`) {
1206	return `1`; / roundtrip /
1207	} else if(useFallback ? value>=`0x800` : value>=`0xc00`) {
1208	return -`1`; / fallback taken /
1209	} else {
1210	return `0`; / no mapping /
1211	}
1212	}
1213
/*
 * Check that the result is a 2-byte value with each byte in the range A1..FE
 * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
 * to move it to the ISO 2022 range 21..7E.
 * Return 0 if out of range.
 *
 * @param value the codepage bytes, packed into one integer
 * @return the value shifted into the 21..7E range, or 0 if it is not a
 *         2-byte value with both bytes in A1..FE
 */
static inline uint32_t
_2022FromGR94DBCS(uint32_t value) {
    /*
     * The uint16_t cast below would silently drop bits above the low 16,
     * so values with more than two bytes are rejected explicitly.
     */
    if( value <= 0xffff &&
        (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
        (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
    ) {
        return value - 0x8080;  /* shift down to 21..7e byte range */
    } else {
        return 0;  /* not valid for ISO 2022 */
    }
}
1230
#if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
/*
 * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
 * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
 * unchanged.
 */
static inline uint32_t
_2022ToGR94DBCS(uint32_t value) {
    uint32_t returnValue = value + 0x8080;
    if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
        (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
        return returnValue;
    } else {
        return value;
    }
}
#endif
1248
#ifdef U_ENABLE_GENERIC_ISO_2022

/**********************************************************************************
*  ISO-2022 Converter
*
*
*/

/*
 * To-Unicode conversion for the generic ISO-2022 converter.
 *
 * Alternates between two steps until the input is used up or something has
 * to be reported to the caller:
 * 1. If no escape sequence is in progress (key==0), convert the bytes up to
 *    the next escape sequence with the current sub-converter (ASCII is opened
 *    if there is none yet).
 * 2. Hand the following bytes to changeState_2022() to process the escape
 *    sequence.
 *
 * Overflow output and partial/invalid input bytes of the sub-converter are
 * copied into this converter before returning.
 */
static void U_CALLCONV
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
                                                           UErrorCode* err){
    const char* mySourceLimit, *realSourceLimit;
    const char* sourceStart;
    const UChar* myTargetStart;
    UConverter* saveThis;
    UConverterDataISO2022* myData;
    int8_t length;

    saveThis = args->converter;
    myData=((UConverterDataISO2022*)(saveThis->extraInfo));

    realSourceLimit = args->sourceLimit;
    while (args->source < realSourceLimit) {
        if(myData->key == 0) { /* are we in the middle of an escape sequence? */
            /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
            mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);

            if(args->source < mySourceLimit) {
                if(myData->currentConverter==NULL) {
                    myData->currentConverter = ucnv_open("ASCII",err);
                    if(U_FAILURE(*err)){
                        return;
                    }

                    myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
                    saveThis->mode = UCNV_SO;
                }

                /* convert to before the ESC or until the end of the buffer */
                myData->isFirstBuffer=FALSE;
                sourceStart = args->source;
                myTargetStart = args->target;
                args->converter = myData->currentConverter;
                ucnv_toUnicode(args->converter,
                    &args->target,
                    args->targetLimit,
                    &args->source,
                    mySourceLimit,
                    args->offsets,
                    (UBool)(args->flush && mySourceLimit == realSourceLimit),
                    err);
                args->converter = saveThis;

                if (*err == U_BUFFER_OVERFLOW_ERROR) {
                    /* move the overflow buffer */
                    length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
                    myData->currentConverter->UCharErrorBufferLength = 0;
                    if(length > 0) {
                        uprv_memcpy(saveThis->UCharErrorBuffer,
                                    myData->currentConverter->UCharErrorBuffer,
                                    length*U_SIZEOF_UCHAR);
                    }
                    return;
                }

                /*
                 * At least one of:
                 * -Error while converting
                 * -Done with entire buffer
                 * -Need to write offsets or update the current offset
                 *  (leave that up to the code in ucnv.c)
                 *
                 * or else we just stopped at an ESC byte and continue with changeState_2022()
                 */
                if (U_FAILURE(*err) ||
                    (args->source == realSourceLimit) ||
                    ((args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart)) ||
                     (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
                ) {
                    /* copy partial or error input for truncated detection and error handling */
                    if(U_FAILURE(*err)) {
                        length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
                        if(length > 0) {
                            uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
                        }
                    } else {
                        length = saveThis->toULength = myData->currentConverter->toULength;
                        if(length > 0) {
                            uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
                            if(args->source < mySourceLimit) {
                                *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
                            }
                        }
                    }
                    return;
                }
            }
        }

        sourceStart = args->source;
        changeState_2022(args->converter,
               &(args->source),
               realSourceLimit,
               ISO_2022,
               err);
        if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
            /* let the ucnv.c code update its current offset */
            return;
        }
    }
}

#endif
1362
1363	/*
1364	* To Unicode Callback helper function
1365	*/
1366	static void
1367	toUnicodeCallback(UConverter *cnv,
1368	const uint32_t sourceChar, const uint32_t targetUniChar,
1369	UErrorCode* err){
1370	if(sourceChar>`0xff`){
1371	cnv->toUBytes[`0`] = (uint8_t)(sourceChar>>`8`);
1372	cnv->toUBytes[`1`] = (uint8_t)sourceChar;
1373	cnv->toULength = `2`;
1374	}
1375	else{
1376	cnv->toUBytes[`0`] =(char) sourceChar;
1377	cnv->toULength = `1`;
1378	}
1379
1380	if(targetUniChar == (missingCharMarker-`1`/0xfffe/)){
1381	*err = U_INVALID_CHAR_FOUND;
1382	}
1383	else{
1384	*err = U_ILLEGAL_CHAR_FOUND;
1385	}
1386	}
1387
1388	/***********************************ISO-2022-JP**********************************************/
1389
1390	/************************************ IMPORTANT ************************************************
1391	* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
1392	* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
1393	* The converter iterates over each Unicode codepoint
1394	* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
1395	* processed one char at a time it would make sense to reduce the extra processing a canned converter
1396	* would do as far as possible.
1397	*
1398	* If the implementation of these macros or structure of sharedData struct change in the future, make
1399	* sure that ISO-2022 is also changed.
1400	***************************************************************************************************
1401	*/
1402
/***************************************************************************************************
* Rules for ISO-2022-jp encoding
* (i)   Escape sequences must be fully contained within a line they should not
*       span new lines or CRs
* (ii)  If the last character on a line is represented by two bytes then an ASCII or
*       JIS-Roman character escape sequence should follow before the line terminates
* (iii) If the first character on the line is represented by two bytes then a two
*       byte character escape sequence should precede it
* (iv)  If no escape sequence is encountered then the characters are ASCII
* (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
*       and invoked with SS2 (ESC N).
* (vi)  If there is any G0 designation in text, there must be a switch to
*       ASCII or to JIS X 0201-Roman before a space character (but not
*       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
*       characters such as tab or CRLF.
* (vii) Supported encodings:
*          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
*
*  source : RFC-1554
*
*          JISX201, JISX208,JISX212 : new .cnv data files created
*          KSC5601 : alias to ibm-949 mapping table
*          GB2312 : alias to ibm-1386 mapping table
*          ISO-8859-1 : Algorithmic implemented as LATIN1 case
*          ISO-8859-7 : alias to ibm-9409 mapping table
*/
1429
1430	/ preference order of JP charsets /
1431	static const StateEnum jpCharsetPref[]={
1432	ASCII,
1433	JISX201,
1434	ISO8859_1,
1435	JISX208,
1436	ISO8859_7,
1437	JISX212,
1438	GB2312,
1439	KSC5601,
1440	HWKANA_7BIT
1441	};
1442
/*
 * Designation escape sequences for the ISO-2022-JP charsets.
 *
 * The escape sequences must be in order of the enum constants like JISX201  = 3,
 * not in order of jpCharsetPref[]!
 */
static const char escSeqChars[][6] ={
    "\x1B\x28\x42",         /* <ESC>(B  ASCII       */
    "\x1B\x2E\x41",         /* <ESC>.A  ISO-8859-1  */
    "\x1B\x2E\x46",         /* <ESC>.F  ISO-8859-7  */
    "\x1B\x28\x4A",         /* <ESC>(J  JISX-201    */
    "\x1B\x24\x42",         /* <ESC>$B  JISX-208    */
    "\x1B\x24\x28\x44",     /* <ESC>$(D JISX-212    */
    "\x1B\x24\x41",         /* <ESC>$A  GB2312      */
    "\x1B\x24\x28\x43",     /* <ESC>$(C KSC5601     */
    "\x1B\x28\x49"          /* <ESC>(I  HWKANA_7BIT */

};
/* Lengths in bytes of the escape sequences in escSeqChars[], in the same order. */
static  const int8_t escSeqCharsLen[] ={
    3, /* length of <ESC>(B  ASCII       */
    3, /* length of <ESC>.A  ISO-8859-1  */
    3, /* length of <ESC>.F  ISO-8859-7  */
    3, /* length of <ESC>(J  JISX-201    */
    3, /* length of <ESC>$B  JISX-208    */
    4, /* length of <ESC>$(D JISX-212    */
    3, /* length of <ESC>$A  GB2312      */
    4, /* length of <ESC>$(C KSC5601     */
    3  /* length of <ESC>(I  HWKANA_7BIT */
};
1470
/*
* The iteration over various code pages works this way:
* i)   Get the currentState from myConverterData->currentState
* ii)  Check if the character is mapped to a valid character in the currentState
*      Yes ->  a) set the initIterState to currentState
*              b) remain in this state until an invalid character is found
*      No  ->  a) go to the next code page and find the character
* iii) Before changing the state increment the current state check if the current state
*      is equal to the initIterState
*      Yes ->  A character that cannot be represented in any of the supported encodings
*              break and return a U_INVALID_CHARACTER error
*      No  ->  Continue and find the character in next code page
*
*
* TODO: Implement a priority technique where the users are allowed to set the priority of code pages
*/
1487
/*
 * Map 00..7F to Unicode according to JIS X 0201.
 * Only two bytes differ from ASCII: 5C maps to U+00A5 and 7E maps to U+203E.
 *
 * @param value the byte value, expected to be in 00..7F
 * @return the Unicode code point
 */
static inline uint32_t
jisx201ToU(uint32_t value) {
    if(value < 0x5c) {
        return value;
    } else if(value == 0x5c) {
        return 0xa5;
    } else if(value == 0x7e) {
        return 0x203e;
    } else /* value <= 0x7f */ {
        return value;
    }
}
1501
/*
 * Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable.
 * U+005C and U+007E are unmappable because their byte values are used for
 * U+00A5 and U+203E.
 *
 * @param value the Unicode code point
 * @return the byte value in 00..7F, or 0xfffe if there is no mapping
 */
static inline uint32_t
jisx201FromU(uint32_t value) {
    if(value<=0x7f) {
        if(value!=0x5c && value!=0x7e) {
            return value;
        }
    } else if(value==0xa5) {
        return 0x5c;
    } else if(value==0x203e) {
        return 0x7e;
    }
    return 0xfffe;
}
1516
1517	/*
1518	* JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
1519	* Katakana.
1520	* Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
1521	* because Shift-JIS roundtrips half-width Katakana to single bytes.
1522	* These were the only fallbacks in ICU's jisx-208.ucm file.
1523	*/
1524	static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + `1`] = {
1525	`0x2123`, / U+FF61 /
1526	`0x2156`,
1527	`0x2157`,
1528	`0x2122`,
1529	`0x2126`,
1530	`0x2572`,
1531	`0x2521`,
1532	`0x2523`,
1533	`0x2525`,
1534	`0x2527`,
1535	`0x2529`,
1536	`0x2563`,
1537	`0x2565`,
1538	`0x2567`,
1539	`0x2543`,
1540	`0x213C`, / U+FF70 /
1541	`0x2522`,
1542	`0x2524`,
1543	`0x2526`,
1544	`0x2528`,
1545	`0x252A`,
1546	`0x252B`,
1547	`0x252D`,
1548	`0x252F`,
1549	`0x2531`,
1550	`0x2533`,
1551	`0x2535`,
1552	`0x2537`,
1553	`0x2539`,
1554	`0x253B`,
1555	`0x253D`,
1556	`0x253F`, / U+FF80 /
1557	`0x2541`,
1558	`0x2544`,
1559	`0x2546`,
1560	`0x2548`,
1561	`0x254A`,
1562	`0x254B`,
1563	`0x254C`,
1564	`0x254D`,
1565	`0x254E`,
1566	`0x254F`,
1567	`0x2552`,
1568	`0x2555`,
1569	`0x2558`,
1570	`0x255B`,
1571	`0x255E`,
1572	`0x255F`, / U+FF90 /
1573	`0x2560`,
1574	`0x2561`,
1575	`0x2562`,
1576	`0x2564`,
1577	`0x2566`,
1578	`0x2568`,
1579	`0x2569`,
1580	`0x256A`,
1581	`0x256B`,
1582	`0x256C`,
1583	`0x256D`,
1584	`0x256F`,
1585	`0x2573`,
1586	`0x212B`,
1587	`0x212C` / U+FF9F /
1588	};
1589
1590	static void U_CALLCONV
1591	UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
1592	UConverter *cnv = args->converter;
1593	UConverterDataISO2022 *converterData;
1594	ISO2022State *pFromU2022State;
1595	uint8_t target = (uint8_t ) args->target;
1596	const uint8_t targetLimit = (const* uint8_t *) args->targetLimit;
1597	const UChar* source = args->source;
1598	const UChar* sourceLimit = args->sourceLimit;
1599	int32_t* offsets = args->offsets;
1600	UChar32 sourceChar;
1601	char buffer[`8`];
1602	int32_t len, outLen;
1603	int8_t choices[`10`];
1604	int32_t choiceCount;
1605	uint32_t targetValue = `0`;
1606	UBool useFallback;
1607
1608	int32_t i;
1609	int8_t cs, g;
1610
1611	/ set up the state /
1612	converterData = (UConverterDataISO2022*)cnv->extraInfo;
1613	pFromU2022State = &converterData->fromU2022State;
1614
1615	choiceCount = `0`;
1616
1617	/ check if the last codepoint of previous buffer was a lead surrogate/
1618	if((sourceChar = cnv->fromUChar32)!=`0` && target< targetLimit) {
1619	goto getTrail;
1620	}
1621
1622	while(source < sourceLimit) {
1623	if(target < targetLimit) {
1624
1625	sourceChar = *(source++);
1626	/check if the char is a First surrogate/
1627	if(U16_IS_SURROGATE(sourceChar)) {
1628	if(U16_IS_SURROGATE_LEAD(sourceChar)) {
1629	getTrail:
1630	/look ahead to find the trail surrogate/
1631	if(source < sourceLimit) {
1632	/ test the following code unit /
1633	UChar trail=(UChar) *source;
1634	if(U16_IS_TRAIL(trail)) {
1635	source++;
1636	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
1637	cnv->fromUChar32=`0x00`;
1638	/ convert this supplementary code point /
1639	/ exit this condition tree /
1640	} else {
1641	/ this is an unmatched lead code unit (1st surrogate) /
1642	/ callback(illegal) /
1643	*err=U_ILLEGAL_CHAR_FOUND;
1644	cnv->fromUChar32=sourceChar;
1645	break;
1646	}
1647	} else {
1648	/ no more input /
1649	cnv->fromUChar32=sourceChar;
1650	break;
1651	}
1652	} else {
1653	/ this is an unmatched trail code unit (2nd surrogate) /
1654	/ callback(illegal) /
1655	*err=U_ILLEGAL_CHAR_FOUND;
1656	cnv->fromUChar32=sourceChar;
1657	break;
1658	}
1659	}
1660
1661	/ do not convert SO/SI/ESC /
1662	if(IS_2022_CONTROL(sourceChar)) {
1663	/ callback(illegal) /
1664	*err=U_ILLEGAL_CHAR_FOUND;
1665	cnv->fromUChar32=sourceChar;
1666	break;
1667	}
1668
1669	/ do the conversion /
1670
1671	if(choiceCount == `0`) {
1672	uint16_t csm;
1673
1674	/*
1675	* The csm variable keeps track of which charsets are allowed
1676	* and not used yet while building the choices[].
1677	*/
1678	csm = jpCharsetMasks[converterData->version];
1679	choiceCount = `0`;
1680
1681	/ JIS7/8: try single-byte half-width Katakana before JISX208 /
1682	if(converterData->version == `3` \|\| converterData->version == `4`) {
1683	choices[choiceCount++] = (int8_t)HWKANA_7BIT;
1684	}
1685	/ Do not try single-byte half-width Katakana for other versions. /
1686	csm &= ~CSM(HWKANA_7BIT);
1687
1688	/ try the current G0 charset /
1689	choices[choiceCount++] = cs = pFromU2022State->cs[`0`];
1690	csm &= ~CSM(cs);
1691
1692	/ try the current G2 charset /
1693	if((cs = pFromU2022State->cs[`2`]) != `0`) {
1694	choices[choiceCount++] = cs;
1695	csm &= ~CSM(cs);
1696	}
1697
1698	/ try all the other possible charsets /
1699	for(i = `0`; i < UPRV_LENGTHOF(jpCharsetPref); ++i) {
1700	cs = (int8_t)jpCharsetPref[i];
1701	if(CSM(cs) & csm) {
1702	choices[choiceCount++] = cs;
1703	csm &= ~CSM(cs);
1704	}
1705	}
1706	}
1707
1708	cs = g = `0`;
1709	/*
1710	* len==0: no mapping found yet
1711	* len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
1712	* len>0: found a roundtrip result, done
1713	*/
1714	len = `0`;
1715	/*
1716	* We will turn off useFallback after finding a fallback,
1717	* but we still get fallbacks from PUA code points as usual.
1718	* Therefore, we will also need to check that we don't overwrite
1719	* an early fallback with a later one.
1720	*/
1721	useFallback = cnv->useFallback;
1722
1723	for(i = `0`; i < choiceCount && len <= `0`; ++i) {
1724	uint32_t value;
1725	int32_t len2;
1726	int8_t cs0 = choices[i];
1727	switch(cs0) {
1728	case ASCII:
1729	if(sourceChar <= `0x7f`) {
1730	targetValue = (uint32_t)sourceChar;
1731	len = `1`;
1732	cs = cs0;
1733	g = `0`;
1734	}
1735	break;
1736	case ISO8859_1:
1737	if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
1738	targetValue = (uint32_t)sourceChar - `0x80`;
1739	len = `1`;
1740	cs = cs0;
1741	g = `2`;
1742	}
1743	break;
1744	case HWKANA_7BIT:
1745	if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
1746	if(converterData->version==`3`) {
1747	/ JIS7: use G1 (SO) /
1748	/ Shift U+FF61..U+FF9F to bytes 21..5F. /
1749	targetValue = (uint32_t)(sourceChar - (HWKANA_START - `0x21`));
1750	len = `1`;
1751	pFromU2022State->cs[`1`] = cs = cs0; / do not output an escape sequence /
1752	g = `1`;
1753	} else if(converterData->version==`4`) {
1754	/ JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below /
1755	/ Shift U+FF61..U+FF9F to bytes A1..DF. /
1756	targetValue = (uint32_t)(sourceChar - (HWKANA_START - `0xa1`));
1757	len = `1`;
1758
1759	cs = pFromU2022State->cs[`0`];
1760	if(IS_JP_DBCS(cs)) {
1761	/ switch from a DBCS charset to JISX201 /
1762	cs = (int8_t)JISX201;
1763	}
1764	/ else stay in the current G0 charset /
1765	g = `0`;
1766	}
1767	/ else do not use HWKANA_7BIT with other versions /
1768	}
1769	break;
1770	case JISX201:
1771	/ G0 SBCS /
1772	value = jisx201FromU(sourceChar);
1773	if(value <= `0x7f`) {
1774	targetValue = value;
1775	len = `1`;
1776	cs = cs0;
1777	g = `0`;
1778	useFallback = FALSE;
1779	}
1780	break;
1781	case JISX208:
1782	/ G0 DBCS from Shift-JIS table /
1783	len2 = MBCS_FROM_UCHAR32_ISO2022(
1784	converterData->myConverterArray[cs0],
1785	sourceChar, &value,
1786	useFallback, MBCS_OUTPUT_2);
1787	// Only accept DBCS char (abs(len2) == 2).
1788	// With EUC-JP table for JIS X 208, half-width Kana
1789	// represented with DBCS starting with 0x8E has to be
1790	// filtered out so that they can be converted with
1791	// hwkana_fb table.
1792	if((len2 == `2` && ((value & `0xFF00`) != `0x8E00`)) \|\| (len2 == -`2` && len == `0`)) {
1793	value &= `0x7F7F`;
1794	if(value != `0`) {
1795	targetValue = value;
1796	len = len2;
1797	cs = cs0;
1798	g = `0`;
1799	useFallback = FALSE;
1800	}
1801	} else if(len == `0` && useFallback &&
1802	(uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
1803	targetValue = hwkana_fb[sourceChar - HWKANA_START];
1804	len = -`2`;
1805	cs = cs0;
1806	g = `0`;
1807	useFallback = FALSE;
1808	}
1809	break;
1810	case ISO8859_7:
1811	/ G0 SBCS forced to 7-bit output /
1812	len2 = MBCS_SINGLE_FROM_UCHAR32(
1813	converterData->myConverterArray[cs0],
1814	sourceChar, &value,
1815	useFallback);
1816	if(len2 != `0` && !(len2 < `0` && len != `0`) && GR96_START <= value && value <= GR96_END) {
1817	targetValue = value - `0x80`;
1818	len = len2;
1819	cs = cs0;
1820	g = `2`;
1821	useFallback = FALSE;
1822	}
1823	break;
1824	default:
1825	/ G0 DBCS /
1826	len2 = MBCS_FROM_UCHAR32_ISO2022(
1827	converterData->myConverterArray[cs0],
1828	sourceChar, &value,
1829	useFallback, MBCS_OUTPUT_2);
1830	if(len2 == `2` \|\| (len2 == -`2` && len == `0`)) { / only accept DBCS: abs(len)==2 /
1831	if(cs0 == KSC5601) {
1832	/*
1833	* Check for valid bytes for the encoding scheme.
1834	* This is necessary because the sub-converter (windows-949)
1835	* has a broader encoding scheme than is valid for 2022.
1836	*/
1837	value = _2022FromGR94DBCS(value);
1838	if(value == `0`) {
1839	break;
1840	}
1841	}
1842	targetValue = value;
1843	len = len2;
1844	cs = cs0;
1845	g = `0`;
1846	useFallback = FALSE;
1847	}
1848	break;
1849	}
1850	}
1851
1852	if(len != `0`) {
1853	if(len < `0`) {
1854	len = -len; / fallback /
1855	}
1856	outLen = `0`; / count output bytes /
1857
1858	/ write SI if necessary (only for JIS7) /
1859	if(pFromU2022State->g == `1` && g == `0`) {
1860	buffer[outLen++] = UCNV_SI;
1861	pFromU2022State->g = `0`;
1862	}
1863
1864	/ write the designation sequence if necessary /
1865	if(cs != pFromU2022State->cs[g]) {
1866	int32_t escLen = escSeqCharsLen[cs];
1867	uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
1868	outLen += escLen;
1869	pFromU2022State->cs[g] = cs;
1870
1871	/ invalidate the choices[] /
1872	choiceCount = `0`;
1873	}
1874
1875	/ write the shift sequence if necessary /
1876	if(g != pFromU2022State->g) {
1877	switch(g) {
1878	/ case 0 handled before writing escapes /
1879	case `1`:
1880	buffer[outLen++] = UCNV_SO;
1881	pFromU2022State->g = `1`;
1882	break;
1883	default: / case 2 /
1884	buffer[outLen++] = `0x1b`;
1885	buffer[outLen++] = `0x4e`;
1886	break;
1887	/ no case 3: no SS3 in ISO-2022-JP-x /
1888	}
1889	}
1890
1891	/ write the output bytes /
1892	if(len == `1`) {
1893	buffer[outLen++] = (char)targetValue;
1894	} else / len == 2 / {
1895	buffer[outLen++] = (char)(targetValue >> `8`);
1896	buffer[outLen++] = (char)targetValue;
1897	}
1898	} else {
1899	/*
1900	* if we cannot find the character after checking all codepages
1901	* then this is an error
1902	*/
1903	*err = U_INVALID_CHAR_FOUND;
1904	cnv->fromUChar32=sourceChar;
1905	break;
1906	}
1907
1908	if(sourceChar == CR \|\| sourceChar == LF) {
1909	/ reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) /
1910	pFromU2022State->cs[`2`] = `0`;
1911	choiceCount = `0`;
1912	}
1913
1914	/ output outLen>0 bytes in buffer[] /
1915	if(outLen == `1`) {
1916	*target++ = buffer[`0`];
1917	if(offsets) {
1918	offsets++ = (int32_t)(source - args->source - `1`); /* -1: known to be ASCII /
1919	}
1920	} else if(outLen == `2` && (target + `2`) <= targetLimit) {
1921	*target++ = buffer[`0`];
1922	*target++ = buffer[`1`];
1923	if(offsets) {
1924	int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
1925	*offsets++ = sourceIndex;
1926	*offsets++ = sourceIndex;
1927	}
1928	} else {
1929	fromUWriteUInt8(
1930	cnv,
1931	buffer, outLen,
1932	&target, (const char *)targetLimit,
1933	&offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
1934	err);
1935	if(U_FAILURE(*err)) {
1936	break;
1937	}
1938	}
1939	} / end if(myTargetIndex<myTargetLength) /
1940	else{
1941	*err =U_BUFFER_OVERFLOW_ERROR;
1942	break;
1943	}
1944
1945	}/ end while(mySourceIndex<mySourceLength) /
1946
1947	/*
1948	* the end of the input stream and detection of truncated input
1949	* are handled by the framework, but for ISO-2022-JP conversion
1950	* we need to be in ASCII mode at the very end
1951	*
1952	* conditions:
1953	* successful
1954	* in SO mode or not in ASCII mode
1955	* end of input and no truncated input
1956	*/
1957	if( U_SUCCESS(*err) &&
1958	(pFromU2022State->g!=`0` \|\| pFromU2022State->cs[`0`]!=ASCII) &&
1959	args->flush && source>=sourceLimit && cnv->fromUChar32==`0`
1960	) {
1961	int32_t sourceIndex;
1962
1963	outLen = `0`;
1964
1965	if(pFromU2022State->g != `0`) {
1966	buffer[outLen++] = UCNV_SI;
1967	pFromU2022State->g = `0`;
1968	}
1969
1970	if(pFromU2022State->cs[`0`] != ASCII) {
1971	int32_t escLen = escSeqCharsLen[ASCII];
1972	uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
1973	outLen += escLen;
1974	pFromU2022State->cs[`0`] = (int8_t)ASCII;
1975	}
1976
1977	/ get the source index of the last input character /
1978	/*
1979	* TODO this would be simpler and more reliable if we used a pair
1980	* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
1981	* so that we could simply use the prevSourceIndex here;
1982	* this code gives an incorrect result for the rare case of an unmatched
1983	* trail surrogate that is alone in the last buffer of the text stream
1984	*/
1985	sourceIndex=(int32_t)(source-args->source);
1986	if(sourceIndex>`0`) {
1987	--sourceIndex;
1988	if( U16_IS_TRAIL(args->source[sourceIndex]) &&
1989	(sourceIndex==`0` \|\| U16_IS_LEAD(args->source[sourceIndex-`1`]))
1990	) {
1991	--sourceIndex;
1992	}
1993	} else {
1994	sourceIndex=-`1`;
1995	}
1996
1997	fromUWriteUInt8(
1998	cnv,
1999	buffer, outLen,
2000	&target, (const char *)targetLimit,
2001	&offsets, sourceIndex,
2002	err);
2003	}
2004
2005	/save the state and return /
2006	args->source = source;
2007	args->target = (char*)target;
2008	}
2009
2010	/************ to unicode ****************/
2011
2012	static void U_CALLCONV
2013	UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
2014	UErrorCode* err){
2015	char tempBuf[`2`];
2016	const char mySource = (char* *) args->source;
2017	UChar *myTarget = args->target;
2018	const char *mySourceLimit = args->sourceLimit;
2019	uint32_t targetUniChar = `0x0000`;
2020	uint32_t mySourceChar = `0x0000`;
2021	uint32_t tmpSourceChar = `0x0000`;
2022	UConverterDataISO2022* myData;
2023	ISO2022State *pToU2022State;
2024	StateEnum cs;
2025
2026	myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2027	pToU2022State = &myData->toU2022State;
2028
2029	if(myData->key != `0`) {
2030	/ continue with a partial escape sequence /
2031	goto escape;
2032	} else if(args->converter->toULength == `1` && mySource < mySourceLimit && myTarget < args->targetLimit) {
2033	/ continue with a partial double-byte character /
2034	mySourceChar = args->converter->toUBytes[`0`];
2035	args->converter->toULength = `0`;
2036	cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
2037	targetUniChar = missingCharMarker;
2038	goto getTrailByte;
2039	}
2040
2041	while(mySource < mySourceLimit){
2042
2043	targetUniChar =missingCharMarker;
2044
2045	if(myTarget < args->targetLimit){
2046
2047	mySourceChar= (unsigned char) *mySource++;
2048
2049	switch(mySourceChar) {
2050	case UCNV_SI:
2051	if(myData->version==`3`) {
2052	pToU2022State->g=`0`;
2053	continue;
2054	} else {
2055	/ only JIS7 uses SI/SO, not ISO-2022-JP-x /
2056	myData->isEmptySegment = FALSE; / reset this, we have a different error /
2057	break;
2058	}
2059
2060	case UCNV_SO:
2061	if(myData->version==`3`) {
2062	/ JIS7: switch to G1 half-width Katakana /
2063	pToU2022State->cs[`1`] = (int8_t)HWKANA_7BIT;
2064	pToU2022State->g=`1`;
2065	continue;
2066	} else {
2067	/ only JIS7 uses SI/SO, not ISO-2022-JP-x /
2068	myData->isEmptySegment = FALSE; / reset this, we have a different error /
2069	break;
2070	}
2071
2072	case ESC_2022:
2073	mySource--;
2074	escape:
2075	{
2076	const char * mySourceBefore = mySource;
2077	int8_t toULengthBefore = args->converter->toULength;
2078
2079	changeState_2022(args->converter,&(mySource),
2080	mySourceLimit, ISO_2022_JP,err);
2081
2082	/ If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error /
2083	if(myData->version==`0` && myData->key==`0` && U_SUCCESS(*err) && myData->isEmptySegment) {
2084	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
2085	args->converter->toUCallbackReason = UCNV_IRREGULAR;
2086	args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
2087	}
2088	}
2089
2090	/ invalid or illegal escape sequence /
2091	if(U_FAILURE(*err)){
2092	args->target = myTarget;
2093	args->source = mySource;
2094	myData->isEmptySegment = FALSE; / Reset to avoid future spurious errors /
2095	return;
2096	}
2097	/ If we successfully completed an escape sequence, we begin a new segment, empty so far /
2098	if(myData->key==`0`) {
2099	myData->isEmptySegment = TRUE;
2100	}
2101	continue;
2102
2103	/ ISO-2022-JP does not use single-byte (C1) SS2 and SS3 /
2104
2105	case CR:
2106	case LF:
2107	/ automatically reset to single-byte mode /
2108	if((StateEnum)pToU2022State->cs[`0`] != ASCII && (StateEnum)pToU2022State->cs[`0`] != JISX201) {
2109	pToU2022State->cs[`0`] = (int8_t)ASCII;
2110	}
2111	pToU2022State->cs[`2`] = `0`;
2112	pToU2022State->g = `0`;
2113	U_FALLTHROUGH;
2114	default:
2115	/ convert one or two bytes /
2116	myData->isEmptySegment = FALSE;
2117	cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
2118	if( (uint8_t)(mySourceChar - `0xa1`) <= (`0xdf` - `0xa1`) && myData->version==`4` &&
2119	!IS_JP_DBCS(cs)
2120	) {
2121	/ 8-bit halfwidth katakana in any single-byte mode for JIS8 /
2122	targetUniChar = mySourceChar + (HWKANA_START - `0xa1`);
2123
2124	/ return from a single-shift state to the previous one /
2125	if(pToU2022State->g >= `2`) {
2126	pToU2022State->g=pToU2022State->prevG;
2127	}
2128	} else switch(cs) {
2129	case ASCII:
2130	if(mySourceChar <= `0x7f`) {
2131	targetUniChar = mySourceChar;
2132	}
2133	break;
2134	case ISO8859_1:
2135	if(mySourceChar <= `0x7f`) {
2136	targetUniChar = mySourceChar + `0x80`;
2137	}
2138	/ return from a single-shift state to the previous one /
2139	pToU2022State->g=pToU2022State->prevG;
2140	break;
2141	case ISO8859_7:
2142	if(mySourceChar <= `0x7f`) {
2143	/ convert mySourceChar+0x80 to use a normal 8-bit table /
2144	targetUniChar =
2145	_MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
2146	myData->myConverterArray[cs],
2147	mySourceChar + `0x80`);
2148	}
2149	/ return from a single-shift state to the previous one /
2150	pToU2022State->g=pToU2022State->prevG;
2151	break;
2152	case JISX201:
2153	if(mySourceChar <= `0x7f`) {
2154	targetUniChar = jisx201ToU(mySourceChar);
2155	}
2156	break;
2157	case HWKANA_7BIT:
2158	if((uint8_t)(mySourceChar - `0x21`) <= (`0x5f` - `0x21`)) {
2159	/ 7-bit halfwidth Katakana /
2160	targetUniChar = mySourceChar + (HWKANA_START - `0x21`);
2161	}
2162	break;
2163	default:
2164	/ G0 DBCS /
2165	if(mySource < mySourceLimit) {
2166	int leadIsOk, trailIsOk;
2167	uint8_t trailByte;
2168	getTrailByte:
2169	trailByte = (uint8_t)*mySource;
2170	/*
2171	* Ticket 5691: consistent illegal sequences:
2172	* - We include at least the first byte in the illegal sequence.
2173	* - If any of the non-initial bytes could be the start of a character,
2174	* we stop the illegal sequence before the first one of those.
2175	*
2176	* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
2177	* an ESC/SO/SI, we report only the first byte as the illegal sequence.
2178	* Otherwise we convert or report the pair of bytes.
2179	*/
2180	leadIsOk = (uint8_t)(mySourceChar - `0x21`) <= (`0x7e` - `0x21`);
2181	trailIsOk = (uint8_t)(trailByte - `0x21`) <= (`0x7e` - `0x21`);
2182	if (leadIsOk && trailIsOk) {
2183	++mySource;
2184	tmpSourceChar = (mySourceChar << `8`) \| trailByte;
2185	/ Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. /
2186	mySourceChar = tmpSourceChar;
2187	if (cs == JISX208 \|\| cs == KSC5601) {
2188	tmpSourceChar += `0x8080`; / = _2022ToGR94DBCS(tmpSourceChar) /
2189	}
2190	tempBuf[`0`] = (char)(tmpSourceChar >> `8`);
2191	tempBuf[`1`] = (char)(tmpSourceChar);
2192	targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, `2`, FALSE);
2193	} else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte))) {
2194	/ report a pair of illegal bytes if the second byte is not a DBCS starter /
2195	++mySource;
2196	/ add another bit so that the code below writes 2 bytes in case of error /
2197	mySourceChar = `0x10000` \| (mySourceChar << `8`) \| trailByte;
2198	}
2199	} else {
2200	args->converter->toUBytes[`0`] = (uint8_t)mySourceChar;
2201	args->converter->toULength = `1`;
2202	goto endloop;
2203	}
2204	} / End of inner switch /
2205	break;
2206	} / End of outer switch /
2207	if(targetUniChar < (missingCharMarker-`1`/0xfffe/)){
2208	if(args->offsets){
2209	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= `0xff` ? `1` : `2`));
2210	}
2211	*(myTarget++)=(UChar)targetUniChar;
2212	}
2213	else if(targetUniChar > missingCharMarker){
2214	/ disassemble the surrogate pair and write to output/
2215	targetUniChar-=`0x0010000`;
2216	*myTarget = (UChar)(`0xd800`+(UChar)(targetUniChar>>`10`));
2217	if(args->offsets){
2218	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= `0xff` ? `1` : `2`));
2219	}
2220	++myTarget;
2221	if(myTarget< args->targetLimit){
2222	*myTarget = (UChar)(`0xdc00`+(UChar)(targetUniChar&`0x3ff`));
2223	if(args->offsets){
2224	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= `0xff` ? `1` : `2`));
2225	}
2226	++myTarget;
2227	}else{
2228	args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
2229	(UChar)(`0xdc00`+(UChar)(targetUniChar&`0x3ff`));
2230	}
2231
2232	}
2233	else{
2234	/ Call the callback function/
2235	toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
2236	break;
2237	}
2238	}
2239	else{ / goes with "if(myTarget < args->targetLimit)" way up near top of function /
2240	*err =U_BUFFER_OVERFLOW_ERROR;
2241	break;
2242	}
2243	}
2244	endloop:
2245	args->target = myTarget;
2246	args->source = mySource;
2247	}
2248
2249
2250	#if !UCONFIG_ONLY_HTML_CONVERSION
/***************************************************************
*   Rules for ISO-2022-KR encoding
*   i) The KSC5601 designator sequence should appear only once in a file,
*      at the beginning of a line before any KSC5601 characters. This usually
*      means that it appears by itself on the first line of the file.
*  ii) There are only 2 shifting sequences: SO to shift into double-byte mode
*      and SI to shift into single-byte mode.
*/
2259	static void U_CALLCONV
2260	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
2261
2262	UConverter* saveConv = args->converter;
2263	UConverterDataISO2022 myConverterData=(UConverterDataISO2022)saveConv->extraInfo;
2264	args->converter=myConverterData->currentConverter;
2265
2266	myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
2267	ucnv_MBCSFromUnicodeWithOffsets(args,err);
2268	saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
2269
2270	if(*err == U_BUFFER_OVERFLOW_ERROR) {
2271	if(myConverterData->currentConverter->charErrorBufferLength > `0`) {
2272	uprv_memcpy(
2273	saveConv->charErrorBuffer,
2274	myConverterData->currentConverter->charErrorBuffer,
2275	myConverterData->currentConverter->charErrorBufferLength);
2276	}
2277	saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
2278	myConverterData->currentConverter->charErrorBufferLength = `0`;
2279	}
2280	args->converter=saveConv;
2281	}
2282
2283	static void U_CALLCONV
2284	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
2285
2286	const UChar *source = args->source;
2287	const UChar *sourceLimit = args->sourceLimit;
2288	unsigned char target = (unsigned* char *) args->target;
2289	unsigned char targetLimit = (unsigned* char *) args->targetLimit;
2290	int32_t* offsets = args->offsets;
2291	uint32_t targetByteUnit = `0x0000`;
2292	UChar32 sourceChar = `0x0000`;
2293	UBool isTargetByteDBCS;
2294	UBool oldIsTargetByteDBCS;
2295	UConverterDataISO2022 *converterData;
2296	UConverterSharedData* sharedData;
2297	UBool useFallback;
2298	int32_t length =`0`;
2299
2300	converterData=(UConverterDataISO2022*)args->converter->extraInfo;
2301	/ if the version is 1 then the user is requesting*
2302	* conversion with ibm-25546 pass the arguments to
2303	* MBCS converter and return
2304	*/
2305	if(converterData->version==`1`){
2306	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
2307	return;
2308	}
2309
2310	/ initialize data /
2311	sharedData = converterData->currentConverter->sharedData;
2312	useFallback = args->converter->useFallback;
2313	isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
2314	oldIsTargetByteDBCS = isTargetByteDBCS;
2315
2316	isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus;
2317	if((sourceChar = args->converter->fromUChar32)!=`0` && target <targetLimit) {
2318	goto getTrail;
2319	}
2320	while(source < sourceLimit){
2321
2322	targetByteUnit = missingCharMarker;
2323
2324	if(target < (unsigned char*) args->targetLimit){
2325	sourceChar = *source++;
2326
2327	/ do not convert SO/SI/ESC /
2328	if(IS_2022_CONTROL(sourceChar)) {
2329	/ callback(illegal) /
2330	*err=U_ILLEGAL_CHAR_FOUND;
2331	args->converter->fromUChar32=sourceChar;
2332	break;
2333	}
2334
2335	length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
2336	if(length < `0`) {
2337	length = -length; / fallback /
2338	}
2339	/ only DBCS or SBCS characters are expected/
2340	/ DB characters with high bit set to 1 are expected /
2341	if( length > `2` \|\| length==`0` \|\|
2342	(length == `1` && targetByteUnit > `0x7f`) \|\|
2343	(length == `2` &&
2344	((uint16_t)(targetByteUnit - `0xa1a1`) > (`0xfefe` - `0xa1a1`) \|\|
2345	(uint8_t)(targetByteUnit - `0xa1`) > (`0xfe` - `0xa1`)))
2346	) {
2347	targetByteUnit=missingCharMarker;
2348	}
2349	if (targetByteUnit != missingCharMarker){
2350
2351	oldIsTargetByteDBCS = isTargetByteDBCS;
2352	isTargetByteDBCS = (UBool)(targetByteUnit>`0x00FF`);
2353	/ append the shift sequence /
2354	if (oldIsTargetByteDBCS != isTargetByteDBCS ){
2355
2356	if (isTargetByteDBCS)
2357	*target++ = UCNV_SO;
2358	else
2359	*target++ = UCNV_SI;
2360	if(offsets)
2361	*(offsets++) = (int32_t)(source - args->source-`1`);
2362	}
2363	/ write the targetUniChar to target /
2364	if(targetByteUnit <= `0x00FF`){
2365	if( target < targetLimit){
2366	(target++) = (unsigned* char) targetByteUnit;
2367	if(offsets){
2368	*(offsets++) = (int32_t)(source - args->source-`1`);
2369	}
2370
2371	}else{
2372	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
2373	*err = U_BUFFER_OVERFLOW_ERROR;
2374	}
2375	}else{
2376	if(target < targetLimit){
2377	(target++) =(unsigned* char) ((targetByteUnit>>`8`) -`0x80`);
2378	if(offsets){
2379	*(offsets++) = (int32_t)(source - args->source-`1`);
2380	}
2381	if(target < targetLimit){
2382	(target++) =(unsigned* char) (targetByteUnit -`0x80`);
2383	if(offsets){
2384	*(offsets++) = (int32_t)(source - args->source-`1`);
2385	}
2386	}else{
2387	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -`0x80`);
2388	*err = U_BUFFER_OVERFLOW_ERROR;
2389	}
2390	}else{
2391	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>`8`) -`0x80`);
2392	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-`0x80`);
2393	*err = U_BUFFER_OVERFLOW_ERROR;
2394	}
2395	}
2396
2397	}
2398	else{
2399	/ oops.. the code point is unassingned*
2400	* set the error and reason
2401	*/
2402
2403	/check if the char is a First surrogate/
2404	if(U16_IS_SURROGATE(sourceChar)) {
2405	if(U16_IS_SURROGATE_LEAD(sourceChar)) {
2406	getTrail:
2407	/look ahead to find the trail surrogate/
2408	if(source < sourceLimit) {
2409	/ test the following code unit /
2410	UChar trail=(UChar) *source;
2411	if(U16_IS_TRAIL(trail)) {
2412	source++;
2413	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
2414	*err = U_INVALID_CHAR_FOUND;
2415	/ convert this surrogate code point /
2416	/ exit this condition tree /
2417	} else {
2418	/ this is an unmatched lead code unit (1st surrogate) /
2419	/ callback(illegal) /
2420	*err=U_ILLEGAL_CHAR_FOUND;
2421	}
2422	} else {
2423	/ no more input /
2424	*err = U_ZERO_ERROR;
2425	}
2426	} else {
2427	/ this is an unmatched trail code unit (2nd surrogate) /
2428	/ callback(illegal) /
2429	*err=U_ILLEGAL_CHAR_FOUND;
2430	}
2431	} else {
2432	/ callback(unassigned) for a BMP code point /
2433	*err = U_INVALID_CHAR_FOUND;
2434	}
2435
2436	args->converter->fromUChar32=sourceChar;
2437	break;
2438	}
2439	} / end if(myTargetIndex<myTargetLength) /
2440	else{
2441	*err =U_BUFFER_OVERFLOW_ERROR;
2442	break;
2443	}
2444
2445	}/ end while(mySourceIndex<mySourceLength) /
2446
2447	/*
2448	* the end of the input stream and detection of truncated input
2449	* are handled by the framework, but for ISO-2022-KR conversion
2450	* we need to be in ASCII mode at the very end
2451	*
2452	* conditions:
2453	* successful
2454	* not in ASCII mode
2455	* end of input and no truncated input
2456	*/
2457	if( U_SUCCESS(*err) &&
2458	isTargetByteDBCS &&
2459	args->flush && source>=sourceLimit && args->converter->fromUChar32==`0`
2460	) {
2461	int32_t sourceIndex;
2462
2463	/ we are switching to ASCII /
2464	isTargetByteDBCS=FALSE;
2465
2466	/ get the source index of the last input character /
2467	/*
2468	* TODO this would be simpler and more reliable if we used a pair
2469	* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
2470	* so that we could simply use the prevSourceIndex here;
2471	* this code gives an incorrect result for the rare case of an unmatched
2472	* trail surrogate that is alone in the last buffer of the text stream
2473	*/
2474	sourceIndex=(int32_t)(source-args->source);
2475	if(sourceIndex>`0`) {
2476	--sourceIndex;
2477	if( U16_IS_TRAIL(args->source[sourceIndex]) &&
2478	(sourceIndex==`0` \|\| U16_IS_LEAD(args->source[sourceIndex-`1`]))
2479	) {
2480	--sourceIndex;
2481	}
2482	} else {
2483	sourceIndex=-`1`;
2484	}
2485
2486	fromUWriteUInt8(
2487	args->converter,
2488	SHIFT_IN_STR, `1`,
2489	&target, (const char *)targetLimit,
2490	&offsets, sourceIndex,
2491	err);
2492	}
2493
2494	/save the state and return /
2495	args->source = source;
2496	args->target = (char*)target;
2497	args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
2498	}
2499
2500	/********************* To Unicode ************************************/
2501
2502	static void U_CALLCONV
2503	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
2504	UErrorCode* err){
2505	char const* sourceStart;
2506	UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2507
2508	UConverterToUnicodeArgs subArgs;
2509	int32_t minArgsSize;
2510
2511	/ set up the subconverter arguments /
2512	if(args->size<sizeof(UConverterToUnicodeArgs)) {
2513	minArgsSize = args->size;
2514	} else {
2515	minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
2516	}
2517
2518	uprv_memcpy(&subArgs, args, minArgsSize);
2519	subArgs.size = (uint16_t)minArgsSize;
2520	subArgs.converter = myData->currentConverter;
2521
2522	/ remember the original start of the input for offsets /
2523	sourceStart = args->source;
2524
2525	if(myData->key != `0`) {
2526	/ continue with a partial escape sequence /
2527	goto escape;
2528	}
2529
2530	while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
2531	/Find the end of the buffer e.g : Next Escape Seq \| end of Buffer/
2532	subArgs.source = args->source;
2533	subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
2534	if(subArgs.source != subArgs.sourceLimit) {
2535	/*
2536	* get the current partial byte sequence
2537	*
2538	* it needs to be moved between the public and the subconverter
2539	* so that the conversion framework, which only sees the public
2540	* converter, can handle truncated and illegal input etc.
2541	*/
2542	if(args->converter->toULength > `0`) {
2543	uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
2544	}
2545	subArgs.converter->toULength = args->converter->toULength;
2546
2547	/*
2548	* Convert up to the end of the input, or to before the next escape character.
2549	* Does not handle conversion extensions because the preToU[] state etc.
2550	* is not copied.
2551	*/
2552	ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
2553
2554	if(args->offsets != NULL && sourceStart != args->source) {
2555	/ update offsets to base them on the actual start of the input /
2556	int32_t *offsets = args->offsets;
2557	UChar *target = args->target;
2558	int32_t delta = (int32_t)(args->source - sourceStart);
2559	while(target < subArgs.target) {
2560	if(*offsets >= `0`) {
2561	*offsets += delta;
2562	}
2563	++offsets;
2564	++target;
2565	}
2566	}
2567	args->source = subArgs.source;
2568	args->target = subArgs.target;
2569	args->offsets = subArgs.offsets;
2570
2571	/ copy input/error/overflow buffers /
2572	if(subArgs.converter->toULength > `0`) {
2573	uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
2574	}
2575	args->converter->toULength = subArgs.converter->toULength;
2576
2577	if(*err == U_BUFFER_OVERFLOW_ERROR) {
2578	if(subArgs.converter->UCharErrorBufferLength > `0`) {
2579	uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
2580	subArgs.converter->UCharErrorBufferLength);
2581	}
2582	args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
2583	subArgs.converter->UCharErrorBufferLength = `0`;
2584	}
2585	}
2586
2587	if (U_FAILURE(*err) \|\| (args->source == args->sourceLimit)) {
2588	return;
2589	}
2590
2591	escape:
2592	changeState_2022(args->converter,
2593	&(args->source),
2594	args->sourceLimit,
2595	ISO_2022_KR,
2596	err);
2597	}
2598	}
2599
2600	static void U_CALLCONV
2601	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
2602	UErrorCode* err){
2603	char tempBuf[`2`];
2604	const char mySource = ( char* *) args->source;
2605	UChar *myTarget = args->target;
2606	const char *mySourceLimit = args->sourceLimit;
2607	UChar32 targetUniChar = `0x0000`;
2608	UChar mySourceChar = `0x0000`;
2609	UConverterDataISO2022* myData;
2610	UConverterSharedData* sharedData ;
2611	UBool useFallback;
2612
2613	myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2614	if(myData->version==`1`){
2615	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
2616	return;
2617	}
2618
2619	/ initialize state /
2620	sharedData = myData->currentConverter->sharedData;
2621	useFallback = args->converter->useFallback;
2622
2623	if(myData->key != `0`) {
2624	/ continue with a partial escape sequence /
2625	goto escape;
2626	} else if(args->converter->toULength == `1` && mySource < mySourceLimit && myTarget < args->targetLimit) {
2627	/ continue with a partial double-byte character /
2628	mySourceChar = args->converter->toUBytes[`0`];
2629	args->converter->toULength = `0`;
2630	goto getTrailByte;
2631	}
2632
2633	while(mySource< mySourceLimit){
2634
2635	if(myTarget < args->targetLimit){
2636
2637	mySourceChar= (unsigned char) *mySource++;
2638
2639	if(mySourceChar==UCNV_SI){
2640	myData->toU2022State.g = `0`;
2641	if (myData->isEmptySegment) {
2642	myData->isEmptySegment = FALSE; / we are handling it, reset to avoid future spurious errors /
2643	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
2644	args->converter->toUCallbackReason = UCNV_IRREGULAR;
2645	args->converter->toUBytes[`0`] = (uint8_t)mySourceChar;
2646	args->converter->toULength = `1`;
2647	args->target = myTarget;
2648	args->source = mySource;
2649	return;
2650	}
2651	/consume the source /
2652	continue;
2653	}else if(mySourceChar==UCNV_SO){
2654	myData->toU2022State.g = `1`;
2655	myData->isEmptySegment = TRUE; / Begin a new segment, empty so far /
2656	/consume the source /
2657	continue;
2658	}else if(mySourceChar==ESC_2022){
2659	mySource--;
2660	escape:
2661	myData->isEmptySegment = FALSE; / Any invalid ESC sequences will be detected separately, so just reset this /
2662	changeState_2022(args->converter,&(mySource),
2663	mySourceLimit, ISO_2022_KR, err);
2664	if(U_FAILURE(*err)){
2665	args->target = myTarget;
2666	args->source = mySource;
2667	return;
2668	}
2669	continue;
2670	}
2671
2672	myData->isEmptySegment = FALSE; / Any invalid char errors will be detected separately, so just reset this /
2673	if(myData->toU2022State.g == `1`) {
2674	if(mySource < mySourceLimit) {
2675	int leadIsOk, trailIsOk;
2676	uint8_t trailByte;
2677	getTrailByte:
2678	targetUniChar = missingCharMarker;
2679	trailByte = (uint8_t)*mySource;
2680	/*
2681	* Ticket 5691: consistent illegal sequences:
2682	* - We include at least the first byte in the illegal sequence.
2683	* - If any of the non-initial bytes could be the start of a character,
2684	* we stop the illegal sequence before the first one of those.
2685	*
2686	* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
2687	* an ESC/SO/SI, we report only the first byte as the illegal sequence.
2688	* Otherwise we convert or report the pair of bytes.
2689	*/
2690	leadIsOk = (uint8_t)(mySourceChar - `0x21`) <= (`0x7e` - `0x21`);
2691	trailIsOk = (uint8_t)(trailByte - `0x21`) <= (`0x7e` - `0x21`);
2692	if (leadIsOk && trailIsOk) {
2693	++mySource;
2694	tempBuf[`0`] = (char)(mySourceChar + `0x80`);
2695	tempBuf[`1`] = (char)(trailByte + `0x80`);
2696	targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, `2`, useFallback);
2697	mySourceChar = (mySourceChar << `8`) \| trailByte;
2698	} else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte))) {
2699	/ report a pair of illegal bytes if the second byte is not a DBCS starter /
2700	++mySource;
2701	/ add another bit so that the code below writes 2 bytes in case of error /
2702	mySourceChar = static_cast<UChar>(`0x10000` \| (mySourceChar << `8`) \| trailByte);
2703	}
2704	} else {
2705	args->converter->toUBytes[`0`] = (uint8_t)mySourceChar;
2706	args->converter->toULength = `1`;
2707	break;
2708	}
2709	}
2710	else if(mySourceChar <= `0x7f`) {
2711	targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - `1`, `1`, useFallback);
2712	} else {
2713	targetUniChar = `0xffff`;
2714	}
2715	if(targetUniChar < `0xfffe`){
2716	if(args->offsets) {
2717	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= `0xff` ? `1` : `2`));
2718	}
2719	*(myTarget++)=(UChar)targetUniChar;
2720	}
2721	else {
2722	/ Call the callback function/
2723	toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
2724	break;
2725	}
2726	}
2727	else{
2728	*err =U_BUFFER_OVERFLOW_ERROR;
2729	break;
2730	}
2731	}
2732	args->target = myTarget;
2733	args->source = mySource;
2734	}
2735
2736	/************************ END ISO2022-KR ******************************/
2737
2738	/************************* ISO-2022-CN *******************************
2739	*
2740	* Rules for ISO-2022-CN Encoding:
2741	* i) The designator sequence must appear once on a line before any instance
2742	* of character set it designates.
2743	* ii) If two lines contain characters from the same character set, both lines
2744	* must include the designator sequence.
2745	* iii) Once the designator sequence is known, a shifting sequence has to be found
2746	* to invoke the shifting
2747	* iv) All lines start in ASCII and end in ASCII.
2748	* v) Four shifting sequences are employed for this purpose:
2749	*
2750	* Sequcence ASCII Eq Charsets
2751	* ---------- ------- ---------
2752	* SI <SI> US-ASCII
2753	* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
2754	* SS2 <ESC>N CNS-11643-1992 Plane 2
2755	* SS3 <ESC>O CNS-11643-1992 Planes 3-7
2756	*
2757	* vi)
2758	* SOdesignator : ESC "$" ")" finalchar_for_SO
2759	* SS2designator : ESC "$" "*" finalchar_for_SS2
2760	* SS3designator : ESC "$" "+" finalchar_for_SS3
2761	*
2762	* ESC $ ) A Indicates the bytes following SO are Chinese
2763	* characters as defined in GB 2312-80, until
2764	* another SOdesignation appears
2765	*
2766	*
2767	* ESC $ ) E Indicates the bytes following SO are as defined
2768	* in ISO-IR-165 (for details, see section 2.1),
2769	* until another SOdesignation appears
2770	*
2771	* ESC $ ) G Indicates the bytes following SO are as defined
2772	* in CNS 11643-plane-1, until another
2773	* SOdesignation appears
2774	*
2775	* ESC $ * H Indicates the two bytes immediately following
2776	* SS2 is a Chinese character as defined in CNS
2777	* 11643-plane-2, until another SS2designation
2778	* appears
2779	* (Meaning <ESC>N must precede every 2 byte
2780	* sequence.)
2781	*
2782	* ESC $ + I Indicates the immediate two bytes following SS3
2783	* is a Chinese character as defined in CNS
2784	* 11643-plane-3, until another SS3designation
2785	* appears
2786	* (Meaning <ESC>O must precede every 2 byte
2787	* sequence.)
2788	*
2789	* ESC $ + J Indicates the immediate two bytes following SS3
2790	* is a Chinese character as defined in CNS
2791	* 11643-plane-4, until another SS3designation
2792	* appears
2793	* (In English: <ESC>O must precede every 2 byte
2794	* sequence.)
2795	*
2796	* ESC $ + K Indicates the immediate two bytes following SS3
2797	* is a Chinese character as defined in CNS
2798	* 11643-plane-5, until another SS3designation
2799	* appears
2800	*
2801	* ESC $ + L Indicates the immediate two bytes following SS3
2802	* is a Chinese character as defined in CNS
2803	* 11643-plane-6, until another SS3designation
2804	* appears
2805	*
2806	* ESC $ + M Indicates the immediate two bytes following SS3
2807	* is a Chinese character as defined in CNS
2808	* 11643-plane-7, until another SS3designation
2809	* appears
2810	*
2811	* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
2812	* has its own designation information before any Chinese characters
2813	* appear
2814	*
2815	*/
2816
/*
 * ISO-2022-CN designator escape sequences.
 * Each one is ESC '$' followed by ')' (SO designator), '*' (SS2 designator)
 * or '+' (SS3 designator) and a final byte that selects the charset.
 *
 * The following are defined this way (arrays, not pointers) to make the
 * strings truly readonly.
 */
static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";             /* ESC $ ) A */
static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";             /* ESC $ ) E */
static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
2827
2828	/******************* ISO2022-CN Data ***********************/
2829	static const char* const escSeqCharsCN[`10`] ={
2830	SHIFT_IN_STR, / 0 ASCII /
2831	GB_2312_80_STR, / 1 GB2312_1 /
2832	ISO_IR_165_STR, / 2 ISO_IR_165 /
2833	CNS_11643_1992_Plane_1_STR,
2834	CNS_11643_1992_Plane_2_STR,
2835	CNS_11643_1992_Plane_3_STR,
2836	CNS_11643_1992_Plane_4_STR,
2837	CNS_11643_1992_Plane_5_STR,
2838	CNS_11643_1992_Plane_6_STR,
2839	CNS_11643_1992_Plane_7_STR
2840	};
2841
2842	static void U_CALLCONV
2843	UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
2844	UConverter *cnv = args->converter;
2845	UConverterDataISO2022 *converterData;
2846	ISO2022State *pFromU2022State;
2847	uint8_t target = (uint8_t ) args->target;
2848	const uint8_t targetLimit = (const* uint8_t *) args->targetLimit;
2849	const UChar* source = args->source;
2850	const UChar* sourceLimit = args->sourceLimit;
2851	int32_t* offsets = args->offsets;
2852	UChar32 sourceChar;
2853	char buffer[`8`];
2854	int32_t len;
2855	int8_t choices[`3`];
2856	int32_t choiceCount;
2857	uint32_t targetValue = `0`;
2858	UBool useFallback;
2859
2860	/ set up the state /
2861	converterData = (UConverterDataISO2022*)cnv->extraInfo;
2862	pFromU2022State = &converterData->fromU2022State;
2863
2864	choiceCount = `0`;
2865
2866	/ check if the last codepoint of previous buffer was a lead surrogate/
2867	if((sourceChar = cnv->fromUChar32)!=`0` && target< targetLimit) {
2868	goto getTrail;
2869	}
2870
2871	while( source < sourceLimit){
2872	if(target < targetLimit){
2873
2874	sourceChar = *(source++);
2875	/check if the char is a First surrogate/
2876	if(U16_IS_SURROGATE(sourceChar)) {
2877	if(U16_IS_SURROGATE_LEAD(sourceChar)) {
2878	getTrail:
2879	/look ahead to find the trail surrogate/
2880	if(source < sourceLimit) {
2881	/ test the following code unit /
2882	UChar trail=(UChar) *source;
2883	if(U16_IS_TRAIL(trail)) {
2884	source++;
2885	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
2886	cnv->fromUChar32=`0x00`;
2887	/ convert this supplementary code point /
2888	/ exit this condition tree /
2889	} else {
2890	/ this is an unmatched lead code unit (1st surrogate) /
2891	/ callback(illegal) /
2892	*err=U_ILLEGAL_CHAR_FOUND;
2893	cnv->fromUChar32=sourceChar;
2894	break;
2895	}
2896	} else {
2897	/ no more input /
2898	cnv->fromUChar32=sourceChar;
2899	break;
2900	}
2901	} else {
2902	/ this is an unmatched trail code unit (2nd surrogate) /
2903	/ callback(illegal) /
2904	*err=U_ILLEGAL_CHAR_FOUND;
2905	cnv->fromUChar32=sourceChar;
2906	break;
2907	}
2908	}
2909
2910	/ do the conversion /
2911	if(sourceChar <= `0x007f` ){
2912	/ do not convert SO/SI/ESC /
2913	if(IS_2022_CONTROL(sourceChar)) {
2914	/ callback(illegal) /
2915	*err=U_ILLEGAL_CHAR_FOUND;
2916	cnv->fromUChar32=sourceChar;
2917	break;
2918	}
2919
2920	/ US-ASCII /
2921	if(pFromU2022State->g == `0`) {
2922	buffer[`0`] = (char)sourceChar;
2923	len = `1`;
2924	} else {
2925	buffer[`0`] = UCNV_SI;
2926	buffer[`1`] = (char)sourceChar;
2927	len = `2`;
2928	pFromU2022State->g = `0`;
2929	choiceCount = `0`;
2930	}
2931	if(sourceChar == CR \|\| sourceChar == LF) {
2932	/ reset the state at the end of a line /
2933	uprv_memset(pFromU2022State, `0`, sizeof(ISO2022State));
2934	choiceCount = `0`;
2935	}
2936	}
2937	else{
2938	/ convert U+0080..U+10ffff /
2939	int32_t i;
2940	int8_t cs, g;
2941
2942	if(choiceCount == `0`) {
2943	/ try the current SO/G1 converter first /
2944	choices[`0`] = pFromU2022State->cs[`1`];
2945
2946	/ default to GB2312_1 if none is designated yet /
2947	if(choices[`0`] == `0`) {
2948	choices[`0`] = GB2312_1;
2949	}
2950
2951	if(converterData->version == `0`) {
2952	/ ISO-2022-CN /
2953
2954	/ try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane /
2955	if(choices[`0`] == GB2312_1) {
2956	choices[`1`] = (int8_t)CNS_11643_1;
2957	} else {
2958	choices[`1`] = (int8_t)GB2312_1;
2959	}
2960
2961	choiceCount = `2`;
2962	} else if (converterData->version == `1`) {
2963	/ ISO-2022-CN-EXT /
2964
2965	/ try one of the other converters /
2966	switch(choices[`0`]) {
2967	case GB2312_1:
2968	choices[`1`] = (int8_t)CNS_11643_1;
2969	choices[`2`] = (int8_t)ISO_IR_165;
2970	break;
2971	case ISO_IR_165:
2972	choices[`1`] = (int8_t)GB2312_1;
2973	choices[`2`] = (int8_t)CNS_11643_1;
2974	break;
2975	default: / CNS_11643_x /
2976	choices[`1`] = (int8_t)GB2312_1;
2977	choices[`2`] = (int8_t)ISO_IR_165;
2978	break;
2979	}
2980
2981	choiceCount = `3`;
2982	} else {
2983	choices[`0`] = (int8_t)CNS_11643_1;
2984	choices[`1`] = (int8_t)GB2312_1;
2985	}
2986	}
2987
2988	cs = g = `0`;
2989	/*
2990	* len==0: no mapping found yet
2991	* len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
2992	* len>0: found a roundtrip result, done
2993	*/
2994	len = `0`;
2995	/*
2996	* We will turn off useFallback after finding a fallback,
2997	* but we still get fallbacks from PUA code points as usual.
2998	* Therefore, we will also need to check that we don't overwrite
2999	* an early fallback with a later one.
3000	*/
3001	useFallback = cnv->useFallback;
3002
3003	for(i = `0`; i < choiceCount && len <= `0`; ++i) {
3004	int8_t cs0 = choices[i];
3005	if(cs0 > `0`) {
3006	uint32_t value;
3007	int32_t len2;
3008	if(cs0 >= CNS_11643_0) {
3009	len2 = MBCS_FROM_UCHAR32_ISO2022(
3010	converterData->myConverterArray[CNS_11643],
3011	sourceChar,
3012	&value,
3013	useFallback,
3014	MBCS_OUTPUT_3);
3015	if(len2 == `3` \|\| (len2 == -`3` && len == `0`)) {
3016	targetValue = value;
3017	cs = (int8_t)(CNS_11643_0 + (value >> `16`) - `0x80`);
3018	if(len2 >= `0`) {
3019	len = `2`;
3020	} else {
3021	len = -`2`;
3022	useFallback = FALSE;
3023	}
3024	if(cs == CNS_11643_1) {
3025	g = `1`;
3026	} else if(cs == CNS_11643_2) {
3027	g = `2`;
3028	} else / plane 3..7 / if(converterData->version == `1`) {
3029	g = `3`;
3030	} else {
3031	/ ISO-2022-CN (without -EXT) does not support plane 3..7 /
3032	len = `0`;
3033	}
3034	}
3035	} else {
3036	/ GB2312_1 or ISO-IR-165 /
3037	U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS);
3038	len2 = MBCS_FROM_UCHAR32_ISO2022(
3039	converterData->myConverterArray[cs0],
3040	sourceChar,
3041	&value,
3042	useFallback,
3043	MBCS_OUTPUT_2);
3044	if(len2 == `2` \|\| (len2 == -`2` && len == `0`)) {
3045	targetValue = value;
3046	len = len2;
3047	cs = cs0;
3048	g = `1`;
3049	useFallback = FALSE;
3050	}
3051	}
3052	}
3053	}
3054
3055	if(len != `0`) {
3056	len = `0`; / count output bytes; it must have been abs(len) == 2 /
3057
3058	/ write the designation sequence if necessary /
3059	if(cs != pFromU2022State->cs[g]) {
3060	if(cs < CNS_11643) {
3061	uprv_memcpy(buffer, escSeqCharsCN[cs], `4`);
3062	} else {
3063	U_ASSERT(cs >= CNS_11643_1);
3064	uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], `4`);
3065	}
3066	len = `4`;
3067	pFromU2022State->cs[g] = cs;
3068	if(g == `1`) {
3069	/ changing the SO/G1 charset invalidates the choices[] /
3070	choiceCount = `0`;
3071	}
3072	}
3073
3074	/ write the shift sequence if necessary /
3075	if(g != pFromU2022State->g) {
3076	switch(g) {
3077	case `1`:
3078	buffer[len++] = UCNV_SO;
3079
3080	/ set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 /
3081	pFromU2022State->g = `1`;
3082	break;
3083	case `2`:
3084	buffer[len++] = `0x1b`;
3085	buffer[len++] = `0x4e`;
3086	break;
3087	default: / case 3 /
3088	buffer[len++] = `0x1b`;
3089	buffer[len++] = `0x4f`;
3090	break;
3091	}
3092	}
3093
3094	/ write the two output bytes /
3095	buffer[len++] = (char)(targetValue >> `8`);
3096	buffer[len++] = (char)targetValue;
3097	} else {
3098	/ if we cannot find the character after checking all codepages*
3099	* then this is an error
3100	*/
3101	*err = U_INVALID_CHAR_FOUND;
3102	cnv->fromUChar32=sourceChar;
3103	break;
3104	}
3105	}
3106
3107	/ output len>0 bytes in buffer[] /
3108	if(len == `1`) {
3109	*target++ = buffer[`0`];
3110	if(offsets) {
3111	offsets++ = (int32_t)(source - args->source - `1`); /* -1: known to be ASCII /
3112	}
3113	} else if(len == `2` && (target + `2`) <= targetLimit) {
3114	*target++ = buffer[`0`];
3115	*target++ = buffer[`1`];
3116	if(offsets) {
3117	int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
3118	*offsets++ = sourceIndex;
3119	*offsets++ = sourceIndex;
3120	}
3121	} else {
3122	fromUWriteUInt8(
3123	cnv,
3124	buffer, len,
3125	&target, (const char *)targetLimit,
3126	&offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
3127	err);
3128	if(U_FAILURE(*err)) {
3129	break;
3130	}
3131	}
3132	} / end if(myTargetIndex<myTargetLength) /
3133	else{
3134	*err =U_BUFFER_OVERFLOW_ERROR;
3135	break;
3136	}
3137
3138	}/ end while(mySourceIndex<mySourceLength) /
3139
3140	/*
3141	* the end of the input stream and detection of truncated input
3142	* are handled by the framework, but for ISO-2022-CN conversion
3143	* we need to be in ASCII mode at the very end
3144	*
3145	* conditions:
3146	* successful
3147	* not in ASCII mode
3148	* end of input and no truncated input
3149	*/
3150	if( U_SUCCESS(*err) &&
3151	pFromU2022State->g!=`0` &&
3152	args->flush && source>=sourceLimit && cnv->fromUChar32==`0`
3153	) {
3154	int32_t sourceIndex;
3155
3156	/ we are switching to ASCII /
3157	pFromU2022State->g=`0`;
3158
3159	/ get the source index of the last input character /
3160	/*
3161	* TODO this would be simpler and more reliable if we used a pair
3162	* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
3163	* so that we could simply use the prevSourceIndex here;
3164	* this code gives an incorrect result for the rare case of an unmatched
3165	* trail surrogate that is alone in the last buffer of the text stream
3166	*/
3167	sourceIndex=(int32_t)(source-args->source);
3168	if(sourceIndex>`0`) {
3169	--sourceIndex;
3170	if( U16_IS_TRAIL(args->source[sourceIndex]) &&
3171	(sourceIndex==`0` \|\| U16_IS_LEAD(args->source[sourceIndex-`1`]))
3172	) {
3173	--sourceIndex;
3174	}
3175	} else {
3176	sourceIndex=-`1`;
3177	}
3178
3179	fromUWriteUInt8(
3180	cnv,
3181	SHIFT_IN_STR, `1`,
3182	&target, (const char *)targetLimit,
3183	&offsets, sourceIndex,
3184	err);
3185	}
3186
3187	/save the state and return /
3188	args->source = source;
3189	args->target = (char*)target;
3190	}
3191
3192
3193	static void U_CALLCONV
3194	UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
3195	UErrorCode* err){
3196	char tempBuf[`3`];
3197	const char mySource = (char* *) args->source;
3198	UChar *myTarget = args->target;
3199	const char *mySourceLimit = args->sourceLimit;
3200	uint32_t targetUniChar = `0x0000`;
3201	uint32_t mySourceChar = `0x0000`;
3202	UConverterDataISO2022* myData;
3203	ISO2022State *pToU2022State;
3204
3205	myData=(UConverterDataISO2022*)(args->converter->extraInfo);
3206	pToU2022State = &myData->toU2022State;
3207
3208	if(myData->key != `0`) {
3209	/ continue with a partial escape sequence /
3210	goto escape;
3211	} else if(args->converter->toULength == `1` && mySource < mySourceLimit && myTarget < args->targetLimit) {
3212	/ continue with a partial double-byte character /
3213	mySourceChar = args->converter->toUBytes[`0`];
3214	args->converter->toULength = `0`;
3215	targetUniChar = missingCharMarker;
3216	goto getTrailByte;
3217	}
3218
3219	while(mySource < mySourceLimit){
3220
3221	targetUniChar =missingCharMarker;
3222
3223	if(myTarget < args->targetLimit){
3224
3225	mySourceChar= (unsigned char) *mySource++;
3226
3227	switch(mySourceChar){
3228	case UCNV_SI:
3229	pToU2022State->g=`0`;
3230	if (myData->isEmptySegment) {
3231	myData->isEmptySegment = FALSE; / we are handling it, reset to avoid future spurious errors /
3232	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
3233	args->converter->toUCallbackReason = UCNV_IRREGULAR;
3234	args->converter->toUBytes[`0`] = static_cast<uint8_t>(mySourceChar);
3235	args->converter->toULength = `1`;
3236	args->target = myTarget;
3237	args->source = mySource;
3238	return;
3239	}
3240	continue;
3241
3242	case UCNV_SO:
3243	if(pToU2022State->cs[`1`] != `0`) {
3244	pToU2022State->g=`1`;
3245	myData->isEmptySegment = TRUE; / Begin a new segment, empty so far /
3246	continue;
3247	} else {
3248	/ illegal to have SO before a matching designator /
3249	myData->isEmptySegment = FALSE; / Handling a different error, reset this to avoid future spurious errs /
3250	break;
3251	}
3252
3253	case ESC_2022:
3254	mySource--;
3255	escape:
3256	{
3257	const char * mySourceBefore = mySource;
3258	int8_t toULengthBefore = args->converter->toULength;
3259
3260	changeState_2022(args->converter,&(mySource),
3261	mySourceLimit, ISO_2022_CN,err);
3262
3263	/ After SO there must be at least one character before a designator (designator error handled separately) /
3264	if(myData->key==`0` && U_SUCCESS(*err) && myData->isEmptySegment) {
3265	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
3266	args->converter->toUCallbackReason = UCNV_IRREGULAR;
3267	args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
3268	}
3269	}
3270
3271	/ invalid or illegal escape sequence /
3272	if(U_FAILURE(*err)){
3273	args->target = myTarget;
3274	args->source = mySource;
3275	myData->isEmptySegment = FALSE; / Reset to avoid future spurious errors /
3276	return;
3277	}
3278	continue;
3279
3280	/ ISO-2022-CN does not use single-byte (C1) SS2 and SS3 /
3281
3282	case CR:
3283	case LF:
3284	uprv_memset(pToU2022State, `0`, sizeof(ISO2022State));
3285	U_FALLTHROUGH;
3286	default:
3287	/ convert one or two bytes /
3288	myData->isEmptySegment = FALSE;
3289	if(pToU2022State->g != `0`) {
3290	if(mySource < mySourceLimit) {
3291	UConverterSharedData *cnv;
3292	StateEnum tempState;
3293	int32_t tempBufLen;
3294	int leadIsOk, trailIsOk;
3295	uint8_t trailByte;
3296	getTrailByte:
3297	trailByte = (uint8_t)*mySource;
3298	/*
3299	* Ticket 5691: consistent illegal sequences:
3300	* - We include at least the first byte in the illegal sequence.
3301	* - If any of the non-initial bytes could be the start of a character,
3302	* we stop the illegal sequence before the first one of those.
3303	*
3304	* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
3305	* an ESC/SO/SI, we report only the first byte as the illegal sequence.
3306	* Otherwise we convert or report the pair of bytes.
3307	*/
3308	leadIsOk = (uint8_t)(mySourceChar - `0x21`) <= (`0x7e` - `0x21`);
3309	trailIsOk = (uint8_t)(trailByte - `0x21`) <= (`0x7e` - `0x21`);
3310	if (leadIsOk && trailIsOk) {
3311	++mySource;
3312	tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
3313	if(tempState >= CNS_11643_0) {
3314	cnv = myData->myConverterArray[CNS_11643];
3315	tempBuf[`0`] = (char) (`0x80`+(tempState-CNS_11643_0));
3316	tempBuf[`1`] = (char) (mySourceChar);
3317	tempBuf[`2`] = (char) trailByte;
3318	tempBufLen = `3`;
3319
3320	}else{
3321	U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS);
3322	cnv = myData->myConverterArray[tempState];
3323	tempBuf[`0`] = (char) (mySourceChar);
3324	tempBuf[`1`] = (char) trailByte;
3325	tempBufLen = `2`;
3326	}
3327	targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
3328	mySourceChar = (mySourceChar << `8`) \| trailByte;
3329	} else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte))) {
3330	/ report a pair of illegal bytes if the second byte is not a DBCS starter /
3331	++mySource;
3332	/ add another bit so that the code below writes 2 bytes in case of error /
3333	mySourceChar = `0x10000` \| (mySourceChar << `8`) \| trailByte;
3334	}
3335	if(pToU2022State->g>=`2`) {
3336	/ return from a single-shift state to the previous one /
3337	pToU2022State->g=pToU2022State->prevG;
3338	}
3339	} else {
3340	args->converter->toUBytes[`0`] = (uint8_t)mySourceChar;
3341	args->converter->toULength = `1`;
3342	goto endloop;
3343	}
3344	}
3345	else{
3346	if(mySourceChar <= `0x7f`) {
3347	targetUniChar = (UChar) mySourceChar;
3348	}
3349	}
3350	break;
3351	}
3352	if(targetUniChar < (missingCharMarker-`1`/0xfffe/)){
3353	if(args->offsets){
3354	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= `0xff` ? `1` : `2`));
3355	}
3356	*(myTarget++)=(UChar)targetUniChar;
3357	}
3358	else if(targetUniChar > missingCharMarker){
3359	/ disassemble the surrogate pair and write to output/
3360	targetUniChar-=`0x0010000`;
3361	*myTarget = (UChar)(`0xd800`+(UChar)(targetUniChar>>`10`));
3362	if(args->offsets){
3363	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= `0xff` ? `1` : `2`));
3364	}
3365	++myTarget;
3366	if(myTarget< args->targetLimit){
3367	*myTarget = (UChar)(`0xdc00`+(UChar)(targetUniChar&`0x3ff`));
3368	if(args->offsets){
3369	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= `0xff` ? `1` : `2`));
3370	}
3371	++myTarget;
3372	}else{
3373	args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
3374	(UChar)(`0xdc00`+(UChar)(targetUniChar&`0x3ff`));
3375	}
3376
3377	}
3378	else{
3379	/ Call the callback function/
3380	toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
3381	break;
3382	}
3383	}
3384	else{
3385	*err =U_BUFFER_OVERFLOW_ERROR;
3386	break;
3387	}
3388	}
3389	endloop:
3390	args->target = myTarget;
3391	args->source = mySource;
3392	}
3393	#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
3394
3395	static void U_CALLCONV
3396	_ISO_2022_WriteSub(UConverterFromUnicodeArgs args, int32_t offsetIndex, UErrorCode err) {
3397	UConverter *cnv = args->converter;
3398	UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) cnv->extraInfo;
3399	ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
3400	char p, subchar;
3401	char buffer[`8`];
3402	int32_t length;
3403
3404	subchar=(char *)cnv->subChars;
3405	length=cnv->subCharLen; / assume length==1 for most variants /
3406
3407	p = buffer;
3408	switch(myConverterData->locale[`0`]){
3409	case `'j'`:
3410	{
3411	int8_t cs;
3412
3413	if(pFromU2022State->g == `1`) {
3414	/ JIS7: switch from G1 to G0 /
3415	pFromU2022State->g = `0`;
3416	*p++ = UCNV_SI;
3417	}
3418
3419	cs = pFromU2022State->cs[`0`];
3420	if(cs != ASCII && cs != JISX201) {
3421	/ not in ASCII or JIS X 0201: switch to ASCII /
3422	pFromU2022State->cs[`0`] = (int8_t)ASCII;
3423	*p++ = `'\x1b'`;
3424	*p++ = `'\x28'`;
3425	*p++ = `'\x42'`;
3426	}
3427
3428	*p++ = subchar[`0`];
3429	break;
3430	}
3431	case `'c'`:
3432	if(pFromU2022State->g != `0`) {
3433	/ not in ASCII mode: switch to ASCII /
3434	pFromU2022State->g = `0`;
3435	*p++ = UCNV_SI;
3436	}
3437	*p++ = subchar[`0`];
3438	break;
3439	case `'k'`:
3440	if(myConverterData->version == `0`) {
3441	if(length == `1`) {
3442	if(args->converter->fromUnicodeStatus) {
3443	/ in DBCS mode: switch to SBCS /
3444	args->converter->fromUnicodeStatus = `0`;
3445	*p++ = UCNV_SI;
3446	}
3447	*p++ = subchar[`0`];
3448	} else / length == 2/ {
3449	if(!args->converter->fromUnicodeStatus) {
3450	/ in SBCS mode: switch to DBCS /
3451	args->converter->fromUnicodeStatus = `1`;
3452	*p++ = UCNV_SO;
3453	}
3454	*p++ = subchar[`0`];
3455	*p++ = subchar[`1`];
3456	}
3457	break;
3458	} else {
3459	/ save the subconverter's substitution string /
3460	uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
3461	int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
3462
3463	/ set our substitution string into the subconverter /
3464	myConverterData->currentConverter->subChars = (uint8_t *)subchar;
3465	myConverterData->currentConverter->subCharLen = (int8_t)length;
3466
3467	/ let the subconverter write the subchar, set/retrieve fromUChar32 state /
3468	args->converter = myConverterData->currentConverter;
3469	myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
3470	ucnv_cbFromUWriteSub(args, `0`, err);
3471	cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
3472	args->converter = cnv;
3473
3474	/ restore the subconverter's substitution string /
3475	myConverterData->currentConverter->subChars = currentSubChars;
3476	myConverterData->currentConverter->subCharLen = currentSubCharLen;
3477
3478	if(*err == U_BUFFER_OVERFLOW_ERROR) {
3479	if(myConverterData->currentConverter->charErrorBufferLength > `0`) {
3480	uprv_memcpy(
3481	cnv->charErrorBuffer,
3482	myConverterData->currentConverter->charErrorBuffer,
3483	myConverterData->currentConverter->charErrorBufferLength);
3484	}
3485	cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
3486	myConverterData->currentConverter->charErrorBufferLength = `0`;
3487	}
3488	return;
3489	}
3490	default:
3491	/ not expected /
3492	break;
3493	}
3494	ucnv_cbFromUWriteBytes(args,
3495	buffer, (int32_t)(p - buffer),
3496	offsetIndex, err);
3497	}
3498
3499	/*
3500	* Structure for cloning an ISO 2022 converter into a single memory block.
3501	*/
3502	struct cloneStruct
3503	{
3504	UConverter cnv;
3505	UConverter currentConverter;
3506	UConverterDataISO2022 mydata;
3507	};
3508
3509
3510	U_CDECL_BEGIN
3511
3512	static UConverter * U_CALLCONV
3513	_ISO_2022_SafeClone(
3514	const UConverter *cnv,
3515	void *stackBuffer,
3516	int32_t *pBufferSize,
3517	UErrorCode *status)
3518	{
3519	struct cloneStruct * localClone;
3520	UConverterDataISO2022 *cnvData;
3521	int32_t i, size;
3522
3523	if (U_FAILURE(*status)){
3524	return nullptr;
3525	}
3526
3527	if (pBufferSize == `0`) { /* 'preflighting' request - set needed size into pBufferSize /*
3528	pBufferSize = (int32_t)sizeof(struct* cloneStruct);
3529	return NULL;
3530	}
3531
3532	cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
3533	localClone = (struct cloneStruct *)stackBuffer;
3534
3535	/ ucnv.c/ucnv_safeClone() copied the main UConverter already /
3536
3537	uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
3538	localClone->cnv.extraInfo = &localClone->mydata; / set pointer to extra data /
3539	localClone->cnv.isExtraLocal = TRUE;
3540
3541	/ share the subconverters /
3542
3543	if(cnvData->currentConverter != NULL) {
3544	size = (int32_t)sizeof(UConverter);
3545	localClone->mydata.currentConverter =
3546	ucnv_safeClone(cnvData->currentConverter,
3547	&localClone->currentConverter,
3548	&size, status);
3549	if(U_FAILURE(*status)) {
3550	return NULL;
3551	}
3552	}
3553
3554	for(i=`0`; i<UCNV_2022_MAX_CONVERTERS; ++i) {
3555	if(cnvData->myConverterArray[i] != NULL) {
3556	ucnv_incrementRefCount(cnvData->myConverterArray[i]);
3557	}
3558	}
3559
3560	return &localClone->cnv;
3561	}
3562
3563	U_CDECL_END
3564
3565	static void U_CALLCONV
3566	_ISO_2022_GetUnicodeSet(const UConverter *cnv,
3567	const USetAdder *sa,
3568	UConverterUnicodeSet which,
3569	UErrorCode *pErrorCode)
3570	{
3571	int32_t i;
3572	UConverterDataISO2022* cnvData;
3573
3574	if (U_FAILURE(*pErrorCode)) {
3575	return;
3576	}
3577	#ifdef U_ENABLE_GENERIC_ISO_2022
3578	if (cnv->sharedData == &_ISO2022Data) {
3579	/ We use UTF-8 in this case /
3580	sa->addRange(sa->set, `0`, `0xd7FF`);
3581	sa->addRange(sa->set, `0xE000`, `0x10FFFF`);
3582	return;
3583	}
3584	#endif
3585
3586	cnvData = (UConverterDataISO2022*)cnv->extraInfo;
3587
3588	/ open a set and initialize it with code points that are algorithmically round-tripped /
3589	switch(cnvData->locale[`0`]){
3590	case `'j'`:
3591	/ include JIS X 0201 which is hardcoded /
3592	sa->add(sa->set, `0xa5`);
3593	sa->add(sa->set, `0x203e`);
3594	if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
3595	/ include Latin-1 for some variants of JP /
3596	sa->addRange(sa->set, `0`, `0xff`);
3597	} else {
3598	/ include ASCII for JP /
3599	sa->addRange(sa->set, `0`, `0x7f`);
3600	}
3601	if(cnvData->version==`3` \|\| cnvData->version==`4` \|\| which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
3602	/*
3603	* Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
3604	* because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
3605	* use half-width Katakana.
3606	* This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
3607	* half-width Katakana via the ESC ( I sequence.
3608	* However, we only emit (fromUnicode) half-width Katakana according to the
3609	* definition of each variant.
3610	*
3611	* When including fallbacks,
3612	* we need to include half-width Katakana Unicode code points for all JP variants because
3613	* JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
3614	*/
3615	/ include half-width Katakana for JP /
3616	sa->addRange(sa->set, HWKANA_START, HWKANA_END);
3617	}
3618	break;
3619	#if !UCONFIG_ONLY_HTML_CONVERSION
3620	case `'c'`:
3621	case `'z'`:
3622	/ include ASCII for CN /
3623	sa->addRange(sa->set, `0`, `0x7f`);
3624	break;
3625	case `'k'`:
3626	/ there is only one converter for KR, and it is not in the myConverterArray[] /
3627	cnvData->currentConverter->sharedData->impl->getUnicodeSet(
3628	cnvData->currentConverter, sa, which, pErrorCode);
3629	/ the loop over myConverterArray[] will simply not find another converter /
3630	break;
3631	#endif
3632	default:
3633	break;
3634	}
3635
3636	#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
3637	if( (cnvData->locale[`0`]==`'c'` \|\| cnvData->locale[`0`]==`'z'`) &&
3638	cnvData->version==`0` && i==CNS_11643
3639	) {
3640	/ special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 /
3641	ucnv_MBCSGetUnicodeSetForBytes(
3642	cnvData->myConverterArray[i],
3643	sa, UCNV_ROUNDTRIP_SET,
3644	`0`, `0x81`, `0x82`,
3645	pErrorCode);
3646	}
3647	#endif
3648
3649	for (i=`0`; i<UCNV_2022_MAX_CONVERTERS; i++) {
3650	UConverterSetFilter filter;
3651	if(cnvData->myConverterArray[i]!=NULL) {
3652	if(cnvData->locale[`0`]==`'j'` && i==JISX208) {
3653	/*
3654	* Only add code points that map to Shift-JIS codes
3655	* corresponding to JIS X 0208.
3656	*/
3657	filter=UCNV_SET_FILTER_SJIS;
3658	#if !UCONFIG_ONLY_HTML_CONVERSION
3659	} else if( (cnvData->locale[`0`]==`'c'` \|\| cnvData->locale[`0`]==`'z'`) &&
3660	cnvData->version==`0` && i==CNS_11643) {
3661	/*
3662	* Version-specific for CN:
3663	* CN version 0 does not map CNS planes 3..7 although
3664	* they are all available in the CNS conversion table;
3665	* CN version 1 (-EXT) does map them all.
3666	* The two versions create different Unicode sets.
3667	*/
3668	filter=UCNV_SET_FILTER_2022_CN;
3669	} else if(i==KSC5601) {
3670	/*
3671	* Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
3672	* are broader than GR94.
3673	*/
3674	filter=UCNV_SET_FILTER_GR94DBCS;
3675	#endif
3676	} else {
3677	filter=UCNV_SET_FILTER_NONE;
3678	}
3679	ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
3680	}
3681	}
3682
3683	/*
3684	* ISO 2022 converters must not convert SO/SI/ESC despite what
3685	* sub-converters do by themselves.
3686	* Remove these characters from the set.
3687	*/
3688	sa->remove(sa->set, `0x0e`);
3689	sa->remove(sa->set, `0x0f`);
3690	sa->remove(sa->set, `0x1b`);
3691
3692	/ ISO 2022 converters do not convert C1 controls either /
3693	sa->removeRange(sa->set, `0x80`, `0x9f`);
3694	}
3695
3696	static const UConverterImpl _ISO2022Impl={
3697	UCNV_ISO_2022,
3698
3699	NULL,
3700	NULL,
3701
3702	_ISO2022Open,
3703	_ISO2022Close,
3704	_ISO2022Reset,
3705
3706	#ifdef U_ENABLE_GENERIC_ISO_2022
3707	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
3708	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
3709	ucnv_fromUnicode_UTF8,
3710	ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
3711	#else
3712	NULL,
3713	NULL,
3714	NULL,
3715	NULL,
3716	#endif
3717	NULL,
3718
3719	NULL,
3720	_ISO2022getName,
3721	_ISO_2022_WriteSub,
3722	_ISO_2022_SafeClone,
3723	_ISO_2022_GetUnicodeSet,
3724
3725	NULL,
3726	NULL
3727	};
3728	static const UConverterStaticData _ISO2022StaticData={
3729	sizeof(UConverterStaticData),
3730	"ISO_2022",
3731	`2022`,
3732	UCNV_IBM,
3733	UCNV_ISO_2022,
3734	`1`,
3735	`3`, / max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) /
3736	{ `0x1a`, `0`, `0`, `0` },
3737	`1`,
3738	FALSE,
3739	FALSE,
3740	`0`,
3741	`0`,
3742	{ `0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0` } / reserved /
3743	};
3744	const UConverterSharedData _ISO2022Data=
3745	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022StaticData, &_ISO2022Impl);
3746
3747	/**********JP*************/
3748	static const UConverterImpl _ISO2022JPImpl={
3749	UCNV_ISO_2022,
3750
3751	NULL,
3752	NULL,
3753
3754	_ISO2022Open,
3755	_ISO2022Close,
3756	_ISO2022Reset,
3757
3758	UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3759	UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3760	UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3761	UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3762	NULL,
3763
3764	NULL,
3765	_ISO2022getName,
3766	_ISO_2022_WriteSub,
3767	_ISO_2022_SafeClone,
3768	_ISO_2022_GetUnicodeSet,
3769
3770	NULL,
3771	NULL
3772	};
3773	static const UConverterStaticData _ISO2022JPStaticData={
3774	sizeof(UConverterStaticData),
3775	"ISO_2022_JP",
3776	`0`,
3777	UCNV_IBM,
3778	UCNV_ISO_2022,
3779	`1`,
3780	`6`, / max 6 bytes per UChar: 4-byte escape sequence + DBCS /
3781	{ `0x1a`, `0`, `0`, `0` },
3782	`1`,
3783	FALSE,
3784	FALSE,
3785	`0`,
3786	`0`,
3787	{ `0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0` } / reserved /
3788	};
3789
3790	namespace {
3791
3792	const UConverterSharedData _ISO2022JPData=
3793	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022JPImpl);
3794
3795	} // namespace
3796
3797	#if !UCONFIG_ONLY_HTML_CONVERSION
3798	/********** KR ************/
3799	static const UConverterImpl _ISO2022KRImpl={
3800	UCNV_ISO_2022,
3801
3802	NULL,
3803	NULL,
3804
3805	_ISO2022Open,
3806	_ISO2022Close,
3807	_ISO2022Reset,
3808
3809	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3810	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3811	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3812	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3813	NULL,
3814
3815	NULL,
3816	_ISO2022getName,
3817	_ISO_2022_WriteSub,
3818	_ISO_2022_SafeClone,
3819	_ISO_2022_GetUnicodeSet,
3820
3821	NULL,
3822	NULL
3823	};
3824	static const UConverterStaticData _ISO2022KRStaticData={
3825	sizeof(UConverterStaticData),
3826	"ISO_2022_KR",
3827	`0`,
3828	UCNV_IBM,
3829	UCNV_ISO_2022,
3830	`1`,
3831	`8`, / max 8 bytes per UChar /
3832	{ `0x1a`, `0`, `0`, `0` },
3833	`1`,
3834	FALSE,
3835	FALSE,
3836	`0`,
3837	`0`,
3838	{ `0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0` } / reserved /
3839	};
3840
3841	namespace {
3842
3843	const UConverterSharedData _ISO2022KRData=
3844	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022KRStaticData, &_ISO2022KRImpl);
3845
3846	} // namespace
3847
3848	/************ CN ************/
3849	static const UConverterImpl _ISO2022CNImpl={
3850
3851	UCNV_ISO_2022,
3852
3853	NULL,
3854	NULL,
3855
3856	_ISO2022Open,
3857	_ISO2022Close,
3858	_ISO2022Reset,
3859
3860	UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3861	UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3862	UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3863	UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3864	NULL,
3865
3866	NULL,
3867	_ISO2022getName,
3868	_ISO_2022_WriteSub,
3869	_ISO_2022_SafeClone,
3870	_ISO_2022_GetUnicodeSet,
3871
3872	NULL,
3873	NULL
3874	};
3875	static const UConverterStaticData _ISO2022CNStaticData={
3876	sizeof(UConverterStaticData),
3877	"ISO_2022_CN",
3878	`0`,
3879	UCNV_IBM,
3880	UCNV_ISO_2022,
3881	`1`,
3882	`8`, / max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS /
3883	{ `0x1a`, `0`, `0`, `0` },
3884	`1`,
3885	FALSE,
3886	FALSE,
3887	`0`,
3888	`0`,
3889	{ `0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0` } / reserved /
3890	};
3891
3892	namespace {
3893
3894	const UConverterSharedData _ISO2022CNData=
3895	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);
3896
3897	} // namespace
3898	#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
3899
3900	#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
3901

Browse the source code of engine/third_party/icu/source/common/ucnv2022.cpp