ucnvisci.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/ucnvisci.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 2000-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	* file name: ucnvisci.c
9	* encoding: UTF-8
10	* tab size: 8 (not used)
11	* indentation:4
12	*
13	* created on: 2001JUN26
14	* created by: Ram Viswanadha
15	*
16	* Date Name Description
17	* 24/7/2001 Ram Added support for EXT character handling
18	*/
19
20	#include "unicode/utypes.h"
21
22	#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
23
24	#include "unicode/ucnv.h"
25	#include "unicode/ucnv_cb.h"
26	#include "unicode/utf16.h"
27	#include "cmemory.h"
28	#include "ucnv_bld.h"
29	#include "ucnv_cnv.h"
30	#include "cstring.h"
31	#include "uassert.h"
32
/*
 * Constants used throughout the ISCII converter: Unicode code points that
 * need special handling, ISCII byte values, and range limits.
 */
#define UCNV_OPTIONS_VERSION_MASK 0xf
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE
/* Parenthesized so the product stays one operand wherever the macro is used. */
#define TELUGU_DELTA        (DELTA * TELUGU)
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE

/* Gurmukhi (PNJ) code points and delta used for script-specific handling. */
#define PNJ_DELTA           0x0100
#define PNJ_BINDI           0x0A02
#define PNJ_TIPPI           0x0A70
#define PNJ_SIGN_VIRAMA     0x0A4D
#define PNJ_ADHAK           0x0A71
#define PNJ_HA              0x0A39
#define PNJ_RRA             0x0A5C
68
/*
 * Indic scripts handled by the converter, numbered consecutively from 0.
 * DELTA is not a script: it is the factor a script's enumerator is
 * multiplied by to obtain that script's delta (see _ISCIIOpen).
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    DELTA = 0x80
} UniLang;
81
/**
 * Enumeration for switching code pages if <ATR>+<one of below values>
 * is encountered
 */
typedef enum {
    DEF = 0x40,
    RMN = 0x41,
    DEV = 0x42,
    BNG = 0x43,
    TML = 0x44,
    TLG = 0x45,
    ASM = 0x46,
    ORI = 0x47,
    KND = 0x48,
    MLM = 0x49,
    GJR = 0x4A,
    PNJ = 0x4B,
    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
} ISCIILang;
106
/*
 * One bit per script (or script pair) as stored in validityTable entries.
 * ZERO is the empty mask.
 */
typedef enum {
    DEV_MASK = 0x80,
    PNJ_MASK = 0x40,
    GJR_MASK = 0x20,
    ORI_MASK = 0x10,
    BNG_MASK = 0x08,
    KND_MASK = 0x04,
    MLM_MASK = 0x02,
    TML_MASK = 0x01,
    ZERO     = 0x00
} MaskEnum;
118
119	#define ISCII_CNV_PREFIX "ISCII,version="
120
121	typedef struct {
122	UChar contextCharToUnicode; / previous Unicode codepoint for contextual analysis /
123	UChar contextCharFromUnicode; / previous Unicode codepoint for contextual analysis /
124	uint16_t defDeltaToUnicode; / delta for switching to default state when DEF is encountered /
125	uint16_t currentDeltaFromUnicode; / current delta in Indic block /
126	uint16_t currentDeltaToUnicode; / current delta in Indic block /
127	MaskEnum currentMaskFromUnicode; / mask for current state in toUnicode /
128	MaskEnum currentMaskToUnicode; / mask for current state in toUnicode /
129	MaskEnum defMaskToUnicode; / mask for default state in toUnicode /
130	UBool isFirstBuffer; / boolean for fromUnicode to see if we need to announce the first script /
131	UBool resetToDefaultToUnicode; / boolean for reseting to default delta and mask when a newline is encountered/
132	char name[sizeof(ISCII_CNV_PREFIX) + `1`];
133	UChar32 prevToUnicodeStatus; / Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. /
134	} UConverterDataISCII;
135
136	typedef struct LookupDataStruct {
137	UniLang uniLang;
138	MaskEnum maskEnum;
139	ISCIILang isciiLang;
140	} LookupDataStruct;
141
142	static const LookupDataStruct lookupInitialData[]={
143	{ DEVANAGARI, DEV_MASK, DEV },
144	{ BENGALI, BNG_MASK, BNG },
145	{ GURMUKHI, PNJ_MASK, PNJ },
146	{ GUJARATI, GJR_MASK, GJR },
147	{ ORIYA, ORI_MASK, ORI },
148	{ TAMIL, TML_MASK, TML },
149	{ TELUGU, KND_MASK, TLG },
150	{ KANNADA, KND_MASK, KND },
151	{ MALAYALAM, MLM_MASK, MLM }
152	};
153
154	/*
155	* For special handling of certain Gurmukhi characters.
156	* Bit 0 (value 1): PNJ consonant
157	* Bit 1 (value 2): PNJ Bindi Tippi
158	*/
159	static const uint8_t pnjMap[`80`] = {
160	/ 0A00..0A0F /
161	`0`, `0`, `0`, `0`, `0`, `2`, `0`, `2`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
162	/ 0A10..0A1F /
163	`0`, `0`, `0`, `0`, `0`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`,
164	/ 0A20..0A2F /
165	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `0`, `3`, `3`, `3`, `3`, `3`, `3`,
166	/ 0A30..0A3F /
167	`3`, `0`, `0`, `0`, `0`, `3`, `3`, `0`, `3`, `3`, `0`, `0`, `0`, `0`, `0`, `2`,
168	/ 0A40..0A4F /
169	`0`, `2`, `2`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`
170	};
171
172	static UBool
173	isPNJConsonant(UChar32 c) {
174	if (c < `0xa00` \|\| `0xa50` <= c) {
175	return FALSE;
176	} else {
177	return (UBool)(pnjMap[c - `0xa00`] & `1`);
178	}
179	}
180
181	static UBool
182	isPNJBindiTippi(UChar32 c) {
183	if (c < `0xa00` \|\| `0xa50` <= c) {
184	return FALSE;
185	} else {
186	return (UBool)(pnjMap[c - `0xa00`] >> `1`);
187	}
188	}
189	U_CDECL_BEGIN
190	static void U_CALLCONV
191	_ISCIIOpen(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *errorCode) {
192	if(pArgs->onlyTestIsLoadable) {
193	return;
194	}
195
196	cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
197
198	if (cnv->extraInfo != NULL) {
199	int32_t len=`0`;
200	UConverterDataISCII *converterData=
201	(UConverterDataISCII *) cnv->extraInfo;
202	converterData->contextCharToUnicode=NO_CHAR_MARKER;
203	cnv->toUnicodeStatus = missingCharMarker;
204	converterData->contextCharFromUnicode=`0x0000`;
205	converterData->resetToDefaultToUnicode=FALSE;
206	/ check if the version requested is supported /
207	if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < `9`) {
208	/ initialize state variables /
209	converterData->currentDeltaFromUnicode
210	= converterData->currentDeltaToUnicode
211	= converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
212
213	converterData->currentMaskFromUnicode
214	= converterData->currentMaskToUnicode
215	= converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
216
217	converterData->isFirstBuffer=TRUE;
218	(void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
219	len = (int32_t)uprv_strlen(converterData->name);
220	converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + `'0'`);
221	converterData->name[len+`1`]=`0`;
222
223	converterData->prevToUnicodeStatus = `0x0000`;
224	} else {
225	uprv_free(cnv->extraInfo);
226	cnv->extraInfo = NULL;
227	*errorCode = U_ILLEGAL_ARGUMENT_ERROR;
228	}
229
230	} else {
231	*errorCode =U_MEMORY_ALLOCATION_ERROR;
232	}
233	}
234
235	static void U_CALLCONV
236	_ISCIIClose(UConverter *cnv) {
237	if (cnv->extraInfo!=NULL) {
238	if (!cnv->isExtraLocal) {
239	uprv_free(cnv->extraInfo);
240	}
241	cnv->extraInfo=NULL;
242	}
243	}
244
245	static const char* U_CALLCONV
246	_ISCIIgetName(const UConverter* cnv) {
247	if (cnv->extraInfo) {
248	UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
249	return myData->name;
250	}
251	return NULL;
252	}
253
254	static void U_CALLCONV
255	_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
256	UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
257	if (choice<=UCNV_RESET_TO_UNICODE) {
258	cnv->toUnicodeStatus = missingCharMarker;
259	cnv->mode=`0`;
260	data->currentDeltaToUnicode=data->defDeltaToUnicode;
261	data->currentMaskToUnicode = data->defMaskToUnicode;
262	data->contextCharToUnicode=NO_CHAR_MARKER;
263	data->prevToUnicodeStatus = `0x0000`;
264	}
265	if (choice!=UCNV_RESET_TO_UNICODE) {
266	cnv->fromUChar32=`0x0000`;
267	data->contextCharFromUnicode=`0x00`;
268	data->currentMaskFromUnicode=data->defMaskToUnicode;
269	data->currentDeltaFromUnicode=data->defDeltaToUnicode;
270	data->isFirstBuffer=TRUE;
271	data->resetToDefaultToUnicode=FALSE;
272	}
273	}
274
275	/**
276	* The values in validity table are indexed by the lower bits of Unicode
277	* range 0x0900 - 0x09ff. The values have a structure like:
278	* ---------------------------------------------------------------
279	* \| DEV \| PNJ \| GJR \| ORI \| BNG \| TLG \| MLM \| TML \|
280	* \| \| \| \| \| ASM \| KND \| \| \|
281	* ---------------------------------------------------------------
282	* If a code point is valid in a particular script
283	* then that bit is turned on
284	*
285	* Unicode does not distinguish between Bengali and Assamese so we use 1 bit for
286	* to represent these languages
287	*
288	* Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
289	* and combine and use 1 bit to represent these languages.
290	*
291	* TODO: It is probably easier to understand and maintain to change this
292	* to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
293	*/
294
295	static const uint8_t validityTable[`128`] = {
296	/ This state table is tool generated please do not edit unless you know exactly what you are doing /
297	/ Note: This table was edited to mirror the Windows XP implementation /
298	/ISCII:Valid:Unicode /
299	/0xa0 : 0x00: 0x900 / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
300	/0xa1 : 0xb8: 0x901 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
301	/0xa2 : 0xfe: 0x902 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
302	/0xa3 : 0xbf: 0x903 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
303	/0x00 : 0x00: 0x904 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
304	/0xa4 : 0xff: 0x905 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
305	/0xa5 : 0xff: 0x906 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
306	/0xa6 : 0xff: 0x907 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
307	/0xa7 : 0xff: 0x908 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
308	/0xa8 : 0xff: 0x909 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
309	/0xa9 : 0xff: 0x90a / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
310	/0xaa : 0xfe: 0x90b / DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
311	/0x00 : 0x00: 0x90c / DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
312	/0xae : 0x80: 0x90d / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
313	/0xab : 0x87: 0x90e / DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
314	/0xac : 0xff: 0x90f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
315	/0xad : 0xff: 0x910 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
316	/0xb2 : 0x80: 0x911 / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
317	/0xaf : 0x87: 0x912 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
318	/0xb0 : 0xff: 0x913 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
319	/0xb1 : 0xff: 0x914 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
320	/0xb3 : 0xff: 0x915 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
321	/0xb4 : 0xfe: 0x916 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
322	/0xb5 : 0xfe: 0x917 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
323	/0xb6 : 0xfe: 0x918 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
324	/0xb7 : 0xff: 0x919 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
325	/0xb8 : 0xff: 0x91a / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
326	/0xb9 : 0xfe: 0x91b / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
327	/0xba : 0xff: 0x91c / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
328	/0xbb : 0xfe: 0x91d / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
329	/0xbc : 0xff: 0x91e / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
330	/0xbd : 0xff: 0x91f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
331	/0xbe : 0xfe: 0x920 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
332	/0xbf : 0xfe: 0x921 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
333	/0xc0 : 0xfe: 0x922 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
334	/0xc1 : 0xff: 0x923 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
335	/0xc2 : 0xff: 0x924 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
336	/0xc3 : 0xfe: 0x925 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
337	/0xc4 : 0xfe: 0x926 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
338	/0xc5 : 0xfe: 0x927 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
339	/0xc6 : 0xff: 0x928 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
340	/0xc7 : 0x81: 0x929 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK ,
341	/0xc8 : 0xff: 0x92a / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
342	/0xc9 : 0xfe: 0x92b / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
343	/0xca : 0xfe: 0x92c / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
344	/0xcb : 0xfe: 0x92d / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
345	/0xcc : 0xfe: 0x92e / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
346	/0xcd : 0xff: 0x92f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
347	/0xcf : 0xff: 0x930 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
348	/0xd0 : 0x87: 0x931 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,
349	/0xd1 : 0xff: 0x932 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
350	/0xd2 : 0xb7: 0x933 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
351	/0xd3 : 0x83: 0x934 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,
352	/0xd4 : 0xff: 0x935 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
353	/0xd5 : 0xfe: 0x936 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
354	/0xd6 : 0xbf: 0x937 / DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
355	/0xd7 : 0xff: 0x938 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
356	/0xd8 : 0xff: 0x939 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
357	/0x00 : 0x00: 0x93A / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
358	/0x00 : 0x00: 0x93B / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
359	/0xe9 : 0xda: 0x93c / DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
360	/0x00 : 0x00: 0x93d / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
361	/0xda : 0xff: 0x93e / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
362	/0xdb : 0xff: 0x93f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
363	/0xdc : 0xff: 0x940 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
364	/0xdd : 0xff: 0x941 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
365	/0xde : 0xff: 0x942 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
366	/0xdf : 0xbe: 0x943 / DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
367	/0x00 : 0x00: 0x944 / DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO ,
368	/0xe3 : 0x80: 0x945 / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
369	/0xe0 : 0x87: 0x946 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
370	/0xe1 : 0xff: 0x947 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
371	/0xe2 : 0xff: 0x948 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
372	/0xe7 : 0x80: 0x949 / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
373	/0xe4 : 0x87: 0x94a / DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
374	/0xe5 : 0xff: 0x94b / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
375	/0xe6 : 0xff: 0x94c / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
376	/0xe8 : 0xff: 0x94d / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
377	/0xec : 0x00: 0x94e / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
378	/0xed : 0x00: 0x94f / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
379	/0x00 : 0x00: 0x950 / DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
380	/0x00 : 0x00: 0x951 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
381	/0x00 : 0x00: 0x952 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
382	/0x00 : 0x00: 0x953 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
383	/0x00 : 0x00: 0x954 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
384	/0x00 : 0x00: 0x955 / ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO ,
385	/0x00 : 0x00: 0x956 / ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO ,
386	/0x00 : 0x00: 0x957 / ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO ,
387	/0x00 : 0x00: 0x958 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
388	/0x00 : 0x00: 0x959 / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
389	/0x00 : 0x00: 0x95a / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
390	/0x00 : 0x00: 0x95b / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
391	/0x00 : 0x00: 0x95c / DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
392	/0x00 : 0x00: 0x95d / DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
393	/0x00 : 0x00: 0x95e / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
394	/0xce : 0x98: 0x95f / DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
395	/0x00 : 0x00: 0x960 / DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
396	/0x00 : 0x00: 0x961 / DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
397	/0x00 : 0x00: 0x962 / DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
398	/0x00 : 0x00: 0x963 / DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
399	/0xea : 0xf8: 0x964 / DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
400	/0xeaea : 0x00: 0x965/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
401	/0xf1 : 0xff: 0x966 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
402	/0xf2 : 0xff: 0x967 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
403	/0xf3 : 0xff: 0x968 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
404	/0xf4 : 0xff: 0x969 / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
405	/0xf5 : 0xff: 0x96a / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
406	/0xf6 : 0xff: 0x96b / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
407	/0xf7 : 0xff: 0x96c / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
408	/0xf8 : 0xff: 0x96d / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
409	/0xf9 : 0xff: 0x96e / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
410	/0xfa : 0xff: 0x96f / DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
411	/0x00 : 0x80: 0x970 / DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
412	/*
413	* The length of the array is 128 to provide values for 0x900..0x97f.
414	* The last 15 entries for 0x971..0x97f of the validity table are all zero
415	* because no Indic script uses such Unicode code points.
416	*/
417	/0x00 : 0x00: 0x9yz / ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO
418	};
419
/*
 * Maps the low 7 bits of a code point in 0x0900..0x097f to its ISCII
 * encoding. Values up to 0x00ff are a single byte; larger values hold two
 * bytes, high byte first; 0xFFFF marks code points with no mapping.
 * Eight entries per row; the row comment gives the first code point.
 */
static const uint16_t fromUnicodeTable[128] = {
    /* 0x0900 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0xa4e0, 0x00a4, 0x00a5, 0x00a6,
    /* 0x0908 */ 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0xA6E9, 0x00ae, 0x00ab, 0x00ac,
    /* 0x0910 */ 0x00ad, 0x00b2, 0x00af, 0x00b0, 0x00b1, 0x00b3, 0x00b4, 0x00b5,
    /* 0x0918 */ 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd,
    /* 0x0920 */ 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5,
    /* 0x0928 */ 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd,
    /* 0x0930 */ 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6,
    /* 0x0938 */ 0x00d7, 0x00d8, 0xFFFF, 0xFFFF, 0x00e9, 0xEAE9, 0x00da, 0x00db,
    /* 0x0940 */ 0x00dc, 0x00dd, 0x00de, 0x00df, 0xDFE9, 0x00e3, 0x00e0, 0x00e1,
    /* 0x0948 */ 0x00e2, 0x00e7, 0x00e4, 0x00e5, 0x00e6, 0x00e8, 0x00ec, 0x00ed,
    /* 0x0950 (first entry: OM Symbol) */
                 0xA1E9, 0xFFFF, 0xF0B8, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0958 */ 0xb3e9, 0xb4e9, 0xb5e9, 0xbae9, 0xbfe9, 0xC0E9, 0xc9e9, 0x00ce,
    /* 0x0960 */ 0xAAe9, 0xA7E9, 0xDBE9, 0xDCE9, 0x00ea, 0xeaea, 0x00f1, 0x00f2,
    /* 0x0968 */ 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa,
    /* 0x0970 */ 0xF0BF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0978 */ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
};
/*
 * Maps an ISCII byte (the index) to a Unicode code unit. Bytes 0x00..0xa0
 * map to themselves, 0xa1..0xfa map into 0x0900..0x097f (0xd9 maps to
 * 0x200D), and 0xFFFF marks bytes with no direct mapping.
 * Eight entries per row; the row comment gives the first ISCII byte.
 */
static const uint16_t toUnicodeTable[256] = {
    /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    /* 0x18 */ 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    /* 0x28 */ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    /* 0x38 */ 0x0038, 0x0039, 0x003A, 0x003B, 0x003c, 0x003d, 0x003e, 0x003f,
    /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */ 0x00A0, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908,
    /* 0xa8 */ 0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912,
    /* 0xb0 */ 0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919,
    /* 0xb8 */ 0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921,
    /* 0xc0 */ 0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929,
    /* 0xc8 */ 0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930,
    /* 0xd0 */ 0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938,
    /* 0xd8 */ 0x0939, 0x200D, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943,
    /* 0xe0 */ 0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949,
    /* 0xe8 */ 0x094d, 0x093c, 0x0964, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0xf0 */ 0xFFFF, 0x0966, 0x0967, 0x0968, 0x0969, 0x096a, 0x096b, 0x096c,
    /* 0xf8 */ 0x096d, 0x096e, 0x096f, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
808
/*
 * Pairs of { ISCII byte, Unicode code point }. Row 0 is a header whose
 * first element is the number of rows in the array, header included.
 */
static const uint16_t vowelSignESpecialCases[][2] = {
    { 2 /* length of array */, 0 },
    { 0xA4, 0x0904 },
};
813
/*
 * Pairs of { ISCII byte, Unicode code point }. Row 0 is a header whose
 * first element is the number of rows in the array, header included.
 */
static const uint16_t nuktaSpecialCases[][2] = {
    { 16 /* length of array */, 0 },
    { 0xA6, 0x090c },
    { 0xEA, 0x093D },
    { 0xDF, 0x0944 },
    { 0xA1, 0x0950 },
    { 0xb3, 0x0958 },
    { 0xb4, 0x0959 },
    { 0xb5, 0x095a },
    { 0xba, 0x095b },
    { 0xbf, 0x095c },
    { 0xC0, 0x095d },
    { 0xc9, 0x095e },
    { 0xAA, 0x0960 },
    { 0xA7, 0x0961 },
    { 0xDB, 0x0962 },
    { 0xDC, 0x0963 },
};
832
833
/*
 * Writes the 1-, 2- or 3-byte value targetByteUnit to target, most
 * significant byte first, and records `offset` in offsets for every byte
 * written there (when offsets is non-NULL). Bytes that do not fit before
 * targetLimit are appended to args->converter->charErrorBuffer and *err is
 * set to U_BUFFER_OVERFLOW_ERROR.
 *
 * `offset` is (source - args->source - 1); for a 3-byte value it is
 * decremented once before being recorded.
 *
 * The macro arguments are expanded several times and target/offsets are
 * advanced in place, so pass plain lvalues without side effects.
 */
#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t offset = (int32_t)(source - args->source-1);                                        \
      /* write the targetUniChar to target */                                                   \
    if(target < targetLimit){                                                                   \
        if(targetByteUnit <= 0xFF){                                                             \
            *(target)++ = (uint8_t)(targetByteUnit);                                            \
            if(offsets){                                                                        \
                *(offsets++) = offset;                                                          \
            }                                                                                   \
        }else{                                                                                  \
            if (targetByteUnit > 0xFFFF) {                                                      \
                *(target)++ = (uint8_t)(targetByteUnit>>16);                                    \
                if (offsets) {                                                                  \
                    --offset;                                                                   \
                    *(offsets++) = offset;                                                      \
                }                                                                               \
            }                                                                                   \
            if (!(target < targetLimit)) {                                                      \
                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
                                (uint8_t)(targetByteUnit >> 8);                                 \
                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
                                (uint8_t)targetByteUnit;                                        \
                *err = U_BUFFER_OVERFLOW_ERROR;                                                 \
            } else {                                                                            \
                *(target)++ = (uint8_t)(targetByteUnit>>8);                                     \
                if(offsets){                                                                    \
                    *(offsets++) = offset;                                                      \
                }                                                                               \
                if(target < targetLimit){                                                       \
                    *(target)++ = (uint8_t)  targetByteUnit;                                    \
                    if(offsets){                                                                \
                        *(offsets++) = offset ;                                                 \
                    }                                                                           \
                }else{                                                                          \
                    args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
                                (uint8_t) (targetByteUnit);                                     \
                    *err = U_BUFFER_OVERFLOW_ERROR;                                             \
                }                                                                               \
            }                                                                                   \
        }                                                                                       \
    }else{                                                                                      \
        if (targetByteUnit & 0xFF0000) {                                                        \
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
                        (uint8_t) (targetByteUnit >>16);                                        \
        }                                                                                       \
        if(targetByteUnit & 0xFF00){                                                            \
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
                        (uint8_t) (targetByteUnit >>8);                                         \
        }                                                                                       \
        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =            \
                        (uint8_t) (targetByteUnit);                                             \
        *err = U_BUFFER_OVERFLOW_ERROR;                                                         \
    }                                                                                           \
} UPRV_BLOCK_MACRO_END
888
889	/ Rules:*
890	* Explicit Halant :
891	* <HALANT> + <ZWNJ>
892	* Soft Halant :
893	* <HALANT> + <ZWJ>
894	*/
895	static void U_CALLCONV
896	UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
897	UConverterFromUnicodeArgs * args, UErrorCode * err) {
898	const UChar *source = args->source;
899	const UChar *sourceLimit = args->sourceLimit;
900	unsigned char target = (unsigned* char *) args->target;
901	unsigned char targetLimit = (unsigned* char *) args->targetLimit;
902	int32_t* offsets = args->offsets;
903	uint32_t targetByteUnit = `0x0000`;
904	UChar32 sourceChar = `0x0000`;
905	UChar32 tempContextFromUnicode = `0x0000`; / For special handling of the Gurmukhi script. /
906	UConverterDataISCII *converterData;
907	uint16_t newDelta=`0`;
908	uint16_t range = `0`;
909	UBool deltaChanged = FALSE;
910
911	if ((args->converter == NULL) \|\| (args->targetLimit < args->target) \|\| (args->sourceLimit < args->source)) {
912	*err = U_ILLEGAL_ARGUMENT_ERROR;
913	return;
914	}
915	/ initialize data /
916	converterData=(UConverterDataISCII*)args->converter->extraInfo;
917	newDelta=converterData->currentDeltaFromUnicode;
918	range = (uint16_t)(newDelta/DELTA);
919
920	if ((sourceChar = args->converter->fromUChar32)!=`0`) {
921	goto getTrail;
922	}
923
924	/writing the char to the output stream /
925	while (source < sourceLimit) {
926	/ Write the language code following LF only if LF is not the last character. /
927	if (args->converter->fromUnicodeStatus == LF) {
928	targetByteUnit = ATR<<`8`;
929	targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
930	args->converter->fromUnicodeStatus = `0x0000`;
931	/ now append ATR and language code /
932	WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
933	if (U_FAILURE(*err)) {
934	break;
935	}
936	}
937
938	sourceChar = *source++;
939	tempContextFromUnicode = converterData->contextCharFromUnicode;
940
941	targetByteUnit = missingCharMarker;
942
943	/check if input is in ASCII and C0 control codes range/
944	if (sourceChar <= ASCII_END) {
945	args->converter->fromUnicodeStatus = sourceChar;
946	WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
947	if (U_FAILURE(*err)) {
948	break;
949	}
950	continue;
951	}
952	switch (sourceChar) {
953	case ZWNJ:
954	/ contextChar has HALANT /
955	if (converterData->contextCharFromUnicode) {
956	converterData->contextCharFromUnicode = `0x00`;
957	targetByteUnit = ISCII_HALANT;
958	} else {
959	/ consume ZWNJ and continue /
960	converterData->contextCharFromUnicode = `0x00`;
961	continue;
962	}
963	break;
964	case ZWJ:
965	/ contextChar has HALANT /
966	if (converterData->contextCharFromUnicode) {
967	targetByteUnit = ISCII_NUKTA;
968	} else {
969	targetByteUnit =ISCII_INV;
970	}
971	converterData->contextCharFromUnicode = `0x00`;
972	break;
973	default:
974	/ is the sourceChar in the INDIC_RANGE? /
975	if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
976	/ Danda and Double Danda are valid in Northern scripts.. since Unicode*
977	* does not include these codepoints in all Northern scrips we need to
978	* filter them out
979	*/
980	if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
981	/ find out to which block the souceChar belongs/
982	range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
983	newDelta =(uint16_t)(range*DELTA);
984
985	/ Now are we in the same block as the previous? /
986	if (newDelta!= converterData->currentDeltaFromUnicode \|\| converterData->isFirstBuffer) {
987	converterData->currentDeltaFromUnicode = newDelta;
988	converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
989	deltaChanged =TRUE;
990	converterData->isFirstBuffer=FALSE;
991	}
992
993	if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
994	if (sourceChar == PNJ_TIPPI) {
995	/ Make sure Tippi is converterd to Bindi. /
996	sourceChar = PNJ_BINDI;
997	} else if (sourceChar == PNJ_ADHAK) {
998	/ This is for consonant cluster handling. /
999	converterData->contextCharFromUnicode = PNJ_ADHAK;
1000	}
1001
1002	}
1003	/ Normalize all Indic codepoints to Devanagari and map them to ISCII /
1004	/ now subtract the new delta from sourceChar/
1005	sourceChar -= converterData->currentDeltaFromUnicode;
1006	}
1007
1008	/ get the target byte unit /
1009	targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
1010
1011	/ is the code point valid in current script? /
1012	if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==`0`) {
1013	/ Vocallic RR is assigned in ISCII Telugu and Unicode /
1014	if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) \|\| sourceChar!=VOCALLIC_RR) {
1015	targetByteUnit=missingCharMarker;
1016	}
1017	}
1018
1019	if (deltaChanged) {
1020	/ we are in a script block which is different than*
1021	* previous sourceChar's script block write ATR and language codes
1022	*/
1023	uint32_t temp=`0`;
1024	temp =(uint16_t)(ATR<<`8`);
1025	temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
1026	/ reset /
1027	deltaChanged=FALSE;
1028	/ now append ATR and language code /
1029	WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
1030	if (U_FAILURE(*err)) {
1031	break;
1032	}
1033	}
1034
1035	if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
1036	continue;
1037	}
1038	}
1039	/ reset context char /
1040	converterData->contextCharFromUnicode = `0x00`;
1041	break;
1042	}
1043	if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
1044	/ If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. /
1045	/ reset context char /
1046	converterData->contextCharFromUnicode = `0x0000`;
1047	targetByteUnit = targetByteUnit << `16` \| ISCII_HALANT << `8` \| targetByteUnit;
1048	/ write targetByteUnit to target /
1049	WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
1050	if (U_FAILURE(*err)) {
1051	break;
1052	}
1053	} else if (targetByteUnit != missingCharMarker) {
1054	if (targetByteUnit==ISCII_HALANT) {
1055	converterData->contextCharFromUnicode = (UChar)targetByteUnit;
1056	}
1057	/ write targetByteUnit to target/
1058	WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
1059	if (U_FAILURE(*err)) {
1060	break;
1061	}
1062	} else {
1063	/ oops.. the code point is unassigned /
1064	/check if the char is a First surrogate/
1065	if (U16_IS_SURROGATE(sourceChar)) {
1066	if (U16_IS_SURROGATE_LEAD(sourceChar)) {
1067	getTrail:
1068	/look ahead to find the trail surrogate/
1069	if (source < sourceLimit) {
1070	/ test the following code unit /
1071	UChar trail= (*source);
1072	if (U16_IS_TRAIL(trail)) {
1073	source++;
1074	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
1075	*err =U_INVALID_CHAR_FOUND;
1076	/ convert this surrogate code point /
1077	/ exit this condition tree /
1078	} else {
1079	/ this is an unmatched lead code unit (1st surrogate) /
1080	/ callback(illegal) /
1081	*err=U_ILLEGAL_CHAR_FOUND;
1082	}
1083	} else {
1084	/ no more input /
1085	*err = U_ZERO_ERROR;
1086	}
1087	} else {
1088	/ this is an unmatched trail code unit (2nd surrogate) /
1089	/ callback(illegal) /
1090	*err=U_ILLEGAL_CHAR_FOUND;
1091	}
1092	} else {
1093	/ callback(unassigned) for a BMP code point /
1094	*err = U_INVALID_CHAR_FOUND;
1095	}
1096
1097	args->converter->fromUChar32=sourceChar;
1098	break;
1099	}
1100	}/ end while(mySourceIndex<mySourceLength) /
1101
1102	/save the state and return /
1103	args->source = source;
1104	args->target = (char*)target;
1105	}
1106
1107	static const uint16_t lookupTable[][`2`]={
1108	{ ZERO, ZERO }, /DEFALT/
1109	{ ZERO, ZERO }, /ROMAN/
1110	{ DEVANAGARI, DEV_MASK },
1111	{ BENGALI, BNG_MASK },
1112	{ TAMIL, TML_MASK },
1113	{ TELUGU, KND_MASK },
1114	{ BENGALI, BNG_MASK },
1115	{ ORIYA, ORI_MASK },
1116	{ KANNADA, KND_MASK },
1117	{ MALAYALAM, MLM_MASK },
1118	{ GUJARATI, GJR_MASK },
1119	{ GURMUKHI, PNJ_MASK }
1120	};
1121
/*
 * Writes one UTF-16 code unit to the target buffer.
 *
 * Unless targetUniChar is <= ASCII_END or is ZWJ, ZWNJ, DANDA or
 * DOUBLE_DANDA, delta is first added to it (moving the Devanagari-normalized
 * value into the current script's block). Note that this modifies the
 * targetUniChar argument in place.
 *
 * If target has reached args->targetLimit the code unit goes to the
 * converter's UCharErrorBuffer and *err becomes U_BUFFER_OVERFLOW_ERROR;
 * otherwise it is stored and, when offsets is non-NULL, offset is recorded.
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \
    /* add offset to current Indic Block */ \
    if(targetUniChar>ASCII_END && \
           targetUniChar != ZWJ && \
           targetUniChar != ZWNJ && \
           targetUniChar != DANDA && \
           targetUniChar != DOUBLE_DANDA){ \
        \
        targetUniChar+=(uint16_t)(delta); \
    } \
    /* now write the targetUniChar */ \
    if(target<args->targetLimit){ \
        *(target)++ = (UChar)targetUniChar; \
        if(offsets){ \
            *(offsets)++ = (int32_t)(offset); \
        } \
    }else{ \
        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \
            (UChar)targetUniChar; \
        *err = U_BUFFER_OVERFLOW_ERROR; \
    } \
} UPRV_BLOCK_MACRO_END
1144
/*
 * Looks up the Unicode mapping of the ISCII byte sourceChar in
 * toUnicodeTable and stores it in targetUniChar. For bytes above ASCII_END
 * the result is replaced by missingCharMarker when validityTable says the
 * code point is not valid for data->currentMaskToUnicode, except for
 * VOCALLIC_RR while the current delta is TELUGU_DELTA.
 */
#define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN { \
    targetUniChar = toUnicodeTable[(sourceChar)] ; \
    /* is the code point valid in current script? */ \
    if(sourceChar> ASCII_END && \
            (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
        /* Vocalic RR is assigned in ISCII Telugu and Unicode */ \
        if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
                targetUniChar!=VOCALLIC_RR){ \
            targetUniChar=missingCharMarker; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
1157
1158	/***********
1159	* Rules for ISCII to Unicode converter
1160	* ISCII is stateful encoding. To convert ISCII bytes to Unicode,
1161	* which has both precomposed and decomposed forms characters
1162	* pre-context and post-context need to be considered.
1163	*
1164	* Post context
1165	* i) ATR : Attribute code is used to declare the font and script switching.
1166	* Currently we only switch scripts and font codes consumed without generating an error
1167	* ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure,
1168	* obsolete characters
1169	* Pre context
1170	* i) Halant: if preceeded by a halant then it is a explicit halant
1171	* ii) Nukta :
1172	* a) if preceeded by a halant then it is a soft halant
1173	* b) if preceeded by specific consonants and the ligatures have pre-composed
1174	* characters in Unicode then convert to pre-composed characters
1175	* iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda
1176	*
1177	*/
1178
1179	static void U_CALLCONV
1180	UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs args, UErrorCode err) {
1181	const char source = ( char* *) args->source;
1182	UChar *target = args->target;
1183	const char *sourceLimit = args->sourceLimit;
1184	const UChar* targetLimit = args->targetLimit;
1185	uint32_t targetUniChar = `0x0000`;
1186	uint8_t sourceChar = `0x0000`;
1187	UConverterDataISCII* data;
1188	UChar32* toUnicodeStatus=NULL;
1189	UChar32 tempTargetUniChar = `0x0000`;
1190	UChar* contextCharToUnicode= NULL;
1191	UBool found;
1192	int i;
1193	int offset = `0`;
1194
1195	if ((args->converter == NULL) \|\| (target < args->target) \|\| (source < args->source)) {
1196	*err = U_ILLEGAL_ARGUMENT_ERROR;
1197	return;
1198	}
1199
1200	data = (UConverterDataISCII*)(args->converter->extraInfo);
1201	contextCharToUnicode = &data->contextCharToUnicode; / contains previous ISCII codepoint visited /
1202	toUnicodeStatus = (UChar32)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint/
1203
1204	while (U_SUCCESS(*err) && source<sourceLimit) {
1205
1206	targetUniChar = missingCharMarker;
1207
1208	if (target < targetLimit) {
1209	sourceChar = (unsigned char)*(source)++;
1210
1211	/ look at the post-context preform special processing /
1212	if (*contextCharToUnicode==ATR) {
1213
1214	/ If we have ATR in contextCharToUnicode then we need to change our
1215	* state to the Indic Script specified by sourceChar
1216	*/
1217
1218	/ check if the sourceChar is supported script range/
1219	if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
1220	data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & `0x0F`][`0`] * DELTA);
1221	data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & `0x0F`][`1`];
1222	} else if (sourceChar==DEF) {
1223	/ switch back to default /
1224	data->currentDeltaToUnicode = data->defDeltaToUnicode;
1225	data->currentMaskToUnicode = data->defMaskToUnicode;
1226	} else {
1227	if ((sourceChar >= `0x21` && sourceChar <= `0x3F`)) {
1228	/ these are display codes consume and continue /
1229	} else {
1230	*err =U_ILLEGAL_CHAR_FOUND;
1231	/ reset /
1232	*contextCharToUnicode=NO_CHAR_MARKER;
1233	goto CALLBACK;
1234	}
1235	}
1236
1237	/ reset /
1238	*contextCharToUnicode=NO_CHAR_MARKER;
1239
1240	continue;
1241
1242	} else if (*contextCharToUnicode==EXT) {
1243	/ check if sourceChar is in 0xA1-0xEE range /
1244	if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
1245	/ We currently support only Anudatta and Devanagari abbreviation sign /
1246	if (sourceChar==`0xBF` \|\| sourceChar == `0xB8`) {
1247	targetUniChar = (sourceChar==`0xBF`) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
1248
1249	/ find out if the mapping is valid in this state /
1250	if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1251	*contextCharToUnicode= NO_CHAR_MARKER;
1252
1253	/ Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. /
1254	if (data->prevToUnicodeStatus) {
1255	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`1`),data->prevToUnicodeStatus,`0`,err);
1256	data->prevToUnicodeStatus = `0x0000`;
1257	}
1258	/ write to target /
1259	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`2`),targetUniChar,data->currentDeltaToUnicode,err);
1260
1261	continue;
1262	}
1263	}
1264	/ byte unit is unassigned /
1265	targetUniChar = missingCharMarker;
1266	*err= U_INVALID_CHAR_FOUND;
1267	} else {
1268	/ only 0xA1 - 0xEE are legal after EXT char /
1269	*contextCharToUnicode= NO_CHAR_MARKER;
1270	*err = U_ILLEGAL_CHAR_FOUND;
1271	}
1272	goto CALLBACK;
1273	} else if (*contextCharToUnicode==ISCII_INV) {
1274	if (sourceChar==ISCII_HALANT) {
1275	targetUniChar = `0x0020`; / replace with space accoding to Indic FAQ /
1276	} else {
1277	targetUniChar = ZWJ;
1278	}
1279
1280	/ Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. /
1281	if (data->prevToUnicodeStatus) {
1282	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`1`),data->prevToUnicodeStatus,`0`,err);
1283	data->prevToUnicodeStatus = `0x0000`;
1284	}
1285	/ write to target /
1286	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`2`),targetUniChar,data->currentDeltaToUnicode,err);
1287	/ reset /
1288	*contextCharToUnicode=NO_CHAR_MARKER;
1289	}
1290
1291	/ look at the pre-context and perform special processing /
1292	switch (sourceChar) {
1293	case ISCII_INV:
1294	case EXT:
1295	case ATR:
1296	*contextCharToUnicode = (UChar)sourceChar;
1297
1298	if (*toUnicodeStatus != missingCharMarker) {
1299	/ Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. /
1300	if (data->prevToUnicodeStatus) {
1301	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`1`),data->prevToUnicodeStatus,`0`,err);
1302	data->prevToUnicodeStatus = `0x0000`;
1303	}
1304	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`2`),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1305	*toUnicodeStatus = missingCharMarker;
1306	}
1307	continue;
1308	case ISCII_DANDA:
1309	/ handle double danda/
1310	if (*contextCharToUnicode== ISCII_DANDA) {
1311	targetUniChar = DOUBLE_DANDA;
1312	/ clear the context /
1313	*contextCharToUnicode = NO_CHAR_MARKER;
1314	*toUnicodeStatus = missingCharMarker;
1315	} else {
1316	GET_MAPPING(sourceChar,targetUniChar,data);
1317	*contextCharToUnicode = sourceChar;
1318	}
1319	break;
1320	case ISCII_HALANT:
1321	/ handle explicit halant /
1322	if (*contextCharToUnicode == ISCII_HALANT) {
1323	targetUniChar = ZWNJ;
1324	/ clear the context /
1325	*contextCharToUnicode = NO_CHAR_MARKER;
1326	} else {
1327	GET_MAPPING(sourceChar,targetUniChar,data);
1328	*contextCharToUnicode = sourceChar;
1329	}
1330	break;
1331	case `0x0A`:
1332	case `0x0D`:
1333	data->resetToDefaultToUnicode = TRUE;
1334	GET_MAPPING(sourceChar,targetUniChar,data)
1335	;
1336	*contextCharToUnicode = sourceChar;
1337	break;
1338
1339	case ISCII_VOWEL_SIGN_E:
1340	i=`1`;
1341	found=FALSE;
1342	for (; i<vowelSignESpecialCases[`0`][`0`]; i++) {
1343	U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases));
1344	if (vowelSignESpecialCases[i][`0`]==(uint8_t)*contextCharToUnicode) {
1345	targetUniChar=vowelSignESpecialCases[i][`1`];
1346	found=TRUE;
1347	break;
1348	}
1349	}
1350	if (found) {
1351	/ find out if the mapping is valid in this state /
1352	if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1353	/targetUniChar += data->currentDeltaToUnicode ;/
1354	*contextCharToUnicode= NO_CHAR_MARKER;
1355	*toUnicodeStatus = missingCharMarker;
1356	break;
1357	}
1358	}
1359	GET_MAPPING(sourceChar,targetUniChar,data);
1360	*contextCharToUnicode = sourceChar;
1361	break;
1362
1363	case ISCII_NUKTA:
1364	/ handle soft halant /
1365	if (*contextCharToUnicode == ISCII_HALANT) {
1366	targetUniChar = ZWJ;
1367	/ clear the context /
1368	*contextCharToUnicode = NO_CHAR_MARKER;
1369	break;
1370	} else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == `0xc0`) {
1371	/ Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. /
1372	if (data->prevToUnicodeStatus) {
1373	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`1`),data->prevToUnicodeStatus,`0`,err);
1374	data->prevToUnicodeStatus = `0x0000`;
1375	}
1376	/ We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.*
1377	* In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
1378	*/
1379	targetUniChar = PNJ_RRA;
1380	WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-`2`, targetUniChar, `0`, err);
1381	if (U_SUCCESS(*err)) {
1382	targetUniChar = PNJ_SIGN_VIRAMA;
1383	WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-`2`, targetUniChar, `0`, err);
1384	if (U_SUCCESS(*err)) {
1385	targetUniChar = PNJ_HA;
1386	WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-`2`, targetUniChar, `0`, err);
1387	} else {
1388	args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1389	}
1390	} else {
1391	args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
1392	args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1393	}
1394	*toUnicodeStatus = missingCharMarker;
1395	data->contextCharToUnicode = NO_CHAR_MARKER;
1396	continue;
1397	} else {
1398	/ try to handle <CHAR> + ISCII_NUKTA special mappings /
1399	i=`1`;
1400	found =FALSE;
1401	for (; i<nuktaSpecialCases[`0`][`0`]; i++) {
1402	if (nuktaSpecialCases[i][`0`]==(uint8_t)
1403	*contextCharToUnicode) {
1404	targetUniChar=nuktaSpecialCases[i][`1`];
1405	found =TRUE;
1406	break;
1407	}
1408	}
1409	if (found) {
1410	/ find out if the mapping is valid in this state /
1411	if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1412	/targetUniChar += data->currentDeltaToUnicode ;/
1413	*contextCharToUnicode= NO_CHAR_MARKER;
1414	*toUnicodeStatus = missingCharMarker;
1415	if (data->currentDeltaToUnicode == PNJ_DELTA) {
1416	/ Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. /
1417	if (data->prevToUnicodeStatus) {
1418	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`1`),data->prevToUnicodeStatus,`0`,err);
1419	data->prevToUnicodeStatus = `0x0000`;
1420	}
1421	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`2`),targetUniChar,data->currentDeltaToUnicode,err);
1422	continue;
1423	}
1424	break;
1425	}
1426	/ else fall through to default /
1427	}
1428	/ else fall through to default /
1429	U_FALLTHROUGH;
1430	}
1431	default:GET_MAPPING(sourceChar,targetUniChar,data)
1432	;
1433	*contextCharToUnicode = sourceChar;
1434	break;
1435	}
1436
1437	if (*toUnicodeStatus != missingCharMarker) {
1438	/ Check to make sure that consonant clusters are handled correct for Gurmukhi script. /
1439	if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != `0` && isPNJConsonant(data->prevToUnicodeStatus) &&
1440	(*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) {
1441	/ Consonant clusters C + HALANT + C should be encoded as ADHAK + C /
1442	offset = (int)(source-args->source - `3`);
1443	tempTargetUniChar = PNJ_ADHAK; / This is necessary to avoid some compiler warnings. /
1444	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,`0`,err);
1445	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,`0`,err);
1446	data->prevToUnicodeStatus = `0x0000`; / reset the previous unicode code point /
1447	*toUnicodeStatus = missingCharMarker;
1448	continue;
1449	} else {
1450	/ Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. /
1451	if (data->prevToUnicodeStatus) {
1452	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`1`),data->prevToUnicodeStatus,`0`,err);
1453	data->prevToUnicodeStatus = `0x0000`;
1454	}
1455	/ Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.*
1456	* If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
1457	*/
1458	if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
1459	targetUniChar = PNJ_TIPPI - PNJ_DELTA;
1460	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`2`),*toUnicodeStatus,PNJ_DELTA,err);
1461	} else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
1462	/ Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. /
1463	data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
1464	} else {
1465	/ write the previously mapped codepoint /
1466	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -`2`),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1467	}
1468	}
1469	*toUnicodeStatus = missingCharMarker;
1470	}
1471
1472	if (targetUniChar != missingCharMarker) {
1473	/ now save the targetUniChar for delayed write /
1474	*toUnicodeStatus = (UChar) targetUniChar;
1475	if (data->resetToDefaultToUnicode==TRUE) {
1476	data->currentDeltaToUnicode = data->defDeltaToUnicode;
1477	data->currentMaskToUnicode = data->defMaskToUnicode;
1478	data->resetToDefaultToUnicode=FALSE;
1479	}
1480	} else {
1481
1482	/ we reach here only if targetUniChar == missingCharMarker*
1483	* so assign codes to reason and err
1484	*/
1485	*err = U_INVALID_CHAR_FOUND;
1486	CALLBACK:
1487	args->converter->toUBytes[`0`] = (uint8_t) sourceChar;
1488	args->converter->toULength = `1`;
1489	break;
1490	}
1491
1492	} else {
1493	*err =U_BUFFER_OVERFLOW_ERROR;
1494	break;
1495	}
1496	}
1497
1498	if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
1499	/ end of the input stream /
1500	UConverter *cnv = args->converter;
1501
1502	if (contextCharToUnicode==ATR \|\| contextCharToUnicode==EXT \|\| *contextCharToUnicode==ISCII_INV) {
1503	/ set toUBytes[] /
1504	cnv->toUBytes[`0`] = (uint8_t)*contextCharToUnicode;
1505	cnv->toULength = `1`;
1506
1507	/ avoid looping on truncated sequences /
1508	*contextCharToUnicode = NO_CHAR_MARKER;
1509	} else {
1510	cnv->toULength = `0`;
1511	}
1512
1513	if (*toUnicodeStatus != missingCharMarker) {
1514	/ output a remaining target character /
1515	WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -`1`),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1516	*toUnicodeStatus = missingCharMarker;
1517	}
1518	}
1519
1520	args->target = target;
1521	args->source = source;
1522	}
1523
1524	/ structure for SafeClone calculations /
1525	struct cloneISCIIStruct {
1526	UConverter cnv;
1527	UConverterDataISCII mydata;
1528	};
1529
1530	static UConverter * U_CALLCONV
1531	_ISCII_SafeClone(const UConverter *cnv,
1532	void *stackBuffer,
1533	int32_t *pBufferSize,
1534	UErrorCode *status)
1535	{
1536	struct cloneISCIIStruct * localClone;
1537	int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
1538
1539	if (U_FAILURE(*status)) {
1540	return `0`;
1541	}
1542
1543	if (pBufferSize == `0`) { /* 'preflighting' request - set needed size into pBufferSize /*
1544	*pBufferSize = bufferSizeNeeded;
1545	return `0`;
1546	}
1547
1548	localClone = (struct cloneISCIIStruct *)stackBuffer;
1549	/ ucnv.c/ucnv_safeClone() copied the main UConverter already /
1550
1551	uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
1552	localClone->cnv.extraInfo = &localClone->mydata;
1553	localClone->cnv.isExtraLocal = TRUE;
1554
1555	return &localClone->cnv;
1556	}
1557
1558	static void U_CALLCONV
1559	_ISCIIGetUnicodeSet(const UConverter *cnv,
1560	const USetAdder *sa,
1561	UConverterUnicodeSet which,
1562	UErrorCode *pErrorCode)
1563	{
1564	(void)cnv;
1565	(void)which;
1566	(void)pErrorCode;
1567	int32_t idx, script;
1568	uint8_t mask;
1569
1570	/ Since all ISCII versions allow switching to other ISCII*
1571	scripts, we add all roundtrippable characters to this set. /*
1572	sa->addRange(sa->set, `0`, ASCII_END);
1573	for (script = DEVANAGARI; script <= MALAYALAM; script++) {
1574	mask = (uint8_t)(lookupInitialData[script].maskEnum);
1575	for (idx = `0`; idx < DELTA; idx++) {
1576	/ added check for TELUGU character /
1577	if ((validityTable[idx] & mask) \|\| (script==TELUGU && idx==`0x31`)) {
1578	sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
1579	}
1580	}
1581	}
1582	sa->add(sa->set, DANDA);
1583	sa->add(sa->set, DOUBLE_DANDA);
1584	sa->add(sa->set, ZWNJ);
1585	sa->add(sa->set, ZWJ);
1586	}
1587	U_CDECL_END
1588	static const UConverterImpl _ISCIIImpl={
1589
1590	UCNV_ISCII,
1591
1592	NULL,
1593	NULL,
1594
1595	_ISCIIOpen,
1596	_ISCIIClose,
1597	_ISCIIReset,
1598
1599	UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1600	UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1601	UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1602	UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1603	NULL,
1604
1605	NULL,
1606	_ISCIIgetName,
1607	NULL,
1608	_ISCII_SafeClone,
1609	_ISCIIGetUnicodeSet,
1610	NULL,
1611	NULL
1612	};
1613
1614	static const UConverterStaticData _ISCIIStaticData={
1615	sizeof(UConverterStaticData),
1616	"ISCII",
1617	`0`,
1618	UCNV_IBM,
1619	UCNV_ISCII,
1620	`1`,
1621	`4`,
1622	{ `0x1a`, `0`, `0`, `0` },
1623	`0x1`,
1624	FALSE,
1625	FALSE,
1626	`0x0`,
1627	`0x0`,
1628	{ `0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0`,`0` }, / reserved /
1629
1630	};
1631
1632	const UConverterSharedData _ISCIIData=
1633	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl);
1634
1635	#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1636

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/ucnvisci.cpp