ucnv_cnv.h source code [ClickHouse/contrib/icu/icu4c/source/common/ucnv_cnv.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1999-2011, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* ucnv_cnv.h:
10	* Definitions for converter implementations.
11	*
12	* Modification History:
13	*
14	* Date Name Description
15	* 05/09/00 helena Added implementation to handle fallback mappings.
16	* 06/29/2000 helena Major rewrite of the callback APIs.
17	*/
18
19	#ifndef UCNV_CNV_H
20	#define UCNV_CNV_H
21
22	#include "unicode/utypes.h"
23
24	#if !UCONFIG_NO_CONVERSION
25
26	#include "unicode/ucnv.h"
27	#include "unicode/ucnv_err.h"
28	#include "unicode/uset.h"
29	#include "uset_imp.h"
30
31	U_CDECL_BEGIN
32
/* This is used in fromUnicode DBCS tables as an "unassigned" marker. */
#define missingCharMarker 0xFFFF
35
/*
 * #define missingUCharMarker 0xfffe
 *
 * Commented out because there are actually two values used in toUnicode tables:
 * U+fffe "unassigned"
 * U+ffff "illegal"
 */
43
/* Forward declaration, see ucnv_bld.h */
struct UConverterSharedData;
typedef struct UConverterSharedData UConverterSharedData;
47
/* function types for UConverterImpl ---------------------------------------- */
49
50	/ struct with arguments for UConverterLoad and ucnv_load() /
51	typedef struct {
52	int32_t size; / sizeof(UConverterLoadArgs) /
53	int32_t nestedLoads; / count nested ucnv_load() calls /
54	UBool onlyTestIsLoadable; / input: don't actually load /
55	UBool reserved0; / reserved - for good alignment of the pointers /
56	int16_t reserved; / reserved - for good alignment of the pointers /
57	uint32_t options;
58	const char pkg, name, *locale;
59	} UConverterLoadArgs;
60
/*
 * Aggregate initializer for a UConverterLoadArgs: size is set to
 * sizeof(UConverterLoadArgs); every other field is zero, FALSE or NULL.
 * The nine values correspond, in order, to the nine struct members.
 */
#define UCNV_LOAD_ARGS_INITIALIZER \
    { (int32_t)sizeof(UConverterLoadArgs), 0, FALSE, FALSE, 0, 0, NULL, NULL, NULL }
63
64	typedef void (UConverterLoad) (UConverterSharedData sharedData,
65	UConverterLoadArgs *pArgs,
66	const uint8_t raw, UErrorCode pErrorCode);
67	typedef void (UConverterUnload) (UConverterSharedData sharedData);
68
69	typedef void (UConverterOpen) (UConverter cnv, UConverterLoadArgs pArgs, UErrorCode pErrorCode);
70	typedef void (UConverterClose) (UConverter cnv);
71
/* Selects which direction(s) of a converter a UConverterReset call applies to. */
typedef enum UConverterResetChoice {
    UCNV_RESET_BOTH,
    UCNV_RESET_TO_UNICODE,
    UCNV_RESET_FROM_UNICODE
} UConverterResetChoice;
77
78	typedef void (UConverterReset) (UConverter cnv, UConverterResetChoice choice);
79
80	/*
81	* Converter implementation function(s) for ucnv_toUnicode().
82	* If the toUnicodeWithOffsets function pointer is NULL,
83	* then the toUnicode function will be used and the offsets will be set to -1.
84	*
85	* Must maintain state across buffers. Use toUBytes[toULength] for partial input
86	* sequences; it will be checked in ucnv.c at the end of the input stream
87	* to detect truncated input.
88	* Some converters may need additional detection and may then set U_TRUNCATED_CHAR_FOUND.
89	*
90	* The toUnicodeWithOffsets must write exactly as many offset values as target
91	* units. Write offset values of -1 for when the source index corresponding to
92	* the output unit is not known (e.g., the character started in an earlier buffer).
93	* The pArgs->offsets pointer need not be moved forward.
94	*
95	* At function return, either one of the following conditions must be true:
96	* - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit
97	* - another error code with toUBytes[toULength] set to the offending input
98	* - no error, and the source is consumed: source==sourceLimit
99	*
100	* The ucnv.c code will handle the end of the input (reset)
101	* (reset, and truncation detection) and callbacks.
102	*/
103	typedef void (UConverterToUnicode) (UConverterToUnicodeArgs , UErrorCode *);
104
105	/*
106	* Same rules as for UConverterToUnicode.
107	* A lead surrogate is kept in fromUChar32 across buffers, and if an error
108	* occurs, then the offending input code point must be put into fromUChar32
109	* as well.
110	*/
111	typedef void (UConverterFromUnicode) (UConverterFromUnicodeArgs , UErrorCode *);
112
113	/*
114	* Converter implementation function for ucnv_convertEx(), for direct conversion
115	* between two charsets without pivoting through UTF-16.
116	* The rules are the same as for UConverterToUnicode and UConverterFromUnicode.
117	* In addition,
118	* - The toUnicode side must behave and keep state exactly like the
119	* UConverterToUnicode implementation for the same source charset.
120	* - A U_USING_DEFAULT_WARNING can be set to request to temporarily fall back
121	* to pivoting. When this function is called, the conversion framework makes
122	* sure that this warning is not set on input.
123	* - Continuing a partial match and flushing the toUnicode replay buffer
124	* are handled by pivoting, using the toUnicode and fromUnicode functions.
125	*/
126	typedef void (UConverterConvert) (UConverterFromUnicodeArgs pFromUArgs,
127	UConverterToUnicodeArgs *pToUArgs,
128	UErrorCode *pErrorCode);
129
130	/*
131	* Converter implementation function for ucnv_getNextUChar().
132	* If the function pointer is NULL, then the toUnicode function will be used.
133	*
134	* Will be called at a character boundary (toULength==0).
135	* May return with
136	* - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input
137	* (the return value will be ignored)
138	* - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR!)
139	* with toUBytes[toULength] set to the offending input
140	* (the return value will be ignored)
141	* - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer,
142	* to indicate that the ucnv.c code shall call the toUnicode function instead
143	* - return a real code point result
144	*
145	* Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be consumed.
146	*
147	* The ucnv.c code will handle the end of the input (reset)
148	* (except for truncation detection!) and callbacks.
149	*/
150	typedef UChar32 (UConverterGetNextUChar) (UConverterToUnicodeArgs , UErrorCode *);
151
152	typedef void (UConverterGetStarters)(const* UConverter* converter,
153	UBool starters[`256`],
154	UErrorCode *pErrorCode);
155
156	/ If this function pointer is null or if the function returns null*
157	* the name field in static data struct should be returned by
158	* ucnv_getName() API function
159	*/
160	typedef const char * (UConverterGetName) (const* UConverter *cnv);
161
162	/**
163	* Write the codepage substitution character.
164	* If this function is not set, then ucnv_cbFromUWriteSub() writes
165	* the substitution character from UConverter.
166	* For stateful converters, it is typically necessary to handle this
167	* specificially for the converter in order to properly maintain the state.
168	*/
169	typedef void (UConverterWriteSub) (UConverterFromUnicodeArgs pArgs, int32_t offsetIndex, UErrorCode *pErrorCode);
170
171	/**
172	* For converter-specific safeClone processing
173	* If this function is not set, then ucnv_safeClone assumes that the converter has no private data that changes
174	* after the converter is done opening.
175	* If this function is set, then it is called just after a memcpy() of
176	* converter data to the new, empty converter, and is expected to set up
177	* the initial state of the converter. It is not expected to increment the
178	* reference counts of the standard data types such as the shared data.
179	*/
180	typedef UConverter * (UConverterSafeClone) (const* UConverter *cnv,
181	void *stackBuffer,
182	int32_t *pBufferSize,
183	UErrorCode *status);
184
/**
 * Filters for some ucnv_getUnicodeSet() implementation code.
 * UCNV_SET_FILTER_COUNT is the last enumerator and therefore equals the
 * number of filter values that precede it.
 */
typedef enum UConverterSetFilter {
    UCNV_SET_FILTER_NONE,
    UCNV_SET_FILTER_DBCS_ONLY,
    UCNV_SET_FILTER_2022_CN,
    UCNV_SET_FILTER_SJIS,
    UCNV_SET_FILTER_GR94DBCS,
    UCNV_SET_FILTER_HZ,
    UCNV_SET_FILTER_COUNT
} UConverterSetFilter;
197
198	/**
199	* Fills the set of Unicode code points that can be converted by an ICU converter.
200	* The API function ucnv_getUnicodeSet() clears the USet before calling
201	* the converter's getUnicodeSet() implementation; the converter should only
202	* add the appropriate code points to allow recursive use.
203	* For example, the ISO-2022-JP converter will call each subconverter's
204	* getUnicodeSet() implementation to consecutively add code points to
205	* the same USet, which will result in a union of the sets of all subconverters.
206	*
207	* For more documentation, see ucnv_getUnicodeSet() in ucnv.h.
208	*/
209	typedef void (UConverterGetUnicodeSet) (const* UConverter *cnv,
210	const USetAdder *sa,
211	UConverterUnicodeSet which,
212	UErrorCode *pErrorCode);
213
214	UBool CONVERSION_U_SUCCESS (UErrorCode err);
215
216	/**
217	* UConverterImpl contains all the data and functions for a converter type.
218	* Its function pointers work much like a C++ vtable.
219	* Many converter types need to define only a subset of the functions;
220	* when a function pointer is NULL, then a default action will be performed.
221	*
222	* Every converter type must implement toUnicode, fromUnicode, and getNextUChar,
223	* otherwise the converter may crash.
224	* Every converter type that has variable-length codepage sequences should
225	* also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for
226	* correct offset handling.
227	* All other functions may or may not be implemented - it depends only on
228	* whether the converter type needs them.
229	*
230	* When open() fails, then close() will be called, if present.
231	*/
232	struct UConverterImpl {
233	UConverterType type;
234
235	UConverterLoad load;
236	UConverterUnload unload;
237
238	UConverterOpen open;
239	UConverterClose close;
240	UConverterReset reset;
241
242	UConverterToUnicode toUnicode;
243	UConverterToUnicode toUnicodeWithOffsets;
244	UConverterFromUnicode fromUnicode;
245	UConverterFromUnicode fromUnicodeWithOffsets;
246	UConverterGetNextUChar getNextUChar;
247
248	UConverterGetStarters getStarters;
249	UConverterGetName getName;
250	UConverterWriteSub writeSub;
251	UConverterSafeClone safeClone;
252	UConverterGetUnicodeSet getUnicodeSet;
253
254	UConverterConvert toUTF8;
255	UConverterConvert fromUTF8;
256	};
257
258	extern const UConverterSharedData
259	_MBCSData, _Latin1Data,
260	_UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
261	_ISO2022Data,
262	_LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
263	_LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
264	_HZData,_ISCIIData, _SCSUData, _ASCIIData,
265	_UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;
266
267	U_CDECL_END
268
/*
 * Always use fallbacks from codepage to Unicode.
 * Both macros ignore their argument and expand to TRUE.
 */
#define TO_U_USE_FALLBACK(useFallback) TRUE
#define UCNV_TO_U_USE_FALLBACK(cnv) TRUE
272
/*
 * Use fallbacks from Unicode to codepage when cnv->useFallback or for
 * private-use code points.
 *
 * IS_PRIVATE_USE(c) is true for c in U+E000..U+F8FF and U+F0000..U+10FFFF.
 * Each range is tested with a single unsigned comparison: subtracting the
 * range start and casting to uint32_t makes values below the start wrap
 * around to large numbers, so they fail the "< length" test.
 * Note that IS_PRIVATE_USE evaluates its argument twice; do not pass an
 * expression with side effects.
 */
#define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000)
#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c))
#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c)
277
/**
 * Magic number for ucnv_getNextUChar(), returned by a
 * getNextUChar() implementation to indicate to use the converter's toUnicode()
 * instead of the native function.
 * The expansion is parenthesized so the negative literal stays a single
 * operand wherever the macro is used.
 * @internal
 */
#define UCNV_GET_NEXT_UCHAR_USE_TO_U (-9)
285
286	U_CFUNC void
287	ucnv_getCompleteUnicodeSet(const UConverter *cnv,
288	const USetAdder *sa,
289	UConverterUnicodeSet which,
290	UErrorCode *pErrorCode);
291
292	U_CFUNC void
293	ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
294	const USetAdder *sa,
295	UConverterUnicodeSet which,
296	UErrorCode *pErrorCode);
297
298	U_CFUNC void
299	ucnv_fromUWriteBytes(UConverter *cnv,
300	const char *bytes, int32_t length,
301	char *target, const* char *targetLimit,
302	int32_t **offsets,
303	int32_t sourceIndex,
304	UErrorCode *pErrorCode);
305	U_CFUNC void
306	ucnv_toUWriteUChars(UConverter *cnv,
307	const UChar *uchars, int32_t length,
308	UChar *target, const* UChar *targetLimit,
309	int32_t **offsets,
310	int32_t sourceIndex,
311	UErrorCode *pErrorCode);
312
313	U_CFUNC void
314	ucnv_toUWriteCodePoint(UConverter *cnv,
315	UChar32 c,
316	UChar *target, const* UChar *targetLimit,
317	int32_t **offsets,
318	int32_t sourceIndex,
319	UErrorCode *pErrorCode);
320
321	#endif
322
323	#endif /* UCNV_CNV */
324

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/ucnv_cnv.h