ucnv_err.h source code [Godot/thirdparty/icu4c/common/unicode/ucnv_err.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1999-2009, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	*
10	* ucnv_err.h:
11	*/
12
13	/**
14	* \file
15	* \brief C API: UConverter predefined error callbacks
16	*
17	* <h2>Error Behaviour Functions</h2>
18	* Defines some error behaviour functions called by ucnv_{from,to}Unicode
19	* These are provided as part of ICU and many are stable, but they
20	* can also be considered only as an example of what can be done with
21	* callbacks. You may of course write your own.
22	*
23	* If you want to write your own, you may also find the functions from
24	* ucnv_cb.h useful when writing your own callbacks.
25	*
26	* These functions, although public, should NEVER be called directly.
27	* They should be used as parameters to the ucnv_setFromUCallBack
28	* and ucnv_setToUCallBack functions, to set the behaviour of a converter
29	* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
30	*
31	* usage example: 'STOP' doesn't need any context, but newContext
32	* could be set to something other than 'NULL' if needed. The available
33	* contexts in this header can modify the default behavior of the callback.
34	*
35	* \code
36	* UErrorCode err = U_ZERO_ERROR;
37	* UConverter *myConverter = ucnv_open("ibm-949", &err);
38	* const void *oldContext;
39	* UConverterFromUCallback oldAction;
40	*
41	*
42	* if (U_SUCCESS(err))
43	* {
44	* ucnv_setFromUCallBack(myConverter,
45	* UCNV_FROM_U_CALLBACK_STOP,
46	* NULL,
47	* &oldAction,
48	* &oldContext,
49	* &err);
50	* }
51	* \endcode
52	*
53	* The code above tells "myConverter" to stop when it encounters
54	* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
55	* Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
56	* and ucnv_setToUCallBack would need to be called in order to change
57	* that behavior too.
58	*
59	* Here is an example with a context:
60	*
61	* \code
62	* UErrorCode err = U_ZERO_ERROR;
63	* UConverter *myConverter = ucnv_open("ibm-949", &err);
64	* const void *oldContext;
65	* UConverterToUCallback oldAction;
66	*
67	*
68	* if (U_SUCCESS(err))
69	* {
70	* ucnv_setToUCallBack(myConverter,
71	* UCNV_TO_U_CALLBACK_SUBSTITUTE,
72	* UCNV_SUB_STOP_ON_ILLEGAL,
73	* &oldAction,
74	* &oldContext,
75	* &err);
76	* }
77	* \endcode
78	*
79	* The code above tells "myConverter" to stop when it encounters
80	* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
81	* Codepage -> Unicode. Any unmapped and legal characters will be
82	* substituted to be the default substitution character.
83	*/
84
85	#ifndef UCNV_ERR_H
86	#define UCNV_ERR_H
87
88	#include "unicode/utypes.h"
89
90	#if !UCONFIG_NO_CONVERSION
91
/** Forward declaring the UConverter structure. @stable ICU 2.0 */
struct UConverter;

/**
 * Opaque converter handle type; this header only ever refers to it
 * through pointers. @stable ICU 2.0
 */
typedef struct UConverter UConverter;
97
98	/**
99	* FROM_U, TO_U context options for sub callback
100	* @stable ICU 2.0
101	*/
102	#define UCNV_SUB_STOP_ON_ILLEGAL "i"
103
104	/**
105	* FROM_U, TO_U context options for skip callback
106	* @stable ICU 2.0
107	*/
108	#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
109
110	/**
111	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
112	* @stable ICU 2.0
113	*/
114	#define UCNV_ESCAPE_ICU NULL
115	/**
116	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
117	* @stable ICU 2.0
118	*/
119	#define UCNV_ESCAPE_JAVA "J"
120	/**
121	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
122	* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
123	* @stable ICU 2.0
124	*/
125	#define UCNV_ESCAPE_C "C"
126	/**
127	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
128	* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
129	* @stable ICU 2.0
130	*/
131	#define UCNV_ESCAPE_XML_DEC "D"
132	/**
133	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
134	* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
135	* @stable ICU 2.0
136	*/
137	#define UCNV_ESCAPE_XML_HEX "X"
138	/**
139	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
140	* @stable ICU 2.0
141	*/
142	#define UCNV_ESCAPE_UNICODE "U"
143
144	/**
145	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
146	* a backslash, 1..6 hex digits, and a space)
147	* @stable ICU 4.0
148	*/
149	#define UCNV_ESCAPE_CSS2 "S"
150
/**
 * The process condition code to be used with the callbacks.
 * Codes which are greater than UCNV_IRREGULAR should be
 * passed on to any chained callbacks.
 * @stable ICU 2.0
 */
typedef enum {
    UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
                               The error code U_INVALID_CHAR_FOUND will be set. */
    UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example,
                               \\x81\\x2E is illegal in SJIS because \\x2E
                               is not a valid trail byte for the \\x81
                               lead byte.
                               Also, starting with Unicode 3.0.1, non-shortest byte sequences
                               in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
                               are also illegal, not just irregular.
                               The error code U_ILLEGAL_CHAR_FOUND will be set. */
    UCNV_IRREGULAR = 2,   /**< The codepoint is not a regular sequence in
                               the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
                               are irregular UTF-8 byte sequences for single surrogate
                               code points.
                               The error code U_INVALID_CHAR_FOUND will be set. */
    UCNV_RESET = 3,       /**< The callback is called with this reason when a
                               'reset' has occurred. Callback should reset all
                               state. */
    UCNV_CLOSE = 4,       /**< Called when the converter is closed. The
                               callback should release any allocated memory. */
    UCNV_CLONE = 5        /**< Called when ucnv_safeClone() is called on the
                               converter. The pointer available as the
                               'context' is an alias to the original converter's
                               context pointer. If the context must be owned
                               by the new converter, the callback must clone
                               the data and call ucnv_setFromUCallBack
                               (or ucnv_setToUCallBack) with the correct pointer.
                               @stable ICU 2.2 */
} UConverterCallbackReason;
188
189
190	/**
191	* The structure for the fromUnicode callback function parameter.
192	* @stable ICU 2.0
193	*/
194	typedef struct {
195	uint16_t size; /< The size of this struct. @stable ICU 2.0 /*
196	UBool flush; /< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 /*
197	UConverter converter; /*< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 /*
198	const UChar source; /*< Pointer to the source source buffer. @stable ICU 2.0 /*
199	const UChar sourceLimit; /*< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 /*
200	char target; /*< Pointer to the target buffer. @stable ICU 2.0 /*
201	const char targetLimit; /*< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 /*
202	int32_t offsets; /*< Pointer to the buffer that receives the offsets. offset = blah ; offset++;. @stable ICU 2.0 /
203	} UConverterFromUnicodeArgs;
204
205
206	/**
207	* The structure for the toUnicode callback function parameter.
208	* @stable ICU 2.0
209	*/
210	typedef struct {
211	uint16_t size; /< The size of this struct @stable ICU 2.0 /*
212	UBool flush; /< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 /*
213	UConverter converter; /*< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 /*
214	const char source; /*< Pointer to the source source buffer. @stable ICU 2.0 /*
215	const char sourceLimit; /*< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 /*
216	UChar target; /*< Pointer to the target buffer. @stable ICU 2.0 /*
217	const UChar targetLimit; /*< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 /*
218	int32_t offsets; /*< Pointer to the buffer that receives the offsets. offset = blah ; offset++;. @stable ICU 2.0 /
219	} UConverterToUnicodeArgs;
220
221
222	/**
223	* DO NOT CALL THIS FUNCTION DIRECTLY!
224	* This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
225	* returning the error code back to the caller immediately.
226	*
227	* @param context Pointer to the callback's private data
228	* @param fromUArgs Information about the conversion in progress
229	* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
230	* @param length Size (in UChars) of the concerned Unicode sequence
231	* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
232	* @param reason Defines the reason the callback was invoked
233	* @param err This should always be set to a failure status prior to calling.
234	* @stable ICU 2.0
235	*/
236	U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
237	const void *context,
238	UConverterFromUnicodeArgs *fromUArgs,
239	const UChar* codeUnits,
240	int32_t length,
241	UChar32 codePoint,
242	UConverterCallbackReason reason,
243	UErrorCode * err);
244
245
246
247	/**
248	* DO NOT CALL THIS FUNCTION DIRECTLY!
249	* This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
250	* returning the error code back to the caller immediately.
251	*
252	* @param context Pointer to the callback's private data
253	* @param toUArgs Information about the conversion in progress
254	* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
255	* @param length Size (in bytes) of the concerned codepage sequence
256	* @param reason Defines the reason the callback was invoked
257	* @param err This should always be set to a failure status prior to calling.
258	* @stable ICU 2.0
259	*/
260	U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
261	const void *context,
262	UConverterToUnicodeArgs *toUArgs,
263	const char* codeUnits,
264	int32_t length,
265	UConverterCallbackReason reason,
266	UErrorCode * err);
267
268	/**
269	* DO NOT CALL THIS FUNCTION DIRECTLY!
270	* This From Unicode callback skips any ILLEGAL_SEQUENCE, or
271	* skips only UNASSIGNED_SEQUENCE depending on the context parameter
272	* simply ignoring those characters.
273	*
274	* @param context The function currently recognizes the callback options:
275	* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
276	* returning the error code back to the caller immediately.
277	* NULL: Skips any ILLEGAL_SEQUENCE
278	* @param fromUArgs Information about the conversion in progress
279	* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
280	* @param length Size (in UChars) of the concerned Unicode sequence
281	* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
282	* @param reason Defines the reason the callback was invoked
283	* @param err Return value will be set to success if the callback was handled,
284	* otherwise this value will be set to a failure status.
285	* @stable ICU 2.0
286	*/
287	U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
288	const void *context,
289	UConverterFromUnicodeArgs *fromUArgs,
290	const UChar* codeUnits,
291	int32_t length,
292	UChar32 codePoint,
293	UConverterCallbackReason reason,
294	UErrorCode * err);
295
296	/**
297	* DO NOT CALL THIS FUNCTION DIRECTLY!
298	* This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
299	* UNASSIGNED_SEQUENCE depending on context parameter, with the
300	* current substitution string for the converter. This is the default
301	* callback.
302	*
303	* @param context The function currently recognizes the callback options:
304	* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
305	* returning the error code back to the caller immediately.
306	* NULL: Substitutes any ILLEGAL_SEQUENCE
307	* @param fromUArgs Information about the conversion in progress
308	* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
309	* @param length Size (in UChars) of the concerned Unicode sequence
310	* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
311	* @param reason Defines the reason the callback was invoked
312	* @param err Return value will be set to success if the callback was handled,
313	* otherwise this value will be set to a failure status.
314	* @see ucnv_setSubstChars
315	* @stable ICU 2.0
316	*/
317	U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
318	const void *context,
319	UConverterFromUnicodeArgs *fromUArgs,
320	const UChar* codeUnits,
321	int32_t length,
322	UChar32 codePoint,
323	UConverterCallbackReason reason,
324	UErrorCode * err);
325
326	/**
327	* DO NOT CALL THIS FUNCTION DIRECTLY!
328	* This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
329	* hexadecimal representation of the illegal codepoints
330	*
331	* @param context The function currently recognizes the callback options:
332	* <ul>
333	* <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
334	* representation in the format %UXXXX, e.g. "%UFFFE%U00AC%UC8FE".
335	* In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
336	* it will substitute the illegal sequence with the substitution characters.
337	* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
338	* %UD84D%UDC56</li>
339	* <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
340	* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
341	* In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
342	* it will substitute the illegal sequence with the substitution characters.
343	* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
344	* \\uD84D\\uDC56</li>
345	* <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
346	* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
347	* In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
348	* it will substitute the illegal sequence with the substitution characters.
349	* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
350	* \\U00023456</li>
351	* <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
352	* representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;")\endhtmlonly.
353	* In the Event the converter doesn't support the characters {&,#}[0-9],
354	* it will substitute the illegal sequence with the substitution characters.
355	* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
356	* &#144470; and Zero padding is ignored.</li>
357	* <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
358	* representation in the format \htmlonly&#xXXXX; e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly.
359	* In the Event the converter doesn't support the characters {&,#,x}[0-9],
360	* it will substitute the illegal sequence with the substitution characters.
361	* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
362	* \htmlonly&#x23456;\endhtmlonly</li>
363	* </ul>
364	* @param fromUArgs Information about the conversion in progress
365	* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
366	* @param length Size (in UChars) of the concerned Unicode sequence
367	* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
368	* @param reason Defines the reason the callback was invoked
369	* @param err Return value will be set to success if the callback was handled,
370	* otherwise this value will be set to a failure status.
371	* @stable ICU 2.0
372	*/
373	U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
374	const void *context,
375	UConverterFromUnicodeArgs *fromUArgs,
376	const UChar* codeUnits,
377	int32_t length,
378	UChar32 codePoint,
379	UConverterCallbackReason reason,
380	UErrorCode * err);
381
382
383	/**
384	* DO NOT CALL THIS FUNCTION DIRECTLY!
385	* This To Unicode callback skips any ILLEGAL_SEQUENCE, or
386	* skips only UNASSIGNED_SEQUENCE depending on the context parameter
387	* simply ignoring those characters.
388	*
389	* @param context The function currently recognizes the callback options:
390	* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
391	* returning the error code back to the caller immediately.
392	* NULL: Skips any ILLEGAL_SEQUENCE
393	* @param toUArgs Information about the conversion in progress
394	* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
395	* @param length Size (in bytes) of the concerned codepage sequence
396	* @param reason Defines the reason the callback was invoked
397	* @param err Return value will be set to success if the callback was handled,
398	* otherwise this value will be set to a failure status.
399	* @stable ICU 2.0
400	*/
401	U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
402	const void *context,
403	UConverterToUnicodeArgs *toUArgs,
404	const char* codeUnits,
405	int32_t length,
406	UConverterCallbackReason reason,
407	UErrorCode * err);
408
409	/**
410	* DO NOT CALL THIS FUNCTION DIRECTLY!
411	* This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or
412	* UNASSIGNED_SEQUENCE depending on context parameter, with the
413	* Unicode substitution character, U+FFFD.
414	*
415	* @param context The function currently recognizes the callback options:
416	* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
417	* returning the error code back to the caller immediately.
418	* NULL: Substitutes any ILLEGAL_SEQUENCE
419	* @param toUArgs Information about the conversion in progress
420	* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
421	* @param length Size (in bytes) of the concerned codepage sequence
422	* @param reason Defines the reason the callback was invoked
423	* @param err Return value will be set to success if the callback was handled,
424	* otherwise this value will be set to a failure status.
425	* @stable ICU 2.0
426	*/
427	U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
428	const void *context,
429	UConverterToUnicodeArgs *toUArgs,
430	const char* codeUnits,
431	int32_t length,
432	UConverterCallbackReason reason,
433	UErrorCode * err);
434
435	/**
436	* DO NOT CALL THIS FUNCTION DIRECTLY!
437	* This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
438	* hexadecimal representation of the illegal bytes
439	* (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
440	*
441	* @param context This function currently recognizes the callback options:
442	* UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
443	* UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
444	* @param toUArgs Information about the conversion in progress
445	* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
446	* @param length Size (in bytes) of the concerned codepage sequence
447	* @param reason Defines the reason the callback was invoked
448	* @param err Return value will be set to success if the callback was handled,
449	* otherwise this value will be set to a failure status.
450	* @stable ICU 2.0
451	*/
452
453	U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
454	const void *context,
455	UConverterToUnicodeArgs *toUArgs,
456	const char* codeUnits,
457	int32_t length,
458	UConverterCallbackReason reason,
459	UErrorCode * err);
460
461	#endif
462
463	#endif
464
465	/*UCNV_ERR_H*/
466

Browse the source code of Godot/thirdparty/icu4c/common/unicode/ucnv_err.h