1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 2003-2014, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | * file name: udataswp.h |
11 | * encoding: UTF-8 |
12 | * tab size: 8 (not used) |
13 | * indentation:4 |
14 | * |
15 | * created on: 2003jun05 |
16 | * created by: Markus W. Scherer |
17 | * |
18 | * Definitions for ICU data transformations for different platforms, |
19 | * changing between big- and little-endian data and/or between |
20 | * charset families (ASCII<->EBCDIC). |
21 | */ |
22 | |
23 | #ifndef __UDATASWP_H__ |
24 | #define __UDATASWP_H__ |
25 | |
26 | #include <stdarg.h> |
27 | #include "unicode/utypes.h" |
28 | |
29 | /* forward declaration */ |
30 | |
31 | U_CDECL_BEGIN |
32 | |
/*
 * Forward declaration: the function types declared below take a
 * UDataSwapper pointer before the full struct definition appears.
 */
struct UDataSwapper;
typedef struct UDataSwapper UDataSwapper;
35 | |
36 | /** |
37 | * Function type for data transformation. |
38 | * Transforms data, or just returns the length of the data if |
39 | * the input length is -1. |
40 | * Swap functions assume that their data pointers are aligned properly. |
41 | * |
42 | * Quick implementation outline: |
43 | * (best to copy and adapt and existing swapper implementation) |
44 | * check that the data looks like the expected format |
45 | * if(length<0) { |
46 | * preflight: |
47 | * never dereference outData |
48 | * read inData and determine the data size |
49 | * assume that inData is long enough for this |
50 | * } else { |
51 | * outData can be NULL if length==0 |
52 | * inData==outData (in-place swapping) possible but not required! |
53 | * verify that length>=(actual size) |
54 | * if there is a chance that not every byte up to size is reached |
55 | * due to padding etc.: |
56 | * if(inData!=outData) { |
57 | * memcpy(outData, inData, actual size); |
58 | * } |
59 | * swap contents |
60 | * } |
61 | * return actual size |
62 | * |
63 | * Further implementation notes: |
64 | * - read integers from inData before swapping them |
65 | * because in-place swapping can make them unreadable |
66 | * - compareInvChars compares a local Unicode string with already-swapped |
67 | * output charset strings |
68 | * |
69 | * @param ds Pointer to UDataSwapper containing global data about the |
70 | * transformation and function pointers for handling primitive |
71 | * types. |
72 | * @param inData Pointer to the input data to be transformed or examined. |
73 | * @param length Length of the data, counting bytes. May be -1 for preflighting. |
74 | * If length>=0, then transform the data. |
75 | * If length==-1, then only determine the length of the data. |
76 | * The length cannot be determined from the data itself for all |
77 | * types of data (e.g., not for simple arrays of integers). |
78 | * @param outData Pointer to the output data buffer. |
79 | * If length>=0 (transformation), then the output buffer must |
80 | * have a capacity of at least length. |
81 | * If length==-1, then outData will not be used and can be NULL. |
82 | * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must |
83 | * fulfill U_SUCCESS on input. |
84 | * @return The actual length of the data. |
85 | * |
86 | * @see UDataSwapper |
87 | * @internal ICU 2.8 |
88 | */ |
89 | typedef int32_t U_CALLCONV |
90 | UDataSwapFn(const UDataSwapper *ds, |
91 | const void *inData, int32_t length, void *outData, |
92 | UErrorCode *pErrorCode); |
93 | |
94 | /** |
95 | * Convert one uint16_t from input to platform endianness. |
96 | * @internal ICU 2.8 |
97 | */ |
98 | typedef uint16_t U_CALLCONV |
99 | UDataReadUInt16(uint16_t x); |
100 | |
101 | /** |
102 | * Convert one uint32_t from input to platform endianness. |
103 | * @internal ICU 2.8 |
104 | */ |
105 | typedef uint32_t U_CALLCONV |
106 | UDataReadUInt32(uint32_t x); |
107 | |
108 | /** |
109 | * Convert one uint16_t from platform to input endianness. |
110 | * @internal ICU 2.8 |
111 | */ |
112 | typedef void U_CALLCONV |
113 | UDataWriteUInt16(uint16_t *p, uint16_t x); |
114 | |
115 | /** |
116 | * Convert one uint32_t from platform to input endianness. |
117 | * @internal ICU 2.8 |
118 | */ |
119 | typedef void U_CALLCONV |
120 | UDataWriteUInt32(uint32_t *p, uint32_t x); |
121 | |
122 | /** |
123 | * Compare invariant-character strings, one in the output data and the |
124 | * other one caller-provided in Unicode. |
125 | * An output data string is compared because strings are usually swapped |
126 | * before the rest of the data, to allow for sorting of string tables |
127 | * according to the output charset. |
128 | * You can use -1 for the length parameters of NUL-terminated strings as usual. |
129 | * Returns Unicode code point order for invariant characters. |
130 | * @internal ICU 2.8 |
131 | */ |
132 | typedef int32_t U_CALLCONV |
133 | UDataCompareInvChars(const UDataSwapper *ds, |
134 | const char *outString, int32_t outLength, |
135 | const UChar *localString, int32_t localLength); |
136 | |
137 | /** |
138 | * Function for message output when an error occurs during data swapping. |
139 | * A format string and variable number of arguments are passed |
140 | * like for vprintf(). |
141 | * |
142 | * @param context A function-specific context pointer. |
143 | * @param fmt The format string. |
144 | * @param args The arguments for format string inserts. |
145 | * |
146 | * @internal ICU 2.8 |
147 | */ |
148 | typedef void U_CALLCONV |
149 | UDataPrintError(void *context, const char *fmt, va_list args); |
150 | |
151 | struct UDataSwapper { |
152 | /** Input endianness. @internal ICU 2.8 */ |
153 | UBool inIsBigEndian; |
154 | /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */ |
155 | uint8_t inCharset; |
156 | /** Output endianness. @internal ICU 2.8 */ |
157 | UBool outIsBigEndian; |
158 | /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */ |
159 | uint8_t outCharset; |
160 | |
161 | /* basic functions for reading data values */ |
162 | |
163 | /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */ |
164 | UDataReadUInt16 *readUInt16; |
165 | /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */ |
166 | UDataReadUInt32 *readUInt32; |
167 | /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */ |
168 | UDataCompareInvChars *compareInvChars; |
169 | |
170 | /* basic functions for writing data values */ |
171 | |
172 | /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */ |
173 | UDataWriteUInt16 *writeUInt16; |
174 | /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */ |
175 | UDataWriteUInt32 *writeUInt32; |
176 | |
177 | /* basic functions for data transformations */ |
178 | |
179 | /** Transform an array of 16-bit integers. @internal ICU 2.8 */ |
180 | UDataSwapFn *swapArray16; |
181 | /** Transform an array of 32-bit integers. @internal ICU 2.8 */ |
182 | UDataSwapFn *swapArray32; |
183 | /** Transform an array of 64-bit integers. @internal ICU 53 */ |
184 | UDataSwapFn *swapArray64; |
185 | /** Transform an invariant-character string. @internal ICU 2.8 */ |
186 | UDataSwapFn *swapInvChars; |
187 | |
188 | /** |
189 | * Function for message output when an error occurs during data swapping. |
190 | * Can be NULL. |
191 | * @internal ICU 2.8 |
192 | */ |
193 | UDataPrintError *printError; |
194 | /** Context pointer for printError. @internal ICU 2.8 */ |
195 | void *printErrorContext; |
196 | }; |
197 | |
198 | U_CDECL_END |
199 | |
200 | U_CAPI UDataSwapper * U_EXPORT2 |
201 | udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset, |
202 | UBool outIsBigEndian, uint8_t outCharset, |
203 | UErrorCode *pErrorCode); |
204 | |
205 | /** |
206 | * Open a UDataSwapper for the given input data and the specified output |
207 | * characteristics. |
208 | * Values of -1 for any of the characteristics mean the local platform's |
209 | * characteristics. |
210 | * |
211 | * @see udata_swap |
212 | * @internal ICU 2.8 |
213 | */ |
214 | U_CAPI UDataSwapper * U_EXPORT2 |
215 | udata_openSwapperForInputData(const void *data, int32_t length, |
216 | UBool outIsBigEndian, uint8_t outCharset, |
217 | UErrorCode *pErrorCode); |
218 | |
219 | U_CAPI void U_EXPORT2 |
220 | udata_closeSwapper(UDataSwapper *ds); |
221 | |
222 | /** |
223 | * Read the beginning of an ICU data piece, recognize magic bytes, |
224 | * swap the structure. |
225 | * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece. |
226 | * |
227 | * @return The size of the data header, in bytes. |
228 | * |
229 | * @internal ICU 2.8 |
230 | */ |
231 | U_CAPI int32_t U_EXPORT2 |
232 | udata_swapDataHeader(const UDataSwapper *ds, |
233 | const void *inData, int32_t length, void *outData, |
234 | UErrorCode *pErrorCode); |
235 | |
236 | /** |
237 | * Convert one int16_t from input to platform endianness. |
238 | * @internal ICU 2.8 |
239 | */ |
240 | U_CAPI int16_t U_EXPORT2 |
241 | udata_readInt16(const UDataSwapper *ds, int16_t x); |
242 | |
243 | /** |
244 | * Convert one int32_t from input to platform endianness. |
245 | * @internal ICU 2.8 |
246 | */ |
247 | U_CAPI int32_t U_EXPORT2 |
248 | udata_readInt32(const UDataSwapper *ds, int32_t x); |
249 | |
250 | /** |
251 | * Swap a block of invariant, NUL-terminated strings, but not padding |
252 | * bytes after the last string. |
253 | * @internal |
254 | */ |
255 | U_CAPI int32_t U_EXPORT2 |
256 | udata_swapInvStringBlock(const UDataSwapper *ds, |
257 | const void *inData, int32_t length, void *outData, |
258 | UErrorCode *pErrorCode); |
259 | |
260 | U_CAPI void U_EXPORT2 |
261 | udata_printError(const UDataSwapper *ds, |
262 | const char *fmt, |
263 | ...); |
264 | |
265 | /* internal exports from putil.c -------------------------------------------- */ |
266 | |
267 | /* declared here to keep them out of the public putil.h */ |
268 | |
269 | /** |
270 | * Swap invariant char * strings ASCII->EBCDIC. |
271 | * @internal |
272 | */ |
273 | U_CAPI int32_t U_EXPORT2 |
274 | uprv_ebcdicFromAscii(const UDataSwapper *ds, |
275 | const void *inData, int32_t length, void *outData, |
276 | UErrorCode *pErrorCode); |
277 | |
278 | /** |
279 | * Copy invariant ASCII char * strings and verify they are invariant. |
280 | * @internal |
281 | */ |
282 | U_CFUNC int32_t |
283 | uprv_copyAscii(const UDataSwapper *ds, |
284 | const void *inData, int32_t length, void *outData, |
285 | UErrorCode *pErrorCode); |
286 | |
287 | /** |
288 | * Swap invariant char * strings EBCDIC->ASCII. |
289 | * @internal |
290 | */ |
291 | U_CFUNC int32_t |
292 | uprv_asciiFromEbcdic(const UDataSwapper *ds, |
293 | const void *inData, int32_t length, void *outData, |
294 | UErrorCode *pErrorCode); |
295 | |
296 | /** |
297 | * Copy invariant EBCDIC char * strings and verify they are invariant. |
298 | * @internal |
299 | */ |
300 | U_CFUNC int32_t |
301 | uprv_copyEbcdic(const UDataSwapper *ds, |
302 | const void *inData, int32_t length, void *outData, |
303 | UErrorCode *pErrorCode); |
304 | |
305 | /** |
306 | * Compare ASCII invariant char * with Unicode invariant UChar * |
307 | * @internal |
308 | */ |
309 | U_CFUNC int32_t |
310 | uprv_compareInvAscii(const UDataSwapper *ds, |
311 | const char *outString, int32_t outLength, |
312 | const UChar *localString, int32_t localLength); |
313 | |
314 | /** |
315 | * Compare EBCDIC invariant char * with Unicode invariant UChar * |
316 | * @internal |
317 | */ |
318 | U_CFUNC int32_t |
319 | uprv_compareInvEbcdic(const UDataSwapper *ds, |
320 | const char *outString, int32_t outLength, |
321 | const UChar *localString, int32_t localLength); |
322 | |
/**
 * \def uprv_compareInvWithUChar
 * Compare an invariant-character string with a UChar string.
 * Resolves to uprv_compareInvAscii or uprv_compareInvEbcdic according to
 * U_CHARSET_FAMILY; any other charset family is a compile-time error.
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define uprv_compareInvWithUChar uprv_compareInvAscii
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define uprv_compareInvWithUChar uprv_compareInvEbcdic
#else
#   error Unknown charset family!
#endif
335 | |
336 | // utrie_swap.cpp -----------------------------------------------------------*** |
337 | |
338 | /** |
339 | * Swaps a serialized UTrie. |
340 | * @internal |
341 | */ |
342 | U_CAPI int32_t U_EXPORT2 |
343 | utrie_swap(const UDataSwapper *ds, |
344 | const void *inData, int32_t length, void *outData, |
345 | UErrorCode *pErrorCode); |
346 | |
347 | /** |
348 | * Swaps a serialized UTrie2. |
349 | * @internal |
350 | */ |
351 | U_CAPI int32_t U_EXPORT2 |
352 | utrie2_swap(const UDataSwapper *ds, |
353 | const void *inData, int32_t length, void *outData, |
354 | UErrorCode *pErrorCode); |
355 | |
356 | /** |
357 | * Swaps a serialized UCPTrie. |
358 | * @internal |
359 | */ |
360 | U_CAPI int32_t U_EXPORT2 |
361 | ucptrie_swap(const UDataSwapper *ds, |
362 | const void *inData, int32_t length, void *outData, |
363 | UErrorCode *pErrorCode); |
364 | |
365 | /** |
366 | * Swaps a serialized UTrie, UTrie2, or UCPTrie. |
367 | * @internal |
368 | */ |
369 | U_CAPI int32_t U_EXPORT2 |
370 | utrie_swapAnyVersion(const UDataSwapper *ds, |
371 | const void *inData, int32_t length, void *outData, |
372 | UErrorCode *pErrorCode); |
373 | |
374 | /* material... -------------------------------------------------------------- */ |
375 | |
#if 0

/* udata.h */

/**
 * Public API function in udata.c
 * (this declaration is disabled by the enclosing #if 0).
 *
 * Same as udata_openChoice() but automatically swaps the data.
 * If isAcceptable is not NULL, it may accept data whose endianness and
 * charset family are different from the properties of the current platform.
 * If the data is acceptable and the platform properties do not match, then
 * the swap function is called to swap an allocated version of the data.
 * Preflighting may or may not be performed, depending on whether the size
 * of the loaded data item is known.
 *
 * @param isAcceptable Same as for udata_openChoice(). May be NULL.
 *
 * @internal ICU 2.8
 */
U_CAPI UDataMemory * U_EXPORT2
udata_openSwap(const char *path,
               const char *type,
               const char *name,
               UDataMemoryIsAcceptable *isAcceptable,
               void *isAcceptableContext,
               UDataSwapFn *swap,
               UDataPrintError *printError,
               void *printErrorContext,
               UErrorCode *pErrorCode);

#endif
403 | |
404 | #endif |
405 | |