1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | // utrie_swap.cpp |
5 | // created: 2018aug08 Markus W. Scherer |
6 | |
7 | #include "unicode/utypes.h" |
8 | #include "cmemory.h" |
9 | #include "ucptrie_impl.h" |
10 | #include "udataswp.h" |
11 | #include "utrie.h" |
12 | #include "utrie2_impl.h" |
13 | |
14 | // These functions for swapping different generations of ICU code point tries are here |
15 | // so that their implementation files need not depend on swapper code, |
16 | // need not depend on each other, and so that other swapper code |
17 | // need not depend on other trie code. |
18 | |
namespace {

/**
 * Lower bound for the data array length of a serialized UCPTrie:
 * ucptrie_swap() rejects input whose data length is below this value.
 */
constexpr int32_t ASCII_LIMIT{0x80};

}  // namespace
24 | |
25 | U_CAPI int32_t U_EXPORT2 |
26 | utrie_swap(const UDataSwapper *ds, |
27 | const void *inData, int32_t length, void *outData, |
28 | UErrorCode *pErrorCode) { |
29 | const UTrieHeader *inTrie; |
30 | UTrieHeader trie; |
31 | int32_t size; |
32 | UBool dataIs32; |
33 | |
34 | if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { |
35 | return 0; |
36 | } |
37 | if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { |
38 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
39 | return 0; |
40 | } |
41 | |
42 | /* setup and swapping */ |
43 | if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) { |
44 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
45 | return 0; |
46 | } |
47 | |
48 | inTrie=(const UTrieHeader *)inData; |
49 | trie.signature=ds->readUInt32(inTrie->signature); |
50 | trie.options=ds->readUInt32(inTrie->options); |
51 | trie.indexLength=udata_readInt32(ds, inTrie->indexLength); |
52 | trie.dataLength=udata_readInt32(ds, inTrie->dataLength); |
53 | |
54 | if( trie.signature!=0x54726965 || |
55 | (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || |
56 | ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || |
57 | trie.indexLength<UTRIE_BMP_INDEX_LENGTH || |
58 | (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 || |
59 | trie.dataLength<UTRIE_DATA_BLOCK_LENGTH || |
60 | (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 || |
61 | ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100)) |
62 | ) { |
63 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ |
64 | return 0; |
65 | } |
66 | |
67 | dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0); |
68 | size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2); |
69 | |
70 | if(length>=0) { |
71 | UTrieHeader *outTrie; |
72 | |
73 | if(length<size) { |
74 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
75 | return 0; |
76 | } |
77 | |
78 | outTrie=(UTrieHeader *)outData; |
79 | |
80 | /* swap the header */ |
81 | ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); |
82 | |
83 | /* swap the index and the data */ |
84 | if(dataIs32) { |
85 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); |
86 | ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, |
87 | (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); |
88 | } else { |
89 | ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); |
90 | } |
91 | } |
92 | |
93 | return size; |
94 | } |
95 | |
96 | U_CAPI int32_t U_EXPORT2 |
97 | utrie2_swap(const UDataSwapper *ds, |
98 | const void *inData, int32_t length, void *outData, |
99 | UErrorCode *pErrorCode) { |
100 | const UTrie2Header *inTrie; |
101 | UTrie2Header trie; |
102 | int32_t dataLength, size; |
103 | UTrie2ValueBits valueBits; |
104 | |
105 | if(U_FAILURE(*pErrorCode)) { |
106 | return 0; |
107 | } |
108 | if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { |
109 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
110 | return 0; |
111 | } |
112 | |
113 | /* setup and swapping */ |
114 | if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { |
115 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
116 | return 0; |
117 | } |
118 | |
119 | inTrie=(const UTrie2Header *)inData; |
120 | trie.signature=ds->readUInt32(inTrie->signature); |
121 | trie.options=ds->readUInt16(inTrie->options); |
122 | trie.indexLength=ds->readUInt16(inTrie->indexLength); |
123 | trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); |
124 | |
125 | valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); |
126 | dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; |
127 | |
128 | if( trie.signature!=UTRIE2_SIG || |
129 | valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || |
130 | trie.indexLength<UTRIE2_INDEX_1_OFFSET || |
131 | dataLength<UTRIE2_DATA_START_OFFSET |
132 | ) { |
133 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ |
134 | return 0; |
135 | } |
136 | |
137 | size=sizeof(UTrie2Header)+trie.indexLength*2; |
138 | switch(valueBits) { |
139 | case UTRIE2_16_VALUE_BITS: |
140 | size+=dataLength*2; |
141 | break; |
142 | case UTRIE2_32_VALUE_BITS: |
143 | size+=dataLength*4; |
144 | break; |
145 | default: |
146 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
147 | return 0; |
148 | } |
149 | |
150 | if(length>=0) { |
151 | UTrie2Header *outTrie; |
152 | |
153 | if(length<size) { |
154 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
155 | return 0; |
156 | } |
157 | |
158 | outTrie=(UTrie2Header *)outData; |
159 | |
160 | /* swap the header */ |
161 | ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); |
162 | ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); |
163 | |
164 | /* swap the index and the data */ |
165 | switch(valueBits) { |
166 | case UTRIE2_16_VALUE_BITS: |
167 | ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); |
168 | break; |
169 | case UTRIE2_32_VALUE_BITS: |
170 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); |
171 | ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, |
172 | (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); |
173 | break; |
174 | default: |
175 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
176 | return 0; |
177 | } |
178 | } |
179 | |
180 | return size; |
181 | } |
182 | |
183 | U_CAPI int32_t U_EXPORT2 |
184 | ucptrie_swap(const UDataSwapper *ds, |
185 | const void *inData, int32_t length, void *outData, |
186 | UErrorCode *pErrorCode) { |
187 | const UCPTrieHeader *inTrie; |
188 | UCPTrieHeader trie; |
189 | int32_t dataLength, size; |
190 | UCPTrieValueWidth valueWidth; |
191 | |
192 | if(U_FAILURE(*pErrorCode)) { |
193 | return 0; |
194 | } |
195 | if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { |
196 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
197 | return 0; |
198 | } |
199 | |
200 | /* setup and swapping */ |
201 | if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { |
202 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
203 | return 0; |
204 | } |
205 | |
206 | inTrie=(const UCPTrieHeader *)inData; |
207 | trie.signature=ds->readUInt32(inTrie->signature); |
208 | trie.options=ds->readUInt16(inTrie->options); |
209 | trie.indexLength=ds->readUInt16(inTrie->indexLength); |
210 | trie.dataLength = ds->readUInt16(inTrie->dataLength); |
211 | |
212 | UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); |
213 | valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); |
214 | dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; |
215 | |
216 | int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? |
217 | UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; |
218 | if( trie.signature!=UCPTRIE_SIG || |
219 | type > UCPTRIE_TYPE_SMALL || |
220 | (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || |
221 | valueWidth > UCPTRIE_VALUE_BITS_8 || |
222 | trie.indexLength < minIndexLength || |
223 | dataLength < ASCII_LIMIT |
224 | ) { |
225 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ |
226 | return 0; |
227 | } |
228 | |
229 | size=sizeof(UCPTrieHeader)+trie.indexLength*2; |
230 | switch(valueWidth) { |
231 | case UCPTRIE_VALUE_BITS_16: |
232 | size+=dataLength*2; |
233 | break; |
234 | case UCPTRIE_VALUE_BITS_32: |
235 | size+=dataLength*4; |
236 | break; |
237 | case UCPTRIE_VALUE_BITS_8: |
238 | size+=dataLength; |
239 | break; |
240 | default: |
241 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
242 | return 0; |
243 | } |
244 | |
245 | if(length>=0) { |
246 | UCPTrieHeader *outTrie; |
247 | |
248 | if(length<size) { |
249 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
250 | return 0; |
251 | } |
252 | |
253 | outTrie=(UCPTrieHeader *)outData; |
254 | |
255 | /* swap the header */ |
256 | ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); |
257 | ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); |
258 | |
259 | /* swap the index */ |
260 | const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1); |
261 | uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1); |
262 | ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode); |
263 | |
264 | /* swap the data */ |
265 | const uint16_t *inData=inIndex+trie.indexLength; |
266 | uint16_t *outData=outIndex+trie.indexLength; |
267 | switch(valueWidth) { |
268 | case UCPTRIE_VALUE_BITS_16: |
269 | ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode); |
270 | break; |
271 | case UCPTRIE_VALUE_BITS_32: |
272 | ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode); |
273 | break; |
274 | case UCPTRIE_VALUE_BITS_8: |
275 | if(inTrie!=outTrie) { |
276 | uprv_memmove(outData, inData, dataLength); |
277 | } |
278 | break; |
279 | default: |
280 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
281 | return 0; |
282 | } |
283 | } |
284 | |
285 | return size; |
286 | } |
287 | |
288 | namespace { |
289 | |
290 | /** |
291 | * Gets the trie version from 32-bit-aligned memory containing the serialized form |
292 | * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). |
293 | * |
294 | * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie |
295 | * @param length the number of bytes available at data; |
296 | * can be more than necessary (see return value) |
297 | * @param anyEndianOk If false, only platform-endian serialized forms are recognized. |
298 | * If true, opposite-endian serialized forms are recognized as well. |
299 | * @return the trie version of the serialized form, or 0 if it is not |
300 | * recognized as a serialized trie |
301 | */ |
302 | int32_t |
303 | getVersion(const void *data, int32_t length, UBool anyEndianOk) { |
304 | uint32_t signature; |
305 | if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { |
306 | return 0; |
307 | } |
308 | signature=*(const uint32_t *)data; |
309 | if(signature==UCPTRIE_SIG) { |
310 | return 3; |
311 | } |
312 | if(anyEndianOk && signature==UCPTRIE_OE_SIG) { |
313 | return 3; |
314 | } |
315 | if(signature==UTRIE2_SIG) { |
316 | return 2; |
317 | } |
318 | if(anyEndianOk && signature==UTRIE2_OE_SIG) { |
319 | return 2; |
320 | } |
321 | if(signature==UTRIE_SIG) { |
322 | return 1; |
323 | } |
324 | if(anyEndianOk && signature==UTRIE_OE_SIG) { |
325 | return 1; |
326 | } |
327 | return 0; |
328 | } |
329 | |
330 | } // namespace |
331 | |
332 | U_CAPI int32_t U_EXPORT2 |
333 | utrie_swapAnyVersion(const UDataSwapper *ds, |
334 | const void *inData, int32_t length, void *outData, |
335 | UErrorCode *pErrorCode) { |
336 | if(U_FAILURE(*pErrorCode)) { return 0; } |
337 | switch(getVersion(inData, length, true)) { |
338 | case 1: |
339 | return utrie_swap(ds, inData, length, outData, pErrorCode); |
340 | case 2: |
341 | return utrie2_swap(ds, inData, length, outData, pErrorCode); |
342 | case 3: |
343 | return ucptrie_swap(ds, inData, length, outData, pErrorCode); |
344 | default: |
345 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
346 | return 0; |
347 | } |
348 | } |
349 | |