1// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5#include "platform/unicode.h"
6#include "vm/globals.h"
7#include "vm/unit_test.h"
8
9namespace dart {
10
11ISOLATE_UNIT_TEST_CASE(Utf8Encode) {
12 const intptr_t kInputLen = 3;
13 const uint16_t kInput[kInputLen] = {0xe6, 0xe7, 0xe8}; // æøå
14 const String& input = String::Handle(String::FromUTF16(kInput, kInputLen));
15 static const uintptr_t kBufferLength = 10;
16 unsigned char buffer[kBufferLength];
17 for (uintptr_t i = 0; i < kBufferLength; i++) {
18 buffer[i] = 42;
19 }
20 Utf8::Encode(input, reinterpret_cast<char*>(&buffer[0]), 10);
21 uintptr_t i;
22 for (i = 0; i < static_cast<uintptr_t>(Utf8::Length(input)); i++) {
23 EXPECT(buffer[i] > 127);
24 }
25 for (; i < kBufferLength; i++) {
26 EXPECT(buffer[i] == 42);
27 }
28}
29
30ISOLATE_UNIT_TEST_CASE(Utf8InvalidByte) {
31 {
32 uint8_t array[] = {0x41, 0xF0, 0x92};
33 intptr_t encode_len = 3;
34 intptr_t decode_len = 3;
35 intptr_t pos = Utf8::ReportInvalidByte(array, encode_len, decode_len);
36 EXPECT(pos == 1);
37 }
38
39 {
40 uint8_t array[] = {0x81, 0x40, 0x42};
41 intptr_t encode_len = 3;
42 intptr_t decode_len = 3;
43 intptr_t pos = Utf8::ReportInvalidByte(array, encode_len, decode_len);
44 EXPECT(pos == 0);
45 }
46
47 {
48 uint8_t array[] = {0x42, 0x40, 0x80};
49 intptr_t encode_len = 3;
50 intptr_t decode_len = 3;
51 intptr_t pos = Utf8::ReportInvalidByte(array, encode_len, decode_len);
52 EXPECT(pos == 2);
53 }
54
55 {
56 uint8_t array[] = {0x41, 0xF0, 0x92, 0x92, 0x91};
57 intptr_t encode_len = 5;
58 intptr_t decode_len = 2;
59 intptr_t pos = Utf8::ReportInvalidByte(array, encode_len, decode_len);
60 EXPECT(pos == encode_len);
61 }
62}
63
64ISOLATE_UNIT_TEST_CASE(Utf8Decode) {
65 // Examples from the Unicode specification, chapter 3
66 {
67 const char* src = "\x41\xC3\xB1\x42";
68 int32_t expected[] = {0x41, 0xF1, 0x42};
69 int32_t dst[ARRAY_SIZE(expected)];
70 memset(dst, 0, sizeof(dst));
71 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
72 EXPECT(is_valid);
73 EXPECT(!memcmp(expected, dst, sizeof(expected)));
74 }
75
76 {
77 const char* src = "\x4D";
78 int32_t expected[] = {0x4D};
79 int32_t dst[ARRAY_SIZE(expected)];
80 memset(dst, 0, sizeof(dst));
81 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
82 EXPECT(is_valid);
83 EXPECT(!memcmp(expected, dst, sizeof(expected)));
84 }
85
86 {
87 const char* src = "\xD0\xB0";
88 int32_t expected[] = {0x430};
89 int32_t dst[ARRAY_SIZE(expected)];
90 memset(dst, 0, sizeof(dst));
91 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
92 EXPECT(is_valid);
93 EXPECT(!memcmp(expected, dst, sizeof(expected)));
94 }
95
96 {
97 const char* src = "\xE4\xBA\x8C";
98 int32_t expected[] = {0x4E8C};
99 int32_t dst[ARRAY_SIZE(expected)];
100 memset(dst, 0, sizeof(dst));
101 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
102 EXPECT(is_valid);
103 EXPECT(!memcmp(expected, dst, sizeof(expected)));
104 }
105
106 {
107 const char* src = "\xF0\x90\x8C\x82";
108 int32_t expected[] = {0x10302};
109 int32_t dst[ARRAY_SIZE(expected)];
110 memset(dst, 0, sizeof(dst));
111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
112 EXPECT(is_valid);
113 EXPECT(!memcmp(expected, dst, sizeof(expected)));
114 }
115
116 {
117 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82";
118 int32_t expected[] = {0x4D, 0x430, 0x4E8C, 0x10302};
119 int32_t dst[ARRAY_SIZE(expected)];
120 memset(dst, 0, sizeof(dst));
121 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
122 EXPECT(is_valid);
123 EXPECT(!memcmp(expected, dst, sizeof(expected)));
124 }
125
126 // Mixture of non-ASCII and ASCII characters
127 {
128 const char* src =
129 "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93"
130 "\x20"
131 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94";
132 int32_t expected[] = {0x5D2, 0x5DC, 0x5E2, 0x5D3, 0x20,
133 0x5D1, 0x5E8, 0x5DB, 0x5D4};
134 int32_t dst[ARRAY_SIZE(expected)];
135 memset(dst, 0, sizeof(dst));
136 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
137 EXPECT(is_valid);
138 EXPECT(!memcmp(expected, dst, sizeof(expected)));
139 }
140
141 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
142
143 // 1 - Some correct UTF-8 text
144 {
145 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5";
146 int32_t expected[] = {0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5};
147 int32_t dst[ARRAY_SIZE(expected)];
148 memset(dst, 0, sizeof(dst));
149 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
150 EXPECT(is_valid);
151 EXPECT(!memcmp(expected, dst, sizeof(expected)));
152 }
153
154 // 2 - Boundary condition test cases
155
156 // 2.1 - First possible sequence of a certain length
157
158 // 2.1.1 - 1 byte (U-00000000): "\x00"
159 {
160 const char* src = "\x00";
161 int32_t expected[] = {0x0};
162 int32_t dst[ARRAY_SIZE(expected)];
163 memset(dst, 0xFF, sizeof(dst));
164 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
165 EXPECT(is_valid);
166 EXPECT(memcmp(expected, dst, sizeof(expected)));
167 }
168
169 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80"
170 {
171 const char* src = "\xC2\x80";
172 int32_t expected[] = {0x80};
173 int32_t dst[ARRAY_SIZE(expected)];
174 memset(dst, 0, sizeof(dst));
175 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
176 EXPECT(is_valid);
177 EXPECT(!memcmp(expected, dst, sizeof(expected)));
178 }
179
180 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80"
181 {
182 const char* src = "\xE0\xA0\x80";
183 int32_t expected[] = {0x800};
184 int32_t dst[ARRAY_SIZE(expected)];
185 memset(dst, 0, sizeof(dst));
186 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
187 EXPECT(is_valid);
188 EXPECT(!memcmp(expected, dst, sizeof(expected)));
189 }
190
191 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80"
192 {
193 const char* src = "\xF0\x90\x80\x80";
194 int32_t expected[] = {0x10000};
195 int32_t dst[ARRAY_SIZE(expected)];
196 memset(dst, 0, sizeof(dst));
197 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
198 EXPECT(is_valid);
199 EXPECT(!memcmp(expected, dst, sizeof(expected)));
200 }
201
202 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80"
203 {
204 const char* src = "\xF8\x88\x80\x80\x80";
205 int32_t expected[] = {0x200000};
206 int32_t dst[ARRAY_SIZE(expected)];
207 memset(dst, 0, sizeof(dst));
208 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
209 EXPECT(!is_valid);
210 EXPECT(memcmp(expected, dst, sizeof(expected)));
211 }
212
213 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80"
214 {
215 const char* src = "\xFC\x84\x80\x80\x80\x80";
216 int32_t expected[] = {0x400000};
217 int32_t dst[ARRAY_SIZE(expected)];
218 memset(dst, 0, sizeof(dst));
219 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
220 EXPECT(!is_valid);
221 EXPECT(memcmp(expected, dst, sizeof(expected)));
222 }
223
224 // 2.2 - Last possible sequence of a certain length
225
226 // 2.2.1 - 1 byte (U-0000007F): "\x7F"
227 {
228 const char* src = "\x7F";
229 int32_t expected[] = {0x7F};
230 int32_t dst[ARRAY_SIZE(expected)];
231 memset(dst, 0, sizeof(dst));
232 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
233 EXPECT(is_valid);
234 EXPECT(!memcmp(expected, dst, sizeof(expected)));
235 }
236
237 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF"
238 {
239 const char* src = "\xDF\xBF";
240 int32_t expected[] = {0x7FF};
241 int32_t dst[ARRAY_SIZE(expected)];
242 memset(dst, 0, sizeof(dst));
243 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
244 EXPECT(is_valid);
245 EXPECT(!memcmp(expected, dst, sizeof(expected)));
246 }
247
248 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF"
249 {
250 const char* src = "\xEF\xBF\xBF";
251 int32_t expected[] = {0xFFFF};
252 int32_t dst[ARRAY_SIZE(expected)];
253 memset(dst, 0, sizeof(dst));
254 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
255 EXPECT(is_valid);
256 EXPECT(!memcmp(expected, dst, sizeof(expected)));
257 }
258
259 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF"
260 {
261 const char* src = "\xF7\xBF\xBF\xBF";
262 int32_t expected[] = {0x1FFFF};
263 int32_t dst[ARRAY_SIZE(expected)];
264 memset(dst, 0, sizeof(dst));
265 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
266 EXPECT(!is_valid);
267 EXPECT(memcmp(expected, dst, sizeof(expected)));
268 }
269
270 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF"
271 {
272 const char* src = "\xFB\xBF\xBF\xBF\xBF";
273 int32_t expected[] = {0x3FFFFFF};
274 int32_t dst[ARRAY_SIZE(expected)];
275 memset(dst, 0, sizeof(dst));
276 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
277 EXPECT(!is_valid);
278 EXPECT(memcmp(expected, dst, sizeof(expected)));
279 }
280
281 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF"
282 {
283 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF";
284 int32_t expected[] = {0x7FFFFFF};
285 int32_t dst[ARRAY_SIZE(expected)];
286 memset(dst, 0, sizeof(dst));
287 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
288 EXPECT(!is_valid);
289 EXPECT(memcmp(expected, dst, sizeof(expected)));
290 }
291
292 // 2.3 - Other boundary conditions
293
294 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF"
295 {
296 const char* src = "\xED\x9F\xBF";
297 int32_t expected[] = {0xD7FF};
298 int32_t dst[ARRAY_SIZE(expected)];
299 memset(dst, 0, sizeof(dst));
300 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
301 EXPECT(is_valid);
302 EXPECT(!memcmp(expected, dst, sizeof(expected)));
303 }
304
305 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80"
306 {
307 const char* src = "\xEE\x80\x80";
308 int32_t expected[] = {0xE000};
309 int32_t dst[ARRAY_SIZE(expected)];
310 memset(dst, 0, sizeof(dst));
311 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
312 EXPECT(is_valid);
313 EXPECT(!memcmp(expected, dst, sizeof(expected)));
314 }
315
316 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD"
317 {
318 const char* src = "\xEF\xBF\xBD";
319 int32_t expected[] = {0xFFFD};
320 int32_t dst[ARRAY_SIZE(expected)];
321 memset(dst, 0, sizeof(dst));
322 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
323 EXPECT(is_valid);
324 EXPECT(!memcmp(expected, dst, sizeof(expected)));
325 }
326
327 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF"
328 {
329 const char* src = "\xF4\x8F\xBF\xBF";
330 int32_t expected[] = {0x10FFFF};
331 int32_t dst[ARRAY_SIZE(expected)];
332 memset(dst, 0, sizeof(dst));
333 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
334 EXPECT(is_valid);
335 EXPECT(!memcmp(expected, dst, sizeof(expected)));
336 }
337
338 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80"
339 {
340 const char* src = "\xF4\x90\x80\x80";
341 int32_t expected[] = {0x110000};
342 int32_t dst[ARRAY_SIZE(expected)];
343 memset(dst, 0, sizeof(dst));
344 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
345 EXPECT(!is_valid);
346 EXPECT(memcmp(expected, dst, sizeof(expected)));
347 }
348
349 // 3 - Malformed sequences
350
351 // 3.1 - Unexpected continuation bytes
352
353 // 3.1.1 - First continuation byte 0x80: "\x80"
354 {
355 const char* src = "\x80";
356 int32_t expected[] = {0x80};
357 int32_t dst[ARRAY_SIZE(expected)];
358 memset(dst, 0, sizeof(dst));
359 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
360 EXPECT(!is_valid);
361 EXPECT(memcmp(expected, dst, sizeof(expected)));
362 }
363
364 // 3.1.2 - Last continuation byte 0xbf: "\xBF"
365 {
366 const char* src = "\xBF";
367 int32_t expected[] = {0xBF};
368 int32_t dst[ARRAY_SIZE(expected)];
369 memset(dst, 0, sizeof(dst));
370 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
371 EXPECT(!is_valid);
372 EXPECT(memcmp(expected, dst, sizeof(expected)));
373 }
374
375 // 3.1.3 - 2 continuation bytes: "\x80\xBF"
376 {
377 const char* src = "\x80\xBF";
378 int32_t expected[] = {0x80, 0xBF};
379 int32_t dst[ARRAY_SIZE(expected)];
380 memset(dst, 0, sizeof(dst));
381 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
382 EXPECT(!is_valid);
383 EXPECT(memcmp(expected, dst, sizeof(expected)));
384 }
385
386 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80"
387 {
388 const char* src = "\x80\xBF\x80";
389 int32_t expected[] = {0x80, 0xBF, 0x80};
390 int32_t dst[ARRAY_SIZE(expected)];
391 memset(dst, 0, sizeof(dst));
392 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
393 EXPECT(!is_valid);
394 EXPECT(memcmp(expected, dst, sizeof(expected)));
395 }
396
397 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF"
398 {
399 const char* src = "\x80\xBF\x80\xBF";
400 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF};
401 int32_t dst[ARRAY_SIZE(expected)];
402 memset(dst, 0, sizeof(dst));
403 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
404 EXPECT(!is_valid);
405 EXPECT(memcmp(expected, dst, sizeof(expected)));
406 }
407
408 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80"
409 {
410 const char* src = "\x80\xBF\x80\xBF\x80";
411 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80};
412 int32_t dst[ARRAY_SIZE(expected)];
413 memset(dst, 0, sizeof(dst));
414 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
415 EXPECT(!is_valid);
416 EXPECT(memcmp(expected, dst, sizeof(expected)));
417 }
418
419 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF"
420 {
421 const char* src = "\x80\xBF\x80\xBF\x80\xBF";
422 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF};
423 int32_t dst[ARRAY_SIZE(expected)];
424 memset(dst, 0, sizeof(dst));
425 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
426 EXPECT(!is_valid);
427 EXPECT(memcmp(expected, dst, sizeof(expected)));
428 }
429
430 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80"
431 {
432 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80";
433 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80};
434 int32_t dst[ARRAY_SIZE(expected)];
435 memset(dst, 0, sizeof(dst));
436 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
437 EXPECT(!is_valid);
438 EXPECT(memcmp(expected, dst, sizeof(expected)));
439 }
440
441 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf):
442 {
443 const char* src =
444 "\x80\x81\x82\x83\x84\x85\x86\x87"
445 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
446 "\x90\x91\x92\x93\x94\x95\x96\x97"
447 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
448 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
449 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
450 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
451 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF";
452 int32_t expected[] = {0x0};
453 int32_t dst[ARRAY_SIZE(expected)];
454 for (size_t i = 0; i < strlen(src); ++i) {
455 memset(dst, 0xFF, sizeof(dst));
456 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
457 EXPECT(!is_valid);
458 EXPECT(memcmp(expected, dst, sizeof(expected)));
459 }
460 }
461
462 // 3.2 - Lonely start character
463
464 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each
465 // followed by a space character:
466 {
467 const char* src =
468 "\xC0\x20\xC1\x20\xC2\x20\xC3\x20"
469 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20"
470 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20"
471 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20"
472 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20"
473 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20"
474 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20"
475 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20";
476 int32_t expected[] = {0x0};
477 int32_t dst[ARRAY_SIZE(expected)];
478 for (size_t i = 0; i < strlen(src); i += 2) {
479 memset(dst, 0xFF, sizeof(dst));
480 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
481 EXPECT(!is_valid);
482 EXPECT(memcmp(expected, dst, sizeof(expected)));
483 }
484 }
485
486 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each
487 // followed by a space character:
488 {
489 const char* src =
490 "\xE0\x20\xE1\x20\xE2\x20\xE3\x20"
491 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20"
492 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20"
493 "\xEC\x20\xED\x20\xEE\x20\xEF\x20";
494 int32_t expected[] = {0x0};
495 int32_t dst[ARRAY_SIZE(expected)];
496 for (size_t i = 0; i < strlen(src); i += 2) {
497 memset(dst, 0xFF, sizeof(dst));
498 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
499 EXPECT(!is_valid);
500 EXPECT(memcmp(expected, dst, sizeof(expected)));
501 }
502 }
503
504 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each
505 // followed by a space character:
506 {
507 const char* src =
508 "\xF0\x20\xF1\x20\xF2\x20\xF3\x20"
509 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20";
510 int32_t expected[] = {0x0};
511 int32_t dst[ARRAY_SIZE(expected)];
512 for (size_t i = 0; i < strlen(src); i += 2) {
513 memset(dst, 0xFF, sizeof(dst));
514 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
515 EXPECT(!is_valid);
516 EXPECT(memcmp(expected, dst, sizeof(expected)));
517 }
518 }
519
520 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each
521 // followed by a space character:
522 {
523 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20";
524 int32_t expected[] = {0x0};
525 int32_t dst[ARRAY_SIZE(expected)];
526 for (size_t i = 0; i < strlen(src); i += 2) {
527 memset(dst, 0xFF, sizeof(dst));
528 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
529 EXPECT(!is_valid);
530 EXPECT(memcmp(expected, dst, sizeof(expected)));
531 }
532 }
533
534 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each
535 // followed by a space character:
536 {
537 const char* src = "\xFC\x20\xFD\x20";
538 int32_t expected[] = {0x0};
539 int32_t dst[ARRAY_SIZE(expected)];
540 for (size_t i = 0; i < strlen(src); i += 2) {
541 memset(dst, 0xFF, sizeof(dst));
542 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
543 EXPECT(!is_valid);
544 EXPECT(memcmp(expected, dst, sizeof(expected)));
545 }
546 }
547
548 // 3.3 - Sequences with last continuation byte missing
549
550 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0"
551 {
552 const char* src = "\xC0";
553 int32_t expected[] = {0x0};
554 int32_t dst[ARRAY_SIZE(expected)];
555 memset(dst, 0xFF, sizeof(dst));
556 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
557 EXPECT(!is_valid);
558 EXPECT(memcmp(expected, dst, sizeof(expected)));
559 }
560
561 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80"
562 {
563 const char* src = "\xE0\x80";
564 int32_t expected[] = {0x0};
565 int32_t dst[ARRAY_SIZE(expected)];
566 memset(dst, 0xFF, sizeof(dst));
567 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
568 EXPECT(!is_valid);
569 EXPECT(memcmp(expected, dst, sizeof(expected)));
570 }
571
572 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80"
573 {
574 const char* src = "\xF0\x80\x80";
575 int32_t expected[] = {0x0};
576 int32_t dst[ARRAY_SIZE(expected)];
577 memset(dst, 0xFF, sizeof(dst));
578 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
579 EXPECT(!is_valid);
580 EXPECT(memcmp(expected, dst, sizeof(expected)));
581 }
582
583 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80"
584 {
585 const char* src = "\xF8\x80\x80\x80";
586 int32_t expected[] = {0x0};
587 int32_t dst[ARRAY_SIZE(expected)];
588 memset(dst, 0xFF, sizeof(dst));
589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
590 EXPECT(!is_valid);
591 EXPECT(memcmp(expected, dst, sizeof(expected)));
592 }
593
594 // 3.3.5 - 6-byte sequence with last byte missing (U+0000):
595 // "\xFC\x80\x80\x80\x80"
596 {
597 const char* src = "\xFC\x80\x80\x80\x80";
598 int32_t expected[] = {0x0};
599 int32_t dst[ARRAY_SIZE(expected)];
600 memset(dst, 0xFF, sizeof(dst));
601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
602 EXPECT(!is_valid);
603 EXPECT(memcmp(expected, dst, sizeof(expected)));
604 }
605
606 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF"
607 {
608 const char* src = "\xDF";
609 int32_t expected[] = {0x0};
610 int32_t dst[ARRAY_SIZE(expected)];
611 memset(dst, 0xFF, sizeof(dst));
612 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
613 EXPECT(!is_valid);
614 EXPECT(memcmp(expected, dst, sizeof(expected)));
615 }
616
617 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF"
618 {
619 const char* src = "\xEF\xBF";
620 int32_t expected[] = {0x0};
621 int32_t dst[ARRAY_SIZE(expected)];
622 memset(dst, 0xFF, sizeof(dst));
623 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
624 EXPECT(!is_valid);
625 EXPECT(memcmp(expected, dst, sizeof(expected)));
626 }
627
628 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF"
629 {
630 const char* src = "\xF7\xBF\xBF";
631 int32_t expected[] = {0x0};
632 int32_t dst[ARRAY_SIZE(expected)];
633 memset(dst, 0xFF, sizeof(dst));
634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
635 EXPECT(!is_valid);
636 EXPECT(memcmp(expected, dst, sizeof(expected)));
637 }
638
639 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF):
640 // "\xFB\xBF\xBF\xBF"
641 {
642 const char* src = "\xFB\xBF\xBF\xBF";
643 int32_t expected[] = {0x0};
644 int32_t dst[ARRAY_SIZE(expected)];
645 memset(dst, 0xFF, sizeof(dst));
646 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
647 EXPECT(!is_valid);
648 EXPECT(memcmp(expected, dst, sizeof(expected)));
649 }
650
651 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF):
652 // "\xFD\xBF\xBF\xBF\xBF"
653 {
654 const char* src = "\xFD\xBF\xBF\xBF\xBF";
655 int32_t expected[] = {0x0};
656 int32_t dst[ARRAY_SIZE(expected)];
657 memset(dst, 0xFF, sizeof(dst));
658 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
659 EXPECT(!is_valid);
660 EXPECT(memcmp(expected, dst, sizeof(expected)));
661 }
662
663 // 3.4 - Concatenation of incomplete sequences
664 {
665 const char* src =
666 "\xC0\xE0\x80\xF0\x80\x80"
667 "\xF8\x80\x80\x80\xFC\x80"
668 "\x80\x80\x80\xDF\xEF\xBF"
669 "\xF7\xBF\xBF\xFB\xBF\xBF"
670 "\xBF\xFD\xBF\xBF\xBF\xBF";
671 int32_t expected[] = {0x0};
672 int32_t dst[ARRAY_SIZE(expected)];
673 for (size_t i = 0; i < strlen(src); ++i) {
674 for (size_t j = 1; j < (strlen(src) - i); ++j) {
675 memset(dst, 0xFF, sizeof(dst));
676 bool is_valid =
677 Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst));
678 EXPECT(!is_valid);
679 EXPECT(memcmp(expected, dst, sizeof(expected)));
680 }
681 }
682 }
683
684 // 3.5 - Impossible bytes
685
686 // 3.5.1 - fe = "\xFE"
687 {
688 const char* src = "\xFE";
689 int32_t expected[] = {0xFE};
690 int32_t dst[ARRAY_SIZE(expected)];
691 memset(dst, 0, sizeof(dst));
692 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
693 EXPECT(!is_valid);
694 EXPECT(memcmp(expected, dst, sizeof(expected)));
695 }
696
697 // 3.5.2 - ff = "\xFF"
698 {
699 const char* src = "\xFF";
700 int32_t expected[] = {0xFF};
701 int32_t dst[ARRAY_SIZE(expected)];
702 memset(dst, 0, sizeof(dst));
703 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
704 EXPECT(!is_valid);
705 EXPECT(memcmp(expected, dst, sizeof(expected)));
706 }
707
708 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF"
709 {
710 const char* src = "\xFE\xFE\xFF\xFF";
711 int32_t expected[] = {0xFF};
712 int32_t dst[ARRAY_SIZE(expected)];
713 memset(dst, 0, sizeof(dst));
714 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
715 EXPECT(!is_valid);
716 EXPECT(memcmp(expected, dst, sizeof(expected)));
717 }
718
719 // 4 - Overlong sequences
720
721 // 4.1 - Examples of an overlong ASCII character
722
723 // 4.1.1 - U+002F = c0 af = "\xC0\xAF"
724 {
725 const char* src = "\xC0\xAF";
726 int32_t expected[] = {0x2F};
727 int32_t dst[ARRAY_SIZE(expected)];
728 memset(dst, 0, sizeof(dst));
729 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
730 EXPECT(!is_valid);
731 EXPECT(memcmp(expected, dst, sizeof(expected)));
732 }
733
734 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF"
735 {
736 const char* src = "\xE0\x80\xAF";
737 int32_t expected[] = {0x2F};
738 int32_t dst[ARRAY_SIZE(expected)];
739 memset(dst, 0, sizeof(dst));
740 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
741 EXPECT(!is_valid);
742 EXPECT(memcmp(expected, dst, sizeof(expected)));
743 }
744
745 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF"
746 {
747 const char* src = "\xF0\x80\x80\xAF";
748 int32_t expected[] = {0x2F};
749 int32_t dst[ARRAY_SIZE(expected)];
750 memset(dst, 0, sizeof(dst));
751 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
752 EXPECT(!is_valid);
753 EXPECT(memcmp(expected, dst, sizeof(expected)));
754 }
755
756 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF"
757 {
758 const char* src = "\xF8\x80\x80\x80\xAF";
759 int32_t expected[] = {0x2F};
760 int32_t dst[ARRAY_SIZE(expected)];
761 memset(dst, 0, sizeof(dst));
762 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
763 EXPECT(!is_valid);
764 EXPECT(memcmp(expected, dst, sizeof(expected)));
765 }
766
767 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF"
768 {
769 const char* src = "\xFC\x80\x80\x80\x80\xAF";
770 int32_t expected[] = {0x2F};
771 int32_t dst[ARRAY_SIZE(expected)];
772 memset(dst, 0, sizeof(dst));
773 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
774 EXPECT(!is_valid);
775 EXPECT(memcmp(expected, dst, sizeof(expected)));
776 }
777
778 // 4.2 Maximum overlong sequences
779
780 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF"
781 {
782 const char* src = "\xC1\xBF";
783 int32_t expected[] = {0x7F};
784 int32_t dst[ARRAY_SIZE(expected)];
785 memset(dst, 0, sizeof(dst));
786 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
787 EXPECT(!is_valid);
788 EXPECT(memcmp(expected, dst, sizeof(expected)));
789 }
790
791 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF"
792 {
793 const char* src = "\xE0\x9F\xBF";
794 int32_t expected[] = {0x7FF};
795 int32_t dst[ARRAY_SIZE(expected)];
796 memset(dst, 0, sizeof(dst));
797 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
798 EXPECT(!is_valid);
799 EXPECT(memcmp(expected, dst, sizeof(expected)));
800 }
801
802 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF"
803 {
804 const char* src = "\xF0\x8F\xBF\xBF";
805 int32_t expected[] = {0xFFFF};
806 int32_t dst[ARRAY_SIZE(expected)];
807 memset(dst, 0, sizeof(dst));
808 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
809 EXPECT(!is_valid);
810 EXPECT(memcmp(expected, dst, sizeof(expected)));
811 }
812
813 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF"
814 {
815 const char* src = "\xF8\x87\xBF\xBF\xBF";
816 int32_t expected[] = {0x1FFFFF};
817 int32_t dst[ARRAY_SIZE(expected)];
818 memset(dst, 0, sizeof(dst));
819 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
820 EXPECT(!is_valid);
821 EXPECT(memcmp(expected, dst, sizeof(expected)));
822 }
823
824 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF"
825 {
826 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF";
827 int32_t expected[] = {0x3FFFFFF};
828 int32_t dst[ARRAY_SIZE(expected)];
829 memset(dst, 0, sizeof(dst));
830 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
831 EXPECT(!is_valid);
832 EXPECT(memcmp(expected, dst, sizeof(expected)));
833 }
834
835 // 4.3 - Overlong representation of the NUL character
836
837 // 4.3.1 - U+0000 = "\xC0\x80"
838 {
839 const char* src = "\xC0\x80";
840 int32_t expected[] = {0x0};
841 int32_t dst[ARRAY_SIZE(expected)];
842 memset(dst, 0xFF, sizeof(dst));
843 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
844 EXPECT(!is_valid);
845 EXPECT(memcmp(expected, dst, sizeof(expected)));
846 }
847
848 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80"
849 {
850 const char* src = "\xE0\x80\x80";
851 int32_t expected[] = {0x0};
852 int32_t dst[ARRAY_SIZE(expected)];
853 memset(dst, 0xFF, sizeof(dst));
854 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
855 EXPECT(!is_valid);
856 EXPECT(memcmp(expected, dst, sizeof(expected)));
857 }
858
859 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80"
860 {
861 const char* src = "\xF0\x80\x80\x80";
862 int32_t expected[] = {0x0};
863 int32_t dst[ARRAY_SIZE(expected)];
864 memset(dst, 0xFF, sizeof(dst));
865 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
866 EXPECT(!is_valid);
867 EXPECT(memcmp(expected, dst, sizeof(expected)));
868 }
869
870 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80"
871 {
872 const char* src = "\xF8\x80\x80\x80\x80";
873 int32_t expected[] = {0x0};
874 int32_t dst[ARRAY_SIZE(expected)];
875 memset(dst, 0xFF, sizeof(dst));
876 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
877 EXPECT(!is_valid);
878 EXPECT(memcmp(expected, dst, sizeof(expected)));
879 }
880
881 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80"
882 {
883 const char* src = "\xFC\x80\x80\x80\x80\x80";
884 int32_t expected[] = {0x0};
885 int32_t dst[ARRAY_SIZE(expected)];
886 memset(dst, 0xFF, sizeof(dst));
887 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
888 EXPECT(!is_valid);
889 EXPECT(memcmp(expected, dst, sizeof(expected)));
890 }
891
892 // 5.1 - Single UTF-16 surrogates
893 // UTF-8 suggests single surrogates are invalid, but both JS and
894 // Dart allow them and make use of them.
895
896 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80"
897 {
898 const char* src = "\xED\xA0\x80";
899 int32_t expected[] = {0xD800};
900 int32_t dst[ARRAY_SIZE(expected)];
901 memset(dst, 0, sizeof(dst));
902 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
903 EXPECT(is_valid);
904 EXPECT(!memcmp(expected, dst, sizeof(expected)));
905 }
906
907 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF"
908 {
909 const char* src = "\xED\xAD\xBF";
910 int32_t expected[] = {0xDB7F};
911 int32_t dst[ARRAY_SIZE(expected)];
912 memset(dst, 0, sizeof(dst));
913 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
914 EXPECT(is_valid);
915 EXPECT(!memcmp(expected, dst, sizeof(expected)));
916 }
917
918 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80"
919 {
920 const char* src = "\xED\xAE\x80";
921 int32_t expected[] = {0xDB80};
922 int32_t dst[ARRAY_SIZE(expected)];
923 memset(dst, 0, sizeof(dst));
924 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
925 EXPECT(is_valid);
926 EXPECT(!memcmp(expected, dst, sizeof(expected)));
927 }
928
929 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF"
930 {
931 const char* src = "\xED\xAF\xBF";
932 int32_t expected[] = {0xDBFF};
933 int32_t dst[ARRAY_SIZE(expected)];
934 memset(dst, 0, sizeof(dst));
935 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
936 EXPECT(is_valid);
937 EXPECT(!memcmp(expected, dst, sizeof(expected)));
938 }
939
940 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80"
941 {
942 const char* src = "\xED\xB0\x80";
943 int32_t expected[] = {0xDC00};
944 int32_t dst[ARRAY_SIZE(expected)];
945 memset(dst, 0, sizeof(dst));
946 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
947 EXPECT(is_valid);
948 EXPECT(!memcmp(expected, dst, sizeof(expected)));
949 }
950
951 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80"
952 {
953 const char* src = "\xED\xBE\x80";
954 int32_t expected[] = {0xDF80};
955 int32_t dst[ARRAY_SIZE(expected)];
956 memset(dst, 0, sizeof(dst));
957 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
958 EXPECT(is_valid);
959 EXPECT(!memcmp(expected, dst, sizeof(expected)));
960 }
961
962 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF"
963 {
964 const char* src = "\xED\xBF\xBF";
965 int32_t expected[] = {0xDFFF};
966 int32_t dst[ARRAY_SIZE(expected)];
967 memset(dst, 0, sizeof(dst));
968 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
969 EXPECT(is_valid);
970 EXPECT(!memcmp(expected, dst, sizeof(expected)));
971 }
972
973 // 5.2 Paired UTF-16 surrogates
974 // Also not a valid string, but accepted in Dart, even if it doesn't make
975 // sense. e.g.
976 // var s = new String.fromCharCodes([0xd800, 0xDC00]);
977 // print(s.runes); // (65536) (0x10000)
978 // print(s.codeUnits); // [55296, 56320]
979
980 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80"
981 {
982 const char* src = "\xED\xA0\x80\xED\xB0\x80";
983 int32_t expected[] = {0xD800, 0xDC00};
984 int32_t dst[ARRAY_SIZE(expected)];
985 memset(dst, 0, sizeof(dst));
986 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
987 EXPECT(is_valid);
988 EXPECT(!memcmp(expected, dst, sizeof(expected)));
989 }
990
991 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF"
992 {
993 const char* src = "\xED\xA0\x80\xED\xBF\xBF";
994 int32_t expected[] = {0xD800, 0xDFFF};
995 int32_t dst[ARRAY_SIZE(expected)];
996 memset(dst, 0, sizeof(dst));
997 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
998 EXPECT(is_valid);
999 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1000 }
1001
1002 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80"
1003 {
1004 const char* src = "\xED\xAD\xBF\xED\xB0\x80";
1005 int32_t expected[] = {0xDB7F, 0xDC00};
1006 int32_t dst[ARRAY_SIZE(expected)];
1007 memset(dst, 0, sizeof(dst));
1008 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1009 EXPECT(is_valid);
1010 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1011 }
1012
1013 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF"
1014 {
1015 const char* src = "\xED\xAD\xBF\xED\xBF\xBF";
1016 int32_t expected[] = {0xDB7F, 0xDFFF};
1017 int32_t dst[ARRAY_SIZE(expected)];
1018 memset(dst, 0, sizeof(dst));
1019 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1020 EXPECT(is_valid);
1021 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1022 }
1023
1024 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80"
1025 {
1026 const char* src = "\xED\xAE\x80\xED\xB0\x80";
1027 int32_t expected[] = {0xDB80, 0xDC00};
1028 int32_t dst[ARRAY_SIZE(expected)];
1029 memset(dst, 0, sizeof(dst));
1030 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1031 EXPECT(is_valid);
1032 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1033 }
1034
1035 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF"
1036 {
1037 const char* src = "\xED\xAE\x80\xED\xBF\xBF";
1038 int32_t expected[] = {0xDB80, 0xDFFF};
1039 int32_t dst[ARRAY_SIZE(expected)];
1040 memset(dst, 0, sizeof(dst));
1041 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1042 EXPECT(is_valid);
1043 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1044 }
1045
1046 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80"
1047 {
1048 const char* src = "\xED\xAF\xBF\xED\xB0\x80";
1049 int32_t expected[] = {0xDBFF, 0xDC00};
1050 int32_t dst[ARRAY_SIZE(expected)];
1051 memset(dst, 0, sizeof(dst));
1052 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1053 EXPECT(is_valid);
1054 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1055 }
1056
1057 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF"
1058 {
1059 const char* src = "\xED\xAF\xBF\xED\xBF\xBF";
1060 int32_t expected[] = {0xDBFF, 0xDFFF};
1061 int32_t dst[ARRAY_SIZE(expected)];
1062 memset(dst, 0, sizeof(dst));
1063 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1064 EXPECT(is_valid);
1065 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1066 }
1067
1068 // 5.3 - Other illegal code positions
1069
1070 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE"
1071 {
1072 const char* src = "\xEF\xBF\xBE";
1073 int32_t expected[] = {0xFFFE};
1074 int32_t dst[ARRAY_SIZE(expected)];
1075 memset(dst, 0, sizeof(dst));
1076 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1077 EXPECT(is_valid);
1078 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1079 }
1080
1081 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF"
1082 {
1083 const char* src = "\xEF\xBF\xBF";
1084 int32_t expected[] = {0xFFFF};
1085 int32_t dst[ARRAY_SIZE(expected)];
1086 memset(dst, 0, sizeof(dst));
1087 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1088 EXPECT(is_valid);
1089 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1090 }
1091}
1092
1093} // namespace dart
1094