1/* Copyright (c) 2010, Oracle and/or its affiliates
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15
16#include <tap.h>
17#include <my_global.h>
18#include <my_sys.h>
19
20
21/*
22 Test that like_range() returns well-formed results.
23*/
24static int
25test_like_range_for_charset(CHARSET_INFO *cs, const char *src, size_t src_len)
26{
27 char min_str[32], max_str[32];
28 size_t min_len, max_len, min_well_formed_len, max_well_formed_len;
29 int error= 0;
30
31 cs->coll->like_range(cs, src, src_len, '\\', '_', '%',
32 sizeof(min_str), min_str, max_str, &min_len, &max_len);
33 diag("min_len=%d\tmax_len=%d\t%s", (int) min_len, (int) max_len, cs->name);
34 min_well_formed_len= my_well_formed_length(cs,
35 min_str, min_str + min_len,
36 10000, &error);
37 max_well_formed_len= my_well_formed_length(cs,
38 max_str, max_str + max_len,
39 10000, &error);
40 if (min_len != min_well_formed_len)
41 diag("Bad min_str: min_well_formed_len=%d min_str[%d]=0x%02X",
42 (int) min_well_formed_len, (int) min_well_formed_len,
43 (uchar) min_str[min_well_formed_len]);
44 if (max_len != max_well_formed_len)
45 diag("Bad max_str: max_well_formed_len=%d max_str[%d]=0x%02X",
46 (int) max_well_formed_len, (int) max_well_formed_len,
47 (uchar) max_str[max_well_formed_len]);
48 return
49 min_len == min_well_formed_len &&
50 max_len == max_well_formed_len ? 0 : 1;
51}
52
53
54static CHARSET_INFO *charset_list[]=
55{
56#ifdef HAVE_CHARSET_big5
57 &my_charset_big5_chinese_ci,
58 &my_charset_big5_bin,
59#endif
60#ifdef HAVE_CHARSET_euckr
61 &my_charset_euckr_korean_ci,
62 &my_charset_euckr_bin,
63#endif
64#ifdef HAVE_CHARSET_gb2312
65 &my_charset_gb2312_chinese_ci,
66 &my_charset_gb2312_bin,
67#endif
68#ifdef HAVE_CHARSET_gbk
69 &my_charset_gbk_chinese_ci,
70 &my_charset_gbk_bin,
71#endif
72#ifdef HAVE_CHARSET_latin1
73 &my_charset_latin1,
74 &my_charset_latin1_bin,
75#endif
76#ifdef HAVE_CHARSET_sjis
77 &my_charset_sjis_japanese_ci,
78 &my_charset_sjis_bin,
79#endif
80#ifdef HAVE_CHARSET_tis620
81 &my_charset_tis620_thai_ci,
82 &my_charset_tis620_bin,
83#endif
84#ifdef HAVE_CHARSET_ujis
85 &my_charset_ujis_japanese_ci,
86 &my_charset_ujis_bin,
87#endif
88#ifdef HAVE_CHARSET_utf8
89 &my_charset_utf8_general_ci,
90#ifdef HAVE_UCA_COLLATIONS
91 &my_charset_utf8_unicode_ci,
92#endif
93 &my_charset_utf8_bin,
94#endif
95};
96
97
/*
  One strnncollsp() comparison case: two byte strings with explicit
  lengths and the expected result.  Only the sign of "res" is checked
  (see eqres()).  An entry whose "a" is NULL terminates a table.
*/
typedef struct
{
  const char *a;   /* Left operand bytes                              */
  size_t alen;     /* Length of "a" in bytes                          */
  const char *b;   /* Right operand bytes                             */
  size_t blen;     /* Length of "b" in bytes                          */
  int res;         /* Expected sign of the comparison: <0, 0 or >0    */
} STRNNCOLL_PARAM;
106
107
/* Expand a string literal into two initializers: the literal and its length without the trailing '\0' */
#define CSTR(x) (x), (sizeof(x) - 1)
109
110/*
111 Byte sequence types used in the tests:
 8BIT - an 8-bit byte (>= 0x80) which makes a single-byte character
113 MB2 - two bytes that make a valid character
114 H2 - a byte which is a valid MB2 head byte
115 T2 - a byte which is a valid MB2 tail byte
116 ILSEQ - a byte which makes an illegal sequence
117 H2+ILSEQ - a sequence that starts with a valid H2 byte,
118 but not followed by a valid T2 byte.
119
120 Charset H2 T2 8BIT
121 ------- ---------------- --------------- --------
122 big5 [A1..F9] [40..7E,A1..FE]
123 euckr [81..FE] [41..5A,61..7A,81..FE]
124 gb2312 [A1..F7] [A1..FE]
125 gbk [81..FE] [40..7E,80..FE]
126
127 cp932 [81..9F,E0..FC] [40..7E,80..FC] [A1..DF]
128 sjis [81..9F,E0..FC] [40..7E,80..FC] [A1..DF]
129
130
131 Essential byte sequences in various character sets:
132
133 Sequence big5 cp932 euckr gb2312 gbk sjis
134 -------- ---- ----- ----- ------ --- ----
135 80 ILSEQ ILSEQ ILSEQ ILSEQ ILSEQ ILSEQ
136 81 ILSEQ H2 H2 ILSEQ H2 H2
137 A1 H2 8BIT H2 H2 H2 8BIT
138 A1A1 MB2 8BIT+8BIT MB2 MB2 MB2 8BIT+8BIT
139 E0E0 MB2 MB2 MB2 MB2 MB2 MB2
140 F9FE MB2 H2+ILSEQ MB2 ILSEQ+T2 MB2 H2+ILSEQ
141*/
142
143
144/*
145 For character sets that have the following byte sequences:
146 80 - ILSEQ
147 81 - ILSEQ or H2
148 F9 - ILSEQ or H2
149 A1A1 - MB2 or 8BIT+8BIT
150 E0E0 - MB2
151*/
152static STRNNCOLL_PARAM strcoll_mb2_common[]=
153{
154 /* Compare two good sequences */
155 {CSTR(""), CSTR(""), 0},
156 {CSTR(""), CSTR(" "), 0},
157 {CSTR(""), CSTR("A"), -1},
158 {CSTR(""), CSTR("a"), -1},
159 {CSTR(""), CSTR("\xA1\xA1"), -1},
160 {CSTR(""), CSTR("\xE0\xE0"), -1},
161
162 {CSTR(" "), CSTR(""), 0},
163 {CSTR(" "), CSTR(" "), 0},
164 {CSTR(" "), CSTR("A"), -1},
165 {CSTR(" "), CSTR("a"), -1},
166 {CSTR(" "), CSTR("\xA1\xA1"), -1},
167 {CSTR(" "), CSTR("\xE0\xE0"), -1},
168
169 {CSTR("a"), CSTR(""), 1},
170 {CSTR("a"), CSTR(" "), 1},
171 {CSTR("a"), CSTR("a"), 0},
172 {CSTR("a"), CSTR("\xA1\xA1"), -1},
173 {CSTR("a"), CSTR("\xE0\xE0"), -1},
174
175 {CSTR("\xA1\xA1"), CSTR("\xA1\xA1"), 0},
176 {CSTR("\xA1\xA1"), CSTR("\xE0\xE0"), -1},
177
178 /* Compare a good character to an illegal or an incomplete sequence */
179 {CSTR(""), CSTR("\x80"), -1},
180 {CSTR(""), CSTR("\x81"), -1},
181 {CSTR(""), CSTR("\xF9"), -1},
182
183 {CSTR(" "), CSTR("\x80"), -1},
184 {CSTR(" "), CSTR("\x81"), -1},
185 {CSTR(" "), CSTR("\xF9"), -1},
186
187 {CSTR("a"), CSTR("\x80"), -1},
188 {CSTR("a"), CSTR("\x81"), -1},
189 {CSTR("a"), CSTR("\xF9"), -1},
190
191 {CSTR("\xA1\xA1"), CSTR("\x80"), -1},
192 {CSTR("\xA1\xA1"), CSTR("\x81"), -1},
193 {CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
194
195 {CSTR("\xE0\xE0"), CSTR("\x80"), -1},
196 {CSTR("\xE0\xE0"), CSTR("\x81"), -1},
197 {CSTR("\xE0\xE0"), CSTR("\xF9"), -1},
198
199 /* Compare two bad/incomplete sequences */
200 {CSTR("\x80"), CSTR("\x80"), 0},
201 {CSTR("\x80"), CSTR("\x81"), -1},
202 {CSTR("\x80"), CSTR("\xF9"), -1},
203 {CSTR("\x81"), CSTR("\x81"), 0},
204 {CSTR("\x81"), CSTR("\xF9"), -1},
205
206 {NULL, 0, NULL, 0, 0}
207};
208
209
210/*
211 For character sets that have good mb2 characters A1A1 and F9FE
212*/
213static STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
214{
215 /* Compare two good characters */
216 {CSTR(""), CSTR("\xF9\xFE"), -1},
217 {CSTR(" "), CSTR("\xF9\xFE"), -1},
218 {CSTR("a") , CSTR("\xF9\xFE"), -1},
219 {CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
220 {CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
221
222 /* Compare a good character to an illegal or an incomplete sequence */
223 {CSTR(""), CSTR("\xA1"), -1},
224 {CSTR(""), CSTR("\xF9"), -1},
225 {CSTR("a"), CSTR("\xA1"), -1},
226 {CSTR("a"), CSTR("\xF9"), -1},
227
228 {CSTR("\xA1\xA1"), CSTR("\xA1"), -1},
229 {CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
230
231 {CSTR("\xF9\xFE"), CSTR("\x80"), -1},
232 {CSTR("\xF9\xFE"), CSTR("\x81"), -1},
233 {CSTR("\xF9\xFE"), CSTR("\xA1"), -1},
234 {CSTR("\xF9\xFE"), CSTR("\xF9"), -1},
235
236 /* Compare two bad/incomplete sequences */
237 {CSTR("\x80"), CSTR("\xA1"), -1},
238 {CSTR("\x80"), CSTR("\xF9"), -1},
239
240 {NULL, 0, NULL, 0, 0}
241};
242
243
244/*
245 For character sets that have:
246 A1A1 - a good mb2 character
247 F9FE - a bad sequence
248*/
249static STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
250{
251 /* Compare a good character to an illegal or an incomplete sequence */
252 {CSTR(""), CSTR("\xF9\xFE"), -1},
253 {CSTR(" "), CSTR("\xF9\xFE"), -1},
254 {CSTR("a") , CSTR("\xF9\xFE"), -1},
255 {CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
256
257 {CSTR(""), CSTR("\xA1"), -1},
258 {CSTR(""), CSTR("\xF9"), -1},
259 {CSTR("a"), CSTR("\xA1"), -1},
260 {CSTR("a"), CSTR("\xF9"), -1},
261
262 {CSTR("\xA1\xA1"), CSTR("\xA1"), -1},
263 {CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
264
265 /* Compare two bad/incomplete sequences */
266 {CSTR("\xF9\xFE"), CSTR("\x80"), 1},
267 {CSTR("\xF9\xFE"), CSTR("\x81"), 1},
268 {CSTR("\xF9\xFE"), CSTR("\xA1"), 1},
269 {CSTR("\xF9\xFE"), CSTR("\xF9"), 1},
270 {CSTR("\x80"), CSTR("\xA1"), -1},
271 {CSTR("\x80"), CSTR("\xF9"), -1},
272 {CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
273
274 {NULL, 0, NULL, 0, 0}
275};
276
277
278/*
279 For character sets that have:
280 80 - ILSEQ or H2
281 81 - ILSEQ or H2
282 A1 - 8BIT
283 F9 - ILSEQ or H2
284 F9FE - a bad sequence (ILSEQ+XX or H2+ILSEQ)
285*/
286static STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
287{
288 /* Compare two good characters */
289 {CSTR(""), CSTR("\xA1"), -1},
290 {CSTR("\xA1\xA1"), CSTR("\xA1"), 1},
291
292 /* Compare a good character to an illegal or an incomplete sequence */
293 {CSTR(""), CSTR("\xF9"), -1},
294 {CSTR(""), CSTR("\xF9\xFE"), -1},
295 {CSTR(" "), CSTR("\xF9\xFE"), -1},
296 {CSTR("a"), CSTR("\xF9\xFE"), -1},
297 {CSTR("a"), CSTR("\xA1"), -1},
298 {CSTR("a"), CSTR("\xF9"), -1},
299
300 {CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
301 {CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
302
303 {CSTR("\xF9\xFE"), CSTR("\x80"), 1},
304 {CSTR("\xF9\xFE"), CSTR("\x81"), 1},
305 {CSTR("\xF9\xFE"), CSTR("\xA1"), 1},
306 {CSTR("\xF9\xFE"), CSTR("\xF9"), 1},
307
308 {CSTR("\x80"), CSTR("\xA1"), 1},
309
310 /* Compare two bad/incomplete sequences */
311 {CSTR("\x80"), CSTR("\xF9"), -1},
312 {CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
313
314 {NULL, 0, NULL, 0, 0}
315};
316
317
318/*
319 For character sets (e.g. cp932 and sjis) that have:
320 8181 - a valid MB2 character
321 A1 - a valid 8BIT character
322 E0E0 - a valid MB2 character
323 and sort in this order:
324 8181 < A1 < E0E0
325*/
326static STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
327{
328 {CSTR("\x81\x81"), CSTR("\xA1"), -1},
329 {CSTR("\x81\x81"), CSTR("\xE0\xE0"), -1},
330 {CSTR("\xA1"), CSTR("\xE0\xE0"), -1},
331
332 {NULL, 0, NULL, 0, 0}
333};
334
335
336/*
337 A shared test for eucjpms and ujis.
338*/
339static STRNNCOLL_PARAM strcoll_ujis[]=
340{
341 {CSTR("\x8E\xA1"), CSTR("\x8E"), -1}, /* Good MB2 vs incomplete MB2 */
342 {CSTR("\x8E\xA1"), CSTR("\x8F\xA1"), -1}, /* Good MB2 vs incomplete MB3 */
343 {CSTR("\x8E\xA1"), CSTR("\x8F\xA1\xA1"), -1}, /* Good MB2 vs good MB3 */
344 {CSTR("\xA1\xA1"), CSTR("\x8F\xA1\xA1"), 1}, /* Good MB2 vs good MB3 */
345 {CSTR("\x8E"), CSTR("\x8F\xA1"), -1}, /* Incomplete MB2 vs incomplete MB3 */
346 {NULL, 0, NULL, 0, 0}
347};
348
349
350static STRNNCOLL_PARAM strcoll_utf8mb3_common[]=
351{
352 {CSTR("\xC0"), CSTR("\xC1"), -1}, /* Unused byte vs unused byte */
353 {CSTR("\xC0"), CSTR("\xFF"), -1}, /* Unused byte vs unused byte */
354 {CSTR("\xC2\xA1"), CSTR("\xC0"), -1}, /* MB2 vs unused byte */
355 {CSTR("\xC2\xA1"), CSTR("\xC2"), -1}, /* MB2 vs incomplete MB2 */
356 {CSTR("\xC2\xA1"), CSTR("\xC2\xA2"), -1}, /* MB2 vs MB2 */
357 {CSTR("\xC2\xA1"), CSTR("\xE0\xA0\x7F"),-1}, /* MB2 vs broken MB3 */
358 {CSTR("\xC2\xA1"), CSTR("\xE0\xA0\x80"),-1}, /* MB2 vs MB3 */
359 {CSTR("\xC2\xA1"), CSTR("\xE0\xA0\xBF"),-1}, /* MB2 vs MB3 */
360 {CSTR("\xC2\xA1"), CSTR("\xE0\xA0\xC0"),-1}, /* MB2 vs broken MB3 */
361 {CSTR("\xC2\xA1"), CSTR("\xE0\xA0"), -1}, /* MB2 vs incomplete MB3 */
362 {CSTR("\xE0\xA0\x7E"), CSTR("\xE0\xA0\x7F"),-1},/* Broken MB3 vs broken MB3 */
363 {CSTR("\xE0\xA0\x80"), CSTR("\xE0\xA0"), -1},/* MB3 vs incomplete MB3 */
364 {CSTR("\xE0\xA0\x80"), CSTR("\xE0\xA0\x7F"),-1},/* MB3 vs broken MB3 */
365 {CSTR("\xE0\xA0\x80"), CSTR("\xE0\xA0\xBF"),-1},/* MB3 vs MB3 */
366 {CSTR("\xE0\xA0\x80"), CSTR("\xE0\xA0\xC0"),-1},/* MB3 vs broken MB3 */
367 {CSTR("\xE0\xA0\xC0"), CSTR("\xE0\xA0\xC1"),-1},/* Broken MB3 vs broken MB3 */
368 {NULL, 0, NULL, 0, 0}
369};
370
371
372static STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
373{
374 /* Minimum four-byte character: U+10000 == _utf8 0xF0908080 */
375 {CSTR("\xF0\x90\x80\x80"), CSTR("\xC0"), -1}, /* MB4 vs unused byte */
376 {CSTR("\xF0\x90\x80\x80"), CSTR("\xC2"), -1}, /* MB4 vs incomplete MB2 */
377 {CSTR("\xF0\x90\x80\x80"), CSTR("\xE0\xA0\x7F"),-1}, /* MB4 vs broken MB3 */
378 {CSTR("\xF0\x90\x80\x80"), CSTR("\xE0\xA0\xC0"),-1}, /* MB4 vs broken MB3 */
379 {CSTR("\xF0\x90\x80\x80"), CSTR("\xE0\xA0"), -1}, /* MB4 vs incomplete MB3 */
380 {CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80"),-1}, /* MB4 vs incomplete MB4 */
381 {CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\x7F"),-1},/* MB4 vs broken MB4 */
382 {CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\xC0"),-1},/* MB4 vs broken MB4 */
383
384 /* Maximum four-byte character: U+10FFFF == _utf8 0xF48FBFBF */
385 {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xC0"), -1}, /* MB4 vs unused byte */
386 {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xC2"), -1}, /* MB4 vs incomplete MB2 */
387 {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xE0\xA0\x7F"),-1}, /* MB4 vs broken MB3 */
388 {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xE0\xA0\xC0"),-1}, /* MB4 vs broken MB3 */
389 {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xE0\xA0"), -1}, /* MB4 vs incomplete MB3 */
390 {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xF0\x90\x80"),-1}, /* MB4 vs incomplete MB4 */
391 {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xF0\x90\x80\x7F"),-1},/* MB4 vs broken MB4 */
392 {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xF0\x90\x80\xC0"),-1},/* MB4 vs broken MB4 */
393
394 /* Broken MB4 vs incomplete/broken MB3 */
395 {CSTR("\xF0\x90\x80\x7F"), CSTR("\xE0\xA0"), 1}, /* Broken MB4 vs incomplete MB3 */
396 {CSTR("\xF0\x90\x80\x7F"), CSTR("\xE0\xA0\x7F"),1}, /* Broken MB4 vs broken MB3 */
397 {CSTR("\xF0\x90\x80\x7F"), CSTR("\xE0\xA0\xC0"),1}, /* Broken MB4 vs broken MB3 */
398
399 /*
400 Broken MB4 vs incomplete MB4:
401 The three leftmost bytes are compared binary, the fourth byte is compared
402 to auto-padded space.
403 */
404 {CSTR("\xF0\x90\x80\x1F"), CSTR("\xF0\x90\x80"),-1}, /* Broken MB4 vs incomplete MB4 */
405 {CSTR("\xF0\x90\x80\x7E"), CSTR("\xF0\x90\x80"),1}, /* Broken MB4 vs incomplete MB4 */
406
407 /* Broken MB4 vs broken MB4 */
408 {CSTR("\xF0\x90\x80\x7E"), CSTR("\xF0\x90\x80\x7F"),-1},/* Broken MB4 vs broken MB4 */
409 {CSTR("\xF0\x90\x80\x7E"), CSTR("\xF0\x90\x80\xC0"),-1},/* Broken MB4 vs broken MB4 */
410
411 {NULL, 0, NULL, 0, 0}
412};
413
414
415static STRNNCOLL_PARAM strcoll_utf8mb4_general_ci[]=
416{
417 /* All non-BMP characters are equal in utf8mb4_general_ci */
418 {CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\x81"),0},/* Non-BMB MB4 vs non-BMP MB4 */
419 {CSTR("\xF0\x90\x80\x80"), CSTR("\xF4\x8F\xBF\xBF"),0},/* Non-BMB MB4 vs non-BMP MB4 */
420 {CSTR("\x00"), CSTR("\xF0\x90\x80\x80"),-1},/* U+0000 vs non-BMP MB4 */
421 {CSTR("\x00"), CSTR("\xF0\x90\x80\x81"),-1},/* U+0000 vs non-BMP MB4 */
422 {CSTR("\x00"), CSTR("\xF4\x8F\xBF\xBF"),-1},/* U+0000 vs non-BMP MB4 */
423 {NULL, 0, NULL, 0, 0}
424};
425
426
427static STRNNCOLL_PARAM strcoll_ucs2_common[]=
428{
429 {CSTR("\xC0"), CSTR("\xC1"), -1}, /* Incomlete MB2 vs incomplete MB2 */
430 {CSTR("\xC0"), CSTR("\xFF"), -1}, /* Incomlete MB2 vs incomplete MB2 */
431 {CSTR("\xC2\xA1"), CSTR("\xC0"), -1}, /* MB2 vs incomplete MB2 */
432 {CSTR("\xC2\xA1"), CSTR("\xC2"), -1}, /* MB2 vs incomplete MB2 */
433 {CSTR("\xC2\xA0"), CSTR("\xC2\xA1"), -1}, /* MB2 vs MB2 */
434 {CSTR("\xC2\xA1"), CSTR("\xC2\xA2"), -1}, /* MB2 vs MB2 */
435
436 {CSTR("\xFF\xFF"), CSTR("\x00"),-1}, /* MB2 vs incomplete */
437 {CSTR("\xFF\xFF\xFF\xFF"), CSTR("\x00"),-1}, /* MB2+MB2 vs incomplete */
438 {CSTR("\xFF\xFF\xFF\xFF"), CSTR("\x00\x00\x00"), 1},/* MB2+MB2 vs MB2+incomplete */
439
440 {NULL, 0, NULL, 0, 0}
441};
442
443
444/* Tests that involve comparison to SPACE (explicit, or padded) */
445static STRNNCOLL_PARAM strcoll_ucs2_space[]=
446{
447 {CSTR("\x00\x1F"), CSTR("\x00\x20"), -1}, /* MB2 vs MB2 */
448 {CSTR("\x00\x20"), CSTR("\x00\x21"), -1}, /* MB2 vs MB2 */
449 {CSTR("\x00\x1F"), CSTR(""), -1}, /* MB2 vs empty */
450 {CSTR("\x00\x20"), CSTR(""), 0}, /* MB2 vs empty */
451 {CSTR("\x00\x21"), CSTR(""), 1}, /* MB2 vs empty */
452
453 {NULL, 0, NULL, 0, 0}
454};
455
456
457/* Tests that involve comparison to SPACE (explicit, or padded) */
458static STRNNCOLL_PARAM strcoll_utf16le_space[]=
459{
460 {CSTR("\x1F\x00"), CSTR("\x20\x00"), -1}, /* MB2 vs MB2 */
461 {CSTR("\x20\x00"), CSTR("\x21\x00"), -1}, /* MB2 vs MB2 */
462 {CSTR("\x1F\x00"), CSTR(""), -1}, /* MB2 vs empty */
463 {CSTR("\x20\x00"), CSTR(""), 0}, /* MB2 vs empty */
464 {CSTR("\x21\x00"), CSTR(""), 1}, /* MB2 vs empty */
465
466 {NULL, 0, NULL, 0, 0}
467};
468
469
470static STRNNCOLL_PARAM strcoll_utf16_common[]=
471{
472 /* Minimum four-byte character: U+10000 == _utf16 0xD800DC00 */
473 {CSTR("\xD8\x00\xDC\x00"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
474 {CSTR("\xD8\x00\xDC\x00"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
475 {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDB\x00"),-1},/* MB4 vs broken MB4 */
476 {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xE0\x00"),-1},/* MB4 vs broken MB4 */
477 {CSTR("\xD8\x00\xDC\x00"), CSTR("\xDC\x00"), -1},/* MB4 vs broken MB2 */
478 {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC"), -1},/* MB4 vs incomplete MB4 */
479
480 /* Maximum four-byte character: U+10FFFF == _utf8 0xF48FBFBF */
481 {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
482 {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
483 {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xD8\x00\xDB\x00"),-1},/* MB4 vs broken MB4 */
484 {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xD8\x00\xE0\x00"),-1},/* MB4 vs broken MB4 */
485 {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\x00"), -1},/* MB4 vs broken MB2 */
486 {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\xFF\xDF"), -1},/* MB4 vs incomplete MB4 */
487
488 /* Broken MB4 vs broken MB4 */
489 {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDB\x01"),-1},/* Broken MB4 vs broken MB4 */
490 {CSTR("\xDB\xFF\xE0\xFE"), CSTR("\xDB\xFF\xE0\xFF"),-1},/* Broken MB4 vs broken MB4 */
491
492 {NULL, 0, NULL, 0, 0}
493};
494
495
496static STRNNCOLL_PARAM strcoll_utf16_general_ci[]=
497{
498 /* All non-BMP characters are compared as equal */
499 {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC\x01"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
500 {CSTR("\xD8\x00\xDC\x00"), CSTR("\xDB\xFF\xDF\xFF"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
501 {CSTR("\x00\x00"), CSTR("\xD8\x00\xDC\x01"),-1},/* U+0000 vs non-BMP MB4 */
502 {CSTR("\x00\x00"), CSTR("\xDB\xFF\xDF\xFF"),-1},/* U+0000 vs non-BMP MB4 */
503 {NULL, 0, NULL, 0, 0}
504};
505
506
507static STRNNCOLL_PARAM strcoll_utf16le_common[]=
508{
509 /* Minimum four-byte character: U+10000 == _utf16 0xD800DC00 */
510 {CSTR("\x00\xD8\x00\xDC"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
511 {CSTR("\x00\xD8\x00\xDC"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
512 {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x00\xDB"),-1},/* MB4 vs broken MB4 */
513 {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x00\xD0"),-1},/* MB4 vs broken MB4 */
514 {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xDC"), -1},/* MB4 vs broken MB2 */
515 {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x00"), -1},/* MB4 vs incomplete MB4 */
516
517 /* Maximum four-byte character: U+10FFFF == _utf8 0xF48FBFBF */
518 {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
519 {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
520 {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\x00\xD8\x00\xDB"),-1},/* MB4 vs broken MB4 */
521 {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\x00\xD8\x00\xE0"),-1},/* MB4 vs broken MB4 */
522 {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\x00\xDC"), -1},/* MB4 vs broken MB2 */
523 {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xFF\xDC\x00"), -1},/* MB4 vs incomplete MB4 */
524
525 /* Broken MB4 vs broken MB4 */
526 {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDB"),-1},/* Broken MB4 vs broken MB4 */
527 {CSTR("\xFF\xDB\xFE\xE0"), CSTR("\xFF\xDB\xFF\xE0"),-1},/* Broken MB4 vs broken MB4 */
528
529 {NULL, 0, NULL, 0, 0}
530};
531
532
533static STRNNCOLL_PARAM strcoll_utf16le_general_ci[]=
534{
535 /* All non-BMP characters are compared as equal */
536 {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDC"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
537 {CSTR("\x00\xD8\x00\xDC"), CSTR("\xFF\xDB\xFF\xDF"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
538 {CSTR("\x00\x00"), CSTR("\x00\xD8\x01\xDC"), -1},/* U+0000 vs non-BMP MB4 */
539 {CSTR("\x00\x00"), CSTR("\xFF\xDB\xFF\xDF"), -1},/* U+0000 vs non-BMP MB4 */
540
541 {NULL, 0, NULL, 0, 0}
542};
543
544
545static STRNNCOLL_PARAM strcoll_utf32_common[]=
546{
547 /* Minimum character: U+0000 == _utf32 0x00000000 */
548 {CSTR("\x00\x00\x00\x00"), CSTR("\x00"), -1}, /* MB4 vs incomplete MB4 */
549 {CSTR("\x00\x00\x00\x00"), CSTR("\xFF"), -1}, /* MB4 vs incomplete MB4 */
550 {CSTR("\x00\x00\x00\x00"), CSTR("\x00\x00"), -1}, /* MB4 vs incomplete MB4 */
551 {CSTR("\x00\x00\x00\x00"), CSTR("\x00\x00\x00"),-1}, /* MB4 vs incomplete MB4 */
552 {CSTR("\x00\x00\x00\x00"), CSTR("\x00\x20\x00\x00"),-1},/* MB4 vs broken MB4 */
553 {CSTR("\x00\x00\x00\x00"), CSTR("\xFF\xFF\xFF\xFF"),-1},/* MB4 vs broken MB4 */
554
555 /* Minimum non-BMP character: U+10000 == _utf32 0x00010000 */
556 {CSTR("\x00\x01\x00\x00"), CSTR("\x00"), -1}, /* MB4 vs incomplete MB4 */
557 {CSTR("\x00\x01\x00\x00"), CSTR("\xFF"), -1}, /* MB4 vs incomplete MB4 */
558 {CSTR("\x00\x01\x00\x00"), CSTR("\x00\x00"), -1}, /* MB4 vs incomplete MB4 */
559 {CSTR("\x00\x01\x00\x00"), CSTR("\x00\x00\x00"),-1}, /* MB4 vs incomplete MB4 */
560 {CSTR("\x00\x01\x00\x00"), CSTR("\x00\x20\x00\x00"),-1},/* MB4 vs broken MB4 */
561 {CSTR("\x00\x01\x00\x00"), CSTR("\xFF\xFF\xFF\xFF"),-1},/* MB4 vs broken MB4 */
562
563 /* Maximum character: U+10FFFF == _utf32 0x0010FFFF */
564 {CSTR("\x00\x10\xFF\xFF"), CSTR("\x00"), -1}, /* MB4 vs incomplete MB4 */
565 {CSTR("\x00\x10\xFF\xFF"), CSTR("\xFF"), -1}, /* MB4 vs incomplete MB4 */
566 {CSTR("\x00\x10\xFF\xFF"), CSTR("\x00\x00"), -1}, /* MB4 vs incomplete MB4 */
567 {CSTR("\x00\x10\xFF\xFF"), CSTR("\x00\x00\x00"), -1}, /* MB4 vs incomplete MB4 */
568 {CSTR("\x00\x10\xFF\xFF"), CSTR("\x20\x00\x00\x00"),-1},/* MB4 vs broken MB3 */
569 {CSTR("\x00\x10\xFF\xFF"), CSTR("\xFF\xFF\xFF\xFF"),-1},/* MB4 vs broken MB4 */
570
571
572 /* Broken MB4 vs incomplete/broken MB3 */
573 {CSTR("\x00\x20\x00\x00"), CSTR("\x00"), 1}, /* Broken MB4 vs incomplete MB4 */
574 {CSTR("\x00\x20\x00\x00"), CSTR("\x00\x00"), 1}, /* Broken MB4 vs incomplete MB4 */
575 {CSTR("\x00\x20\x00\x00"), CSTR("\x00\x00\x00"), 1}, /* Broken MB4 vs incomplete MB4 */
576 {CSTR("\x00\x20\x00\x00"), CSTR("\x00\x20\x00\x01"),-1},/* Broken MB4 vs broken MB4 */
577
578 {NULL, 0, NULL, 0, 0}
579};
580
581
582static STRNNCOLL_PARAM strcoll_utf32_general_ci[]=
583{
584 /* Two non-BMP characters are compared as equal */
585 {CSTR("\x00\x01\x00\x00"), CSTR("\x00\x01\x00\x01"), 0}, /* non-BMP MB4 vs non-BMP MB4 */
586 {CSTR("\x00\x00\x00\x00"), CSTR("\x00\x01\x00\x00"), -1}, /* U+0000 vs non-BMP MB4 */
587 {CSTR("\x00\x00\x00\x00"), CSTR("\x00\x01\x00\x01"), -1}, /* U+0000 vs non-BMP MB4 */
588
589 {NULL, 0, NULL, 0, 0}
590};
591
592
/*
  Write the upper-case hexadecimal form of a byte string into a buffer.

  Every source byte becomes two hex digits.  The result is always
  '\0'-terminated when dstlen > 0.  Output is silently truncated to whole
  byte pairs when the buffer is too small: a pair is emitted only while
  more than three bytes remain free, so the terminator always fits.

  @param dst     Destination buffer
  @param dstlen  Size of dst in bytes; if 0, dst is not touched
  @param src     Source bytes (may contain '\0')
  @param srclen  Number of bytes in src
*/
static void
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
{
  static const char hex_digits[]= "0123456789ABCDEF";
  size_t used= 0;                 /* Bytes written to dst so far */
  size_t i;

  /* A zero-sized buffer has no room even for the terminator */
  if (dstlen == 0)
    return;

  /*
    Work with offsets rather than "dst + 3 < dstend": forming a pointer
    beyond one-past-the-end of a short buffer is undefined behaviour.
  */
  for (i= 0; i < srclen && used + 3 < dstlen; i++)
  {
    unsigned char byte= (unsigned char) src[i];
    dst[used++]= hex_digits[byte >> 4];
    dst[used++]= hex_digits[byte & 0x0F];
  }
  dst[used]= '\0';
}
605
606
/*
  Tell whether two comparison results mean the same thing, i.e. whether
  they have the same sign: both negative, both positive, or both zero.

  @return 1 if the signs agree, 0 otherwise
*/
static int
eqres(int ares, int bres)
{
  /* Reduce each result to -1, 0 or +1 and compare the reduced values */
  int asign= (ares > 0) - (ares < 0);
  int bsign= (bres > 0) - (bres < 0);
  return asign == bsign;
}
618
619
620static int
621strcollsp(CHARSET_INFO *cs, const STRNNCOLL_PARAM *param)
622{
623 int failed= 0;
624 const STRNNCOLL_PARAM *p;
625 diag("%-20s %-10s %-10s %10s %10s", "Collation", "a", "b", "ExpectSign", "Actual");
626 for (p= param; p->a; p++)
627 {
628 char ahex[64], bhex[64];
629 int res= cs->coll->strnncollsp(cs, (uchar *) p->a, p->alen,
630 (uchar *) p->b, p->blen);
631 str2hex(ahex, sizeof(ahex), p->a, p->alen);
632 str2hex(bhex, sizeof(bhex), p->b, p->blen);
633 diag("%-20s %-10s %-10s %10d %10d%s",
634 cs->name, ahex, bhex, p->res, res,
635 eqres(res, p->res) ? "" : " FAILED");
636 if (!eqres(res, p->res))
637 {
638 failed++;
639 }
640 else
641 {
642 /* Test in reverse order */
643 res= cs->coll->strnncollsp(cs, (uchar *) p->b, p->blen,
644 (uchar *) p->a, p->alen);
645 if (!eqres(res, -p->res))
646 {
647 diag("Comparison in reverse order failed. Expected %d, got %d",
648 -p->res, res);
649 failed++;
650 }
651 }
652 }
653 return failed;
654}
655
656
657static int
658test_strcollsp()
659{
660 int failed= 0;
661#ifdef HAVE_CHARSET_big5
662 failed+= strcollsp(&my_charset_big5_chinese_ci, strcoll_mb2_common);
663 failed+= strcollsp(&my_charset_big5_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
664 failed+= strcollsp(&my_charset_big5_bin, strcoll_mb2_common);
665 failed+= strcollsp(&my_charset_big5_bin, strcoll_mb2_A1A1_mb2_F9FE);
666#endif
667#ifdef HAVE_CHARSET_cp932
668 failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_mb2_common);
669 failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_mb1_A1_bad_F9FE);
670 failed+= strcollsp(&my_charset_cp932_bin, strcoll_mb2_common);
671 failed+= strcollsp(&my_charset_cp932_bin, strcoll_mb1_A1_bad_F9FE);
672 failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_8181_A1_E0E0);
673 failed+= strcollsp(&my_charset_cp932_bin, strcoll_8181_A1_E0E0);
674#endif
675#ifdef HAVE_CHARSET_eucjpms
676 failed+= strcollsp(&my_charset_eucjpms_japanese_ci, strcoll_mb2_common);
677 failed+= strcollsp(&my_charset_eucjpms_bin, strcoll_mb2_common);
678 failed+= strcollsp(&my_charset_eucjpms_japanese_ci, strcoll_mb2_A1A1_mb2_F9FE);
679 failed+= strcollsp(&my_charset_eucjpms_bin, strcoll_mb2_A1A1_mb2_F9FE);
680 failed+= strcollsp(&my_charset_eucjpms_japanese_ci, strcoll_ujis);
681 failed+= strcollsp(&my_charset_eucjpms_bin, strcoll_ujis);
682#endif
683#ifdef HAVE_CHARSET_euckr
684 failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_common);
685 failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_A1A1_mb2_F9FE);
686 failed+= strcollsp(&my_charset_euckr_bin, strcoll_mb2_common);
687 failed+= strcollsp(&my_charset_euckr_bin, strcoll_mb2_A1A1_mb2_F9FE);
688#endif
689#ifdef HAVE_CHARSET_gb2312
690 failed+= strcollsp(&my_charset_gb2312_chinese_ci, strcoll_mb2_common);
691 failed+= strcollsp(&my_charset_gb2312_chinese_ci, strcoll_mb2_A1A1_bad_F9FE);
692 failed+= strcollsp(&my_charset_gb2312_bin, strcoll_mb2_common);
693 failed+= strcollsp(&my_charset_gb2312_bin, strcoll_mb2_A1A1_bad_F9FE);
694#endif
695#ifdef HAVE_CHARSET_gbk
696 failed+= strcollsp(&my_charset_gbk_chinese_ci, strcoll_mb2_common);
697 failed+= strcollsp(&my_charset_gbk_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
698 failed+= strcollsp(&my_charset_gbk_bin, strcoll_mb2_common);
699 failed+= strcollsp(&my_charset_gbk_bin, strcoll_mb2_A1A1_mb2_F9FE);
700#endif
701#ifdef HAVE_CHARSET_sjis
702 failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_mb2_common);
703 failed+= strcollsp(&my_charset_sjis_bin, strcoll_mb2_common);
704 failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_mb1_A1_bad_F9FE);
705 failed+= strcollsp(&my_charset_sjis_bin, strcoll_mb1_A1_bad_F9FE);
706 failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_8181_A1_E0E0);
707 failed+= strcollsp(&my_charset_sjis_bin, strcoll_8181_A1_E0E0);
708#endif
709#ifdef HAVE_CHARSET_ucs2
710 failed+= strcollsp(&my_charset_ucs2_general_ci, strcoll_ucs2_common);
711 failed+= strcollsp(&my_charset_ucs2_general_ci, strcoll_ucs2_space);
712 failed+= strcollsp(&my_charset_ucs2_bin, strcoll_ucs2_common);
713 failed+= strcollsp(&my_charset_ucs2_bin, strcoll_ucs2_space);
714#endif
715#ifdef HAVE_CHARSET_ujis
716 failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_mb2_common);
717 failed+= strcollsp(&my_charset_ujis_bin, strcoll_mb2_common);
718 failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_mb2_A1A1_mb2_F9FE);
719 failed+= strcollsp(&my_charset_ujis_bin, strcoll_mb2_A1A1_mb2_F9FE);
720 failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_ujis);
721 failed+= strcollsp(&my_charset_ujis_bin, strcoll_ujis);
722#endif
723#ifdef HAVE_CHARSET_utf16
724 failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_common);
725 failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_space);
726 failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_common);
727 failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_general_ci);
728 failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_common);
729 failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_space);
730 failed+= strcollsp(&my_charset_utf16_bin, strcoll_utf16_common);
731
732 failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_ucs2_common);
733 failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_space);
734 failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_common);
735 failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_general_ci);
736 failed+= strcollsp(&my_charset_utf16le_bin, strcoll_ucs2_common);
737 failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_space);
738 failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_common);
739#endif
740#ifdef HAVE_CHARSET_utf32
741 failed+= strcollsp(&my_charset_utf32_general_ci, strcoll_utf32_common);
742 failed+= strcollsp(&my_charset_utf32_general_ci, strcoll_utf32_general_ci);
743 failed+= strcollsp(&my_charset_utf32_bin, strcoll_utf32_common);
744#endif
745#ifdef HAVE_CHARSET_utf8
746 failed+= strcollsp(&my_charset_utf8_general_ci, strcoll_utf8mb3_common);
747 failed+= strcollsp(&my_charset_utf8_general_mysql500_ci, strcoll_utf8mb3_common);
748 failed+= strcollsp(&my_charset_utf8_bin, strcoll_utf8mb3_common);
749#endif
750#ifdef HAVE_CHARSET_utf8mb4
751 failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb3_common);
752 failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb3_common);
753 failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_common);
754 failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_general_ci);
755 failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb4_common);
756#endif
757 return failed;
758}
759
760
761int main()
762{
763 size_t i, failed= 0;
764
765 plan(2);
766 diag("Testing my_like_range_xxx() functions");
767
768 for (i= 0; i < array_elements(charset_list); i++)
769 {
770 CHARSET_INFO *cs= charset_list[i];
771 if (test_like_range_for_charset(cs, "abc%", 4))
772 {
773 ++failed;
774 diag("Failed for %s", cs->name);
775 }
776 }
777 ok(failed == 0, "Testing my_like_range_xxx() functions");
778
779 diag("Testing cs->coll->strnncollsp()");
780 failed= test_strcollsp();
781 ok(failed == 0, "Testing cs->coll->strnncollsp()");
782
783 return exit_status();
784}
785