1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* Copyright(C) 2009-2016 Brazil |
3 | |
4 | This library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License version 2.1 as published by the Free Software Foundation. |
7 | |
8 | This library is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | Lesser General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU Lesser General Public |
14 | License along with this library; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | */ |
17 | #include "grn.h" |
18 | #include <limits.h> |
19 | #include <stdarg.h> |
20 | #include <string.h> |
21 | #include "grn_db.h" |
22 | #include "grn_str.h" |
23 | #include "grn_nfkc.h" |
24 | |
25 | #ifndef _ISOC99_SOURCE |
26 | #define _ISOC99_SOURCE |
27 | #endif /* _ISOC99_SOURCE */ |
28 | #include <math.h> |
29 | |
30 | #if defined(HAVE__GMTIME64_S) && defined(__GNUC__) |
31 | # ifdef _WIN64 |
32 | # define gmtime_s(tm, time) _gmtime64_s(tm, time) |
33 | # else /* _WIN64 */ |
34 | # define gmtime_s(tm, time) _gmtime32_s(tm, time) |
35 | # endif /* _WIN64 */ |
36 | #endif /* defined(HAVE__GMTIME64_S) && defined(__GNUC__) */ |
37 | |
38 | inline static int |
39 | grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char *end) |
40 | { |
41 | /* MEMO: This function allows non-null-terminated string as str. */ |
42 | /* But requires the end of string. */ |
43 | if (end <= str || !*str) { |
44 | return 0; |
45 | } |
46 | if (*str & 0x80) { |
47 | int i; |
48 | int len; |
49 | GRN_BIT_SCAN_REV(~(*str << 24), len); |
50 | len = 31 - len; |
51 | if ((unsigned int)(len - 2) >= 3) { /* (len == 1 || len >= 5) */ |
52 | GRN_LOG(ctx, GRN_LOG_WARNING, |
53 | "grn_str_charlen_utf8(): first byte is invalid" ); |
54 | return 0; |
55 | } |
56 | if (str + len > end) { |
57 | GRN_LOG(ctx, GRN_LOG_WARNING, |
58 | "grn_str_charlen_utf8(): incomplete character" ); |
59 | return 0; |
60 | } |
61 | for (i = 1; i < len; ++i) { |
62 | if ((str[i] & 0xc0) != 0x80) { |
63 | GRN_LOG(ctx, GRN_LOG_WARNING, |
64 | "grn_str_charlen_utf8(): <%d>th byte is invalid" , |
65 | i + 1); |
66 | return 0; |
67 | } |
68 | } |
69 | return len; |
70 | } else { |
71 | return 1; |
72 | } |
73 | } |
74 | |
75 | unsigned int |
76 | grn_str_charlen(grn_ctx *ctx, const char *str, grn_encoding encoding) |
77 | { |
78 | /* MEMO: This function requires null-terminated string as str.*/ |
79 | unsigned char *p = (unsigned char *) str; |
80 | if (!*p) { return 0; } |
81 | switch (encoding) { |
82 | case GRN_ENC_EUC_JP : |
83 | if (*p & 0x80) { |
84 | if (*(p + 1)) { |
85 | return 2; |
86 | } else { |
87 | /* This is invalid character */ |
88 | GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_str_charlen" ); |
89 | return 0; |
90 | } |
91 | } |
92 | return 1; |
93 | case GRN_ENC_UTF8 : |
94 | if (*p & 0x80) { |
95 | int b, w; |
96 | size_t size; |
97 | for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++); |
98 | if (!w) { |
99 | GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(1) on grn_str_charlen" ); |
100 | return 0; |
101 | } |
102 | for (size = 1; w--; size++) { |
103 | if (!*++p || (*p & 0xc0) != 0x80) { |
104 | GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen" ); |
105 | return 0; |
106 | } |
107 | } |
108 | return size; |
109 | } else { |
110 | return 1; |
111 | } |
112 | case GRN_ENC_SJIS : |
113 | if (*p & 0x80) { |
114 | /* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */ |
115 | if (0xa0 <= *p && *p <= 0xdf) { |
116 | /* hankaku-kana */ |
117 | return 1; |
118 | } else if (!(*(p + 1))) { |
119 | /* This is invalid character */ |
120 | GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_str_charlen" ); |
121 | return 0; |
122 | } else { |
123 | return 2; |
124 | } |
125 | } else { |
126 | return 1; |
127 | } |
128 | default : |
129 | return 1; |
130 | } |
131 | return 0; |
132 | } |
133 | |
134 | int |
135 | grn_charlen_(grn_ctx *ctx, const char *str, const char *end, grn_encoding encoding) |
136 | { |
137 | /* MEMO: This function allows non-null-terminated string as str. */ |
138 | /* But requires the end of string. */ |
139 | unsigned char *p = (unsigned char *) str; |
140 | if (p >= (unsigned char *)end) { return 0; } |
141 | switch (encoding) { |
142 | case GRN_ENC_EUC_JP : |
143 | if (*p & 0x80) { |
144 | if ((p + 1) < (unsigned char *)end) { |
145 | return 2; |
146 | } else { |
147 | /* This is invalid character */ |
148 | GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_charlen" ); |
149 | return 0; |
150 | } |
151 | } |
152 | return 1; |
153 | case GRN_ENC_UTF8 : |
154 | return grn_str_charlen_utf8(ctx, p, (unsigned char *)end); |
155 | case GRN_ENC_SJIS : |
156 | if (*p & 0x80) { |
157 | /* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */ |
158 | if (0xa0 <= *p && *p <= 0xdf) { |
159 | /* hankaku-kana */ |
160 | return 1; |
161 | } else if (++p >= (unsigned char *)end) { |
162 | /* This is invalid character */ |
163 | GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_charlen" ); |
164 | return 0; |
165 | } else { |
166 | return 2; |
167 | } |
168 | } else { |
169 | return 1; |
170 | } |
171 | default : |
172 | return 1; |
173 | } |
174 | return 0; |
175 | } |
176 | |
177 | int |
178 | grn_charlen(grn_ctx *ctx, const char *str, const char *end) |
179 | { |
180 | return grn_charlen_(ctx, str, end, ctx->encoding); |
181 | } |
182 | |
/*
 * ASCII replacements for two-byte symbol characters, shared by
 * normalize_euc() and normalize_sjis().
 *
 * normalize_euc() indexes it with (second byte - 0xa4) for characters whose
 * first byte is 0xa1.  normalize_sjis() indexes it with (second byte - 0x43)
 * or (second byte - 0x44) for characters whose first byte is 0x81.
 * A 0 entry means there is no replacement and the character is kept in its
 * two-byte form.
 */
static unsigned char symbol[] = {
  ',', '.', 0, ':', ';', '?', '!', 0, 0, 0, '`', 0, '^', '~', '_', 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, '-', '-', '/', '\\', 0, 0, '|', 0, 0, 0, '\'', 0,
  '"', '(', ')', 0, 0, '[', ']', '{', '}', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  '+', '-', 0, 0, 0, '=', 0, '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  '$', 0, 0, '%', '#', '&', '*', '@', 0, 0, 0, 0, 0, 0, 0, 0
};
190 | |
/*
 * Normalizes nstr->orig (nstr->orig_blen bytes), scanned as EUC-JP, into a
 * freshly allocated nstr->norm.
 *
 * Visible effects of the loop below:
 *   - bytes below 0x20 are dropped; a blank (0x20, or the two-byte code
 *     0xa1 0xa1) is dropped when GRN_STR_REMOVEBLANK is set and emitted as
 *     ' ' otherwise;
 *   - ASCII 'A'..'Z' are lowercased;
 *   - two-byte characters whose first byte is 0xa3 and whose second byte
 *     minus 0x80 is an ASCII letter or digit are emitted as that single
 *     ASCII character (letters lowercased); 0xa1-row characters that have
 *     an entry in the file-level symbol[] table are emitted as one ASCII
 *     byte;
 *   - 0x8e-prefixed characters are rewritten through hankana[]; the marks
 *     0xa1ab / 0xa1eb are merged into a preceding 0xa5-row character when
 *     dakuten[] / handaku[] has an entry for it;
 *   - a high-bit byte that is not followed by another high-bit byte is
 *     skipped.
 *
 * nstr->checks and nstr->ctypes are allocated when GRN_STR_WITH_CHECKS /
 * GRN_STR_WITH_CTYPES are set, and are filled whenever they are non-NULL.
 *
 * Returns GRN_SUCCESS, or GRN_NO_MEMORY_AVAILABLE when an allocation fails
 * (buffers allocated so far are freed and their pointers reset to NULL).
 */
inline static grn_rc
normalize_euc(grn_ctx *ctx, grn_str *nstr)
{
  /* Two-byte codes emitted for 0x8e-prefixed characters, indexed by
     (second byte - 0xa0). */
  static uint16_t hankana[] = {
    0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
    0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
    0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
    0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
    0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
    0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
    0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
    0xa1eb
  };
  /* Replacement low byte for a preceding 0xa5-row character followed by the
     0xa1ab mark, indexed by (previous low byte - 0xa6); 0 = no merge. */
  static unsigned char dakuten[] = {
    0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
    0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
    0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
    0, 0xdc
  };
  /* Same as dakuten[] but for the 0xa1eb mark, indexed by
     (previous low byte - 0xcf). */
  static unsigned char handaku[] = {
    0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
  };
  int16_t *ch;                      /* write cursor into nstr->checks */
  const unsigned char *s, *s_, *e;  /* s: read cursor, s_: source position
                                       after the last emitted character,
                                       e: end of source */
  unsigned char *d, *d0, *d_, b;    /* d: write cursor, d0: start of output,
                                       d_: trails d for checks zero-fill */
  uint_least8_t *cp, *ctypes, ctype;
  size_t size = nstr->orig_blen, length = 0;
  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
  /* One source byte (0x8e-less half of a pair aside) never yields more than
     two output bytes here, hence the size * 2 buffers. */
  if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
    return GRN_NO_MEMORY_AVAILABLE;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & GRN_STR_WITH_CHECKS) {
    if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
      GRN_FREE(nstr->norm);
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & GRN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
      GRN_FREE(nstr->checks);
      GRN_FREE(nstr->norm);
      nstr->checks = NULL;
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
    if ((*s & 0x80)) {
      if (((s + 1) < e) && (*(s + 1) & 0x80)) {
        /* Two-byte character: s is left on the second byte, the loop's s++
           then steps past it. */
        unsigned char c1 = *s++, c2 = *s, c3 = 0;
        switch (c1 >> 4) {
        case 0x08 :
          if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
            uint16_t c = hankana[c2 - 0xa0];
            switch (c) {
            case 0xa1ab :
              /* Merge the mark into the previously emitted 0xa5-row
                 character when dakuten[] has a combined form. */
              if (d > d0 + 1 && d[-2] == 0xa5
                  && 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
                *(d - 1) = b;
                /* NOTE(review): ch[-1] is the slot of the last output byte,
                   i.e. the trailing 0 slot of the previous two-byte
                   character rather than its leading slot — confirm this is
                   intended. */
                if (ch) { ch[-1] += 2; s_ += 2; }
                continue;
              } else {
                *d++ = c >> 8; *d = c & 0xff;
              }
              break;
            case 0xa1eb :
              /* Same as above, using handaku[]. */
              if (d > d0 + 1 && d[-2] == 0xa5
                  && 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
                *(d - 1) = b;
                if (ch) { ch[-1] += 2; s_ += 2; }
                continue;
              } else {
                *d++ = c >> 8; *d = c & 0xff;
              }
              break;
            default :
              *d++ = c >> 8; *d = c & 0xff;
              break;
            }
            ctype = GRN_CHAR_KATAKANA;
          } else {
            *d++ = c1; *d = c2;
            ctype = GRN_CHAR_OTHERS;
          }
          break;
        case 0x09 :
          *d++ = c1; *d = c2;
          ctype = GRN_CHAR_OTHERS;
          break;
        case 0x0a :
          /* First byte 0xa0..0xaf: dispatch on the low nibble. */
          switch (c1 & 0x0f) {
          case 1 :
            switch (c2) {
            case 0xbc :
              *d++ = c1; *d = c2;
              ctype = GRN_CHAR_KATAKANA;
              break;
            case 0xb9 :
              *d++ = c1; *d = c2;
              ctype = GRN_CHAR_KANJI;
              break;
            case 0xa1 :
              /* Two-byte blank: dropped or emitted as an ASCII space. */
              if (removeblankp) {
                if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
                continue;
              } else {
                *d = ' ';
                ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
              }
              break;
            default :
              /* Replace with the ASCII symbol when symbol[] has one. */
              if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
                *d = c3;
                ctype = GRN_CHAR_SYMBOL;
              } else {
                *d++ = c1; *d = c2;
                ctype = GRN_CHAR_OTHERS;
              }
              break;
            }
            break;
          case 2 :
            *d++ = c1; *d = c2;
            ctype = GRN_CHAR_SYMBOL;
            break;
          case 3 :
            /* Second byte minus 0x80 is tested against ASCII letters and
               digits; matches are emitted as one ASCII byte. */
            c3 = c2 - 0x80;
            if ('a' <= c3 && c3 <= 'z') {
              ctype = GRN_CHAR_ALPHA;
              *d = c3;
            } else if ('A' <= c3 && c3 <= 'Z') {
              ctype = GRN_CHAR_ALPHA;
              *d = c3 + 0x20;
            } else if ('0' <= c3 && c3 <= '9') {
              ctype = GRN_CHAR_DIGIT;
              *d = c3;
            } else {
              ctype = GRN_CHAR_OTHERS;
              *d++ = c1; *d = c2;
            }
            break;
          case 4 :
            *d++ = c1; *d = c2;
            ctype = GRN_CHAR_HIRAGANA;
            break;
          case 5 :
            *d++ = c1; *d = c2;
            ctype = GRN_CHAR_KATAKANA;
            break;
          case 6 :
          case 7 :
          case 8 :
            *d++ = c1; *d = c2;
            ctype = GRN_CHAR_SYMBOL;
            break;
          default :
            *d++ = c1; *d = c2;
            ctype = GRN_CHAR_OTHERS;
            break;
          }
          break;
        default :
          *d++ = c1; *d = c2;
          ctype = GRN_CHAR_KANJI;
          break;
        }
      } else {
        /* skip invalid character */
        continue;
      }
    } else {
      /* Single 7-bit byte: dispatch on the high nibble. */
      unsigned char c = *s;
      switch (c >> 4) {
      case 0 :
      case 1 :
        /* skip unprintable ascii */
        if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
        continue;
      case 2 :
        if (c == 0x20) {
          if (removeblankp) {
            if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
            continue;
          } else {
            *d = ' ';
            ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
          }
        } else {
          *d = c;
          ctype = GRN_CHAR_SYMBOL;
        }
        break;
      case 3 :
        *d = c;
        ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
        break;
      case 4 :
        /* 0x40 ('@') is kept; 'A'..'O' are lowercased. */
        *d = ('A' <= c) ? c + 0x20 : c;
        ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
        break;
      case 5 :
        /* 'P'..'Z' are lowercased; 0x5b..0x5f are kept. */
        *d = (c <= 'Z') ? c + 0x20 : c;
        ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
        break;
      case 6 :
        *d = c;
        ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
        break;
      case 7 :
        *d = c;
        ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
        break;
      default :
        *d = c;
        ctype = GRN_CHAR_OTHERS;
        break;
      }
    }
    /* Common tail for every emitted character: d currently points at its
       last byte. */
    d++;
    length++;
    if (cp) { *cp++ = ctype; }
    if (ch) {
      /* First output byte records how many source bytes were consumed since
         the previous emitted character; remaining output bytes get 0. */
      *ch++ = (int16_t)(s + 1 - s_);
      s_ = s + 1;
      while (++d_ < d) { *ch++ = 0; }
    }
  }
  if (cp) { *cp = GRN_CHAR_NULL; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return GRN_SUCCESS;
}
429 | |
430 | #ifdef GRN_WITH_NFKC |
/*
 * Normalizes nstr->orig (nstr->orig_blen bytes), scanned as UTF-8, into a
 * freshly allocated nstr->norm using the grn_nfkc_* helpers.
 *
 * For each source character:
 *   - if grn_nfkc_decompose() returns a string, that string is emitted in
 *     place of the source character;
 *   - if grn_nfkc_compose() returns a string for the previously emitted
 *     character and the text about to be emitted, the previous character is
 *     rolled back and the returned string is emitted instead;
 *   - characters below 0x20 are dropped, and ' ' is dropped when
 *     GRN_STR_REMOVEBLANK is set.
 * Scanning stops at the first position where grn_str_charlen_utf8()
 * returns 0.
 *
 * Output buffers start at 3 * orig_blen bytes and are grown on demand.
 * nstr->checks and nstr->ctypes are allocated when GRN_STR_WITH_CHECKS /
 * GRN_STR_WITH_CTYPES are set, and are filled whenever they are non-NULL.
 *
 * Returns GRN_SUCCESS, or GRN_NO_MEMORY_AVAILABLE when an allocation fails
 * (all output buffers are freed and their pointers reset to NULL).
 */
inline static grn_rc
normalize_utf8(grn_ctx *ctx, grn_str *nstr)
{
  int16_t *ch;                /* write cursor into nstr->checks */
  /* s: current source character; s_: source position after the last
     character recorded in checks; s__: previous value of s_, restored when
     a composition rolls back; p/pe: text being emitted and its end;
     e: end of source. */
  const unsigned char *s, *s_, *s__ = NULL, *p, *p2, *pe, *e;
  /* d: write cursor; d_: start of the last emitted character (NULL until
     one exists); de: end of the output buffer. */
  unsigned char *d, *d_, *de;
  uint_least8_t *cp;          /* write cursor into nstr->ctypes */
  size_t length = 0, ls, lp, size = nstr->orig_blen, ds = size * 3;
  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
  if (!(nstr->norm = GRN_MALLOC(ds + 1))) {
    return GRN_NO_MEMORY_AVAILABLE;
  }
  if (nstr->flags & GRN_STR_WITH_CHECKS) {
    if (!(nstr->checks = GRN_MALLOC(ds * sizeof(int16_t) + 1))) {
      GRN_FREE(nstr->norm); nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & GRN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = GRN_MALLOC(ds + 1))) {
      if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
      GRN_FREE(nstr->norm); nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  cp = nstr->ctypes;
  d = (unsigned char *)nstr->norm;
  de = d + ds;
  d_ = NULL;
  e = (unsigned char *)nstr->orig + size;
  for (s = s_ = (unsigned char *)nstr->orig; ; s += ls) {
    /* ls == 0 means end of input, a NUL byte, or invalid UTF-8. */
    if (!(ls = grn_str_charlen_utf8(ctx, s, e))) {
      break;
    }
    if ((p = (unsigned char *)grn_nfkc_decompose(s))) {
      /* The helper's result is treated as a NUL-terminated string. */
      pe = p + strlen((char *)p);
    } else {
      p = s;
      pe = p + ls;
    }
    if (d_ && (p2 = (unsigned char *)grn_nfkc_compose(d_, p))) {
      /* Roll back the previously emitted character (output bytes, its
         ctype, its checks slots and the character count) and emit the
         composed string instead. */
      p = p2;
      pe = p + strlen((char *)p);
      if (cp) { cp--; }
      if (ch) {
        ch -= (d - d_);
        s_ = s__;
      }
      d = d_;
      length--;
    }
    for (; ; p += lp) {
      if (!(lp = grn_str_charlen_utf8(ctx, p, pe))) {
        break;
      }
      if ((*p == ' ' && removeblankp) || *p < 0x20 /* skip unprintable ascii */ ) {
        if (cp > nstr->ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
      } else {
        if (de <= d + lp) {
          /* Not enough room for this character plus the terminator: grow
             every output buffer by half its size plus lp, and re-base the
             cursors onto the new allocations. */
          unsigned char *norm;
          ds += (ds >> 1) + lp;
          if (!(norm = GRN_REALLOC(nstr->norm, ds + 1))) {
            if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
            if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
            GRN_FREE(nstr->norm); nstr->norm = NULL;
            return GRN_NO_MEMORY_AVAILABLE;
          }
          de = norm + ds;
          d = norm + (d - (unsigned char *)nstr->norm);
          nstr->norm = (char *)norm;
          if (ch) {
            int16_t *checks;
            if (!(checks = GRN_REALLOC(nstr->checks, ds * sizeof(int16_t)+ 1))) {
              if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
              GRN_FREE(nstr->checks); nstr->checks = NULL;
              GRN_FREE(nstr->norm); nstr->norm = NULL;
              return GRN_NO_MEMORY_AVAILABLE;
            }
            ch = checks + (ch - nstr->checks);
            nstr->checks = checks;
          }
          if (cp) {
            uint_least8_t *ctypes;
            if (!(ctypes = GRN_REALLOC(nstr->ctypes, ds + 1))) {
              GRN_FREE(nstr->ctypes); nstr->ctypes = NULL;
              if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
              GRN_FREE(nstr->norm); nstr->norm = NULL;
              return GRN_NO_MEMORY_AVAILABLE;
            }
            cp = ctypes + (cp - nstr->ctypes);
            nstr->ctypes = ctypes;
          }
        }
        grn_memcpy(d, p, lp);
        /* d_ is refreshed here, after any reallocation above, so it always
           points into the current buffer when it is next read. */
        d_ = d;
        d += lp;
        length++;
        if (cp) { *cp++ = grn_nfkc_char_type(p); }
        if (ch) {
          size_t i;
          if (s_ == s + ls) {
            /* The current source character has already been accounted for
               by an earlier output character: mark this one with -1. */
            *ch++ = -1;
          } else {
            /* Number of source bytes consumed since the last recorded
               position, up to the end of the current source character. */
            *ch++ = (int16_t)(s + ls - s_);
            s__ = s_;
            s_ = s + ls;
          }
          /* Remaining bytes of this output character get 0. */
          for (i = lp; i > 1; i--) { *ch++ = 0; }
        }
      }
    }
  }
  if (cp) { *cp = GRN_CHAR_NULL; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return GRN_SUCCESS;
}
550 | #endif /* GRN_WITH_NFKC */ |
551 | |
/*
 * Normalizes nstr->orig (nstr->orig_blen bytes), scanned as Shift_JIS, into
 * a freshly allocated nstr->norm.
 *
 * Visible effects of the loop below:
 *   - bytes below 0x20 are dropped; a blank (0x20, or the two-byte code
 *     0x81 0x40) is dropped when GRN_STR_REMOVEBLANK is set and emitted as
 *     ' ' otherwise;
 *   - ASCII 'A'..'Z' are lowercased;
 *   - single bytes 0xa0..0xdf are rewritten to two-byte codes through
 *     hankana[]; the marks 0x814a / 0x814b are merged into a preceding
 *     0x83-row character when dakuten[] / handaku[] has an entry for it;
 *   - other high-bit bytes followed by a byte in 0x40..0xfc form a two-byte
 *     character; 0x81-row characters with an entry in the file-level
 *     symbol[] table and some 0x82-row characters are emitted as a single
 *     ASCII byte (letters lowercased);
 *   - a high-bit lead byte without such a second byte is skipped.
 *
 * nstr->checks and nstr->ctypes are allocated when GRN_STR_WITH_CHECKS /
 * GRN_STR_WITH_CTYPES are set, and are filled whenever they are non-NULL.
 *
 * Returns GRN_SUCCESS, or GRN_NO_MEMORY_AVAILABLE when an allocation fails
 * (buffers allocated so far are freed and their pointers reset to NULL).
 */
inline static grn_rc
normalize_sjis(grn_ctx *ctx, grn_str *nstr)
{
  /* Two-byte codes emitted for single bytes 0xa0..0xdf, indexed by
     (byte - 0xa0). */
  static uint16_t hankana[] = {
    0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342,
    0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 0x815b, 0x8341,
    0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 0x834e, 0x8350, 0x8352,
    0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 0x835e, 0x8360, 0x8363, 0x8365,
    0x8367, 0x8369, 0x836a, 0x836b, 0x836c, 0x836d, 0x836e, 0x8371, 0x8374,
    0x8377, 0x837a, 0x837d, 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386,
    0x8388, 0x8389, 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, 0x814a,
    0x814b
  };
  /* Replacement low byte for a preceding 0x83-row character followed by the
     0x814a mark, indexed by (previous low byte - 0x45); 0 = no merge. */
  static unsigned char dakuten[] = {
    0x94, 0, 0, 0, 0, 0x4b, 0, 0x4d, 0, 0x4f, 0, 0x51, 0, 0x53, 0, 0x55, 0,
    0x57, 0, 0x59, 0, 0x5b, 0, 0x5d, 0, 0x5f, 0, 0x61, 0, 0, 0x64, 0, 0x66,
    0, 0x68, 0, 0, 0, 0, 0, 0, 0x6f, 0, 0, 0x72, 0, 0, 0x75, 0, 0, 0x78, 0,
    0, 0x7b
  };
  /* Same as dakuten[] but for the 0x814b mark, indexed by
     (previous low byte - 0x6e). */
  static unsigned char handaku[] = {
    0x70, 0, 0, 0x73, 0, 0, 0x76, 0, 0, 0x79, 0, 0, 0x7c
  };
  int16_t *ch;                      /* write cursor into nstr->checks */
  const unsigned char *s, *s_;      /* s: read cursor, s_: source position
                                       after the last emitted character */
  unsigned char *d, *d0, *d_, b, *e; /* d: write cursor, d0: start of output,
                                        d_: trails d for checks zero-fill,
                                        e: end of source */
  uint_least8_t *cp, *ctypes, ctype;
  size_t size = nstr->orig_blen, length = 0;
  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
  /* A single source byte can become a two-byte code (hankana[]), hence the
     size * 2 buffers. */
  if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
    return GRN_NO_MEMORY_AVAILABLE;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & GRN_STR_WITH_CHECKS) {
    if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
      GRN_FREE(nstr->norm);
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & GRN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
      GRN_FREE(nstr->checks);
      GRN_FREE(nstr->norm);
      nstr->checks = NULL;
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
    if ((*s & 0x80)) {
      if (0xa0 <= *s && *s <= 0xdf) {
        /* Single-byte character in 0xa0..0xdf: expand through hankana[]. */
        uint16_t c = hankana[*s - 0xa0];
        switch (c) {
        case 0x814a :
          /* Merge the mark into the previously emitted 0x83-row character
             when dakuten[] has a combined form. */
          if (d > d0 + 1 && d[-2] == 0x83
              && 0x45 <= d[-1] && d[-1] <= 0x7a && (b = dakuten[d[-1] - 0x45])) {
            *(d - 1) = b;
            /* NOTE(review): ch[-1] is the slot of the last output byte,
               i.e. the trailing 0 slot of the previous two-byte character
               rather than its leading slot — confirm this is intended. */
            if (ch) { ch[-1]++; s_++; }
            continue;
          } else {
            *d++ = c >> 8; *d = c & 0xff;
          }
          break;
        case 0x814b :
          /* Same as above, using handaku[]. */
          if (d > d0 + 1 && d[-2] == 0x83
              && 0x6e <= d[-1] && d[-1] <= 0x7a && (b = handaku[d[-1] - 0x6e])) {
            *(d - 1) = b;
            if (ch) { ch[-1]++; s_++; }
            continue;
          } else {
            *d++ = c >> 8; *d = c & 0xff;
          }
          break;
        default :
          *d++ = c >> 8; *d = c & 0xff;
          break;
        }
        ctype = GRN_CHAR_KATAKANA;
      } else {
        if ((s + 1) < e && 0x40 <= *(s + 1) && *(s + 1) <= 0xfc) {
          /* Two-byte character: s is left on the second byte, the loop's
             s++ then steps past it. */
          unsigned char c1 = *s++, c2 = *s, c3 = 0;
          if (0x81 <= c1 && c1 <= 0x87) {
            /* First byte 0x81..0x87: dispatch on the low nibble. */
            switch (c1 & 0x0f) {
            case 1 :
              switch (c2) {
              case 0x5b :
                *d++ = c1; *d = c2;
                ctype = GRN_CHAR_KATAKANA;
                break;
              case 0x58 :
                *d++ = c1; *d = c2;
                ctype = GRN_CHAR_KANJI;
                break;
              case 0x40 :
                /* Two-byte blank: dropped or emitted as an ASCII space. */
                if (removeblankp) {
                  if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
                  continue;
                } else {
                  *d = ' ';
                  ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
                }
                break;
              default :
                /* Replace with the ASCII symbol when symbol[] has one; the
                   index offset differs by one on either side of 0x7f. */
                if (0x43 <= c2 && c2 <= 0x7e && (c3 = symbol[c2 - 0x43])) {
                  *d = c3;
                  ctype = GRN_CHAR_SYMBOL;
                } else if (0x7f <= c2 && c2 <= 0x97 && (c3 = symbol[c2 - 0x44])) {
                  *d = c3;
                  ctype = GRN_CHAR_SYMBOL;
                } else {
                  *d++ = c1; *d = c2;
                  ctype = GRN_CHAR_OTHERS;
                }
                break;
              }
              break;
            case 2 :
              /* NOTE: c3 is assigned here but not read in this case. */
              c3 = c2 - 0x1f;
              if (0x4f <= c2 && c2 <= 0x58) {
                /* Maps to ASCII '0'..'9'. */
                ctype = GRN_CHAR_DIGIT;
                *d = c2 - 0x1f;
              } else if (0x60 <= c2 && c2 <= 0x79) {
                /* Maps to ASCII 'a'..'z'. */
                ctype = GRN_CHAR_ALPHA;
                *d = c2 + 0x01;
              } else if (0x81 <= c2 && c2 <= 0x9a) {
                /* Maps to ASCII 'a'..'z'. */
                ctype = GRN_CHAR_ALPHA;
                *d = c2 - 0x20;
              } else if (0x9f <= c2 && c2 <= 0xf1) {
                *d++ = c1; *d = c2;
                ctype = GRN_CHAR_HIRAGANA;
              } else {
                *d++ = c1; *d = c2;
                ctype = GRN_CHAR_OTHERS;
              }
              break;
            case 3 :
              if (0x40 <= c2 && c2 <= 0x96) {
                *d++ = c1; *d = c2;
                ctype = GRN_CHAR_KATAKANA;
              } else {
                *d++ = c1; *d = c2;
                ctype = GRN_CHAR_SYMBOL;
              }
              break;
            case 4 :
            case 7 :
              *d++ = c1; *d = c2;
              ctype = GRN_CHAR_SYMBOL;
              break;
            default :
              *d++ = c1; *d = c2;
              ctype = GRN_CHAR_OTHERS;
              break;
            }
          } else {
            *d++ = c1; *d = c2;
            ctype = GRN_CHAR_KANJI;
          }
        } else {
          /* skip invalid character */
          continue;
        }
      }
    } else {
      /* Single 7-bit byte: dispatch on the high nibble. */
      unsigned char c = *s;
      switch (c >> 4) {
      case 0 :
      case 1 :
        /* skip unprintable ascii */
        if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
        continue;
      case 2 :
        if (c == 0x20) {
          if (removeblankp) {
            if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
            continue;
          } else {
            *d = ' ';
            ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
          }
        } else {
          *d = c;
          ctype = GRN_CHAR_SYMBOL;
        }
        break;
      case 3 :
        *d = c;
        ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
        break;
      case 4 :
        /* 0x40 ('@') is kept; 'A'..'O' are lowercased. */
        *d = ('A' <= c) ? c + 0x20 : c;
        ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
        break;
      case 5 :
        /* 'P'..'Z' are lowercased; 0x5b..0x5f are kept. */
        *d = (c <= 'Z') ? c + 0x20 : c;
        ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
        break;
      case 6 :
        *d = c;
        ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
        break;
      case 7 :
        *d = c;
        ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
        break;
      default :
        *d = c;
        ctype = GRN_CHAR_OTHERS;
        break;
      }
    }
    /* Common tail for every emitted character: d currently points at its
       last byte. */
    d++;
    length++;
    if (cp) { *cp++ = ctype; }
    if (ch) {
      /* First output byte records how many source bytes were consumed since
         the previous emitted character; remaining output bytes get 0. */
      *ch++ = (int16_t)(s + 1 - s_);
      s_ = s + 1;
      while (++d_ < d) { *ch++ = 0; }
    }
  }
  if (cp) { *cp = GRN_CHAR_NULL; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return GRN_SUCCESS;
}
781 | |
782 | inline static grn_rc |
783 | normalize_none(grn_ctx *ctx, grn_str *nstr) |
784 | { |
785 | int16_t *ch; |
786 | const unsigned char *s, *s_, *e; |
787 | unsigned char *d, *d0, *d_; |
788 | uint_least8_t *cp, *ctypes, ctype; |
789 | size_t size = nstr->orig_blen, length = 0; |
790 | int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK; |
791 | if (!(nstr->norm = GRN_MALLOC(size + 1))) { |
792 | return GRN_NO_MEMORY_AVAILABLE; |
793 | } |
794 | d0 = (unsigned char *) nstr->norm; |
795 | if (nstr->flags & GRN_STR_WITH_CHECKS) { |
796 | if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) { |
797 | GRN_FREE(nstr->norm); |
798 | nstr->norm = NULL; |
799 | return GRN_NO_MEMORY_AVAILABLE; |
800 | } |
801 | } |
802 | ch = nstr->checks; |
803 | if (nstr->flags & GRN_STR_WITH_CTYPES) { |
804 | if (!(nstr->ctypes = GRN_MALLOC(size + 1))) { |
805 | GRN_FREE(nstr->checks); |
806 | GRN_FREE(nstr->norm); |
807 | nstr->checks = NULL; |
808 | nstr->norm = NULL; |
809 | return GRN_NO_MEMORY_AVAILABLE; |
810 | } |
811 | } |
812 | cp = ctypes = nstr->ctypes; |
813 | e = (unsigned char *)nstr->orig + size; |
814 | for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) { |
815 | unsigned char c = *s; |
816 | switch (c >> 4) { |
817 | case 0 : |
818 | case 1 : |
819 | /* skip unprintable ascii */ |
820 | if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; } |
821 | continue; |
822 | case 2 : |
823 | if (c == 0x20) { |
824 | if (removeblankp) { |
825 | if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; } |
826 | continue; |
827 | } else { |
828 | *d = ' '; |
829 | ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL; |
830 | } |
831 | } else { |
832 | *d = c; |
833 | ctype = GRN_CHAR_SYMBOL; |
834 | } |
835 | break; |
836 | case 3 : |
837 | *d = c; |
838 | ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL; |
839 | break; |
840 | case 4 : |
841 | *d = ('A' <= c) ? c + 0x20 : c; |
842 | ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA; |
843 | break; |
844 | case 5 : |
845 | *d = (c <= 'Z') ? c + 0x20 : c; |
846 | ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL; |
847 | break; |
848 | case 6 : |
849 | *d = c; |
850 | ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA; |
851 | break; |
852 | case 7 : |
853 | *d = c; |
854 | ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL); |
855 | break; |
856 | default : |
857 | *d = c; |
858 | ctype = GRN_CHAR_OTHERS; |
859 | break; |
860 | } |
861 | d++; |
862 | length++; |
863 | if (cp) { *cp++ = ctype; } |
864 | if (ch) { |
865 | *ch++ = (int16_t)(s + 1 - s_); |
866 | s_ = s + 1; |
867 | while (++d_ < d) { *ch++ = 0; } |
868 | } |
869 | } |
870 | if (cp) { *cp = GRN_CHAR_NULL; } |
871 | *d = '\0'; |
872 | nstr->length = length; |
873 | nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm); |
874 | return GRN_SUCCESS; |
875 | } |
876 | |
877 | /* use cp1252 as latin1 */ |
878 | inline static grn_rc |
879 | normalize_latin1(grn_ctx *ctx, grn_str *nstr) |
880 | { |
881 | int16_t *ch; |
882 | const unsigned char *s, *s_, *e; |
883 | unsigned char *d, *d0, *d_; |
884 | uint_least8_t *cp, *ctypes, ctype; |
885 | size_t size = nstr->orig_blen, length = 0; |
886 | int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK; |
887 | if (!(nstr->norm = GRN_MALLOC(size + 1))) { |
888 | return GRN_NO_MEMORY_AVAILABLE; |
889 | } |
890 | d0 = (unsigned char *) nstr->norm; |
891 | if (nstr->flags & GRN_STR_WITH_CHECKS) { |
892 | if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) { |
893 | GRN_FREE(nstr->norm); |
894 | nstr->norm = NULL; |
895 | return GRN_NO_MEMORY_AVAILABLE; |
896 | } |
897 | } |
898 | ch = nstr->checks; |
899 | if (nstr->flags & GRN_STR_WITH_CTYPES) { |
900 | if (!(nstr->ctypes = GRN_MALLOC(size + 1))) { |
901 | GRN_FREE(nstr->checks); |
902 | GRN_FREE(nstr->norm); |
903 | nstr->checks = NULL; |
904 | nstr->norm = NULL; |
905 | return GRN_NO_MEMORY_AVAILABLE; |
906 | } |
907 | } |
908 | cp = ctypes = nstr->ctypes; |
909 | e = (unsigned char *)nstr->orig + size; |
910 | for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) { |
911 | unsigned char c = *s; |
912 | switch (c >> 4) { |
913 | case 0 : |
914 | case 1 : |
915 | /* skip unprintable ascii */ |
916 | if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; } |
917 | continue; |
918 | case 2 : |
919 | if (c == 0x20) { |
920 | if (removeblankp) { |
921 | if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; } |
922 | continue; |
923 | } else { |
924 | *d = ' '; |
925 | ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL; |
926 | } |
927 | } else { |
928 | *d = c; |
929 | ctype = GRN_CHAR_SYMBOL; |
930 | } |
931 | break; |
932 | case 3 : |
933 | *d = c; |
934 | ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL; |
935 | break; |
936 | case 4 : |
937 | *d = ('A' <= c) ? c + 0x20 : c; |
938 | ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA; |
939 | break; |
940 | case 5 : |
941 | *d = (c <= 'Z') ? c + 0x20 : c; |
942 | ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL; |
943 | break; |
944 | case 6 : |
945 | *d = c; |
946 | ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA; |
947 | break; |
948 | case 7 : |
949 | *d = c; |
950 | ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL); |
951 | break; |
952 | case 8 : |
953 | if (c == 0x8a || c == 0x8c || c == 0x8e) { |
954 | *d = c + 0x10; |
955 | ctype = GRN_CHAR_ALPHA; |
956 | } else { |
957 | *d = c; |
958 | ctype = GRN_CHAR_SYMBOL; |
959 | } |
960 | break; |
961 | case 9 : |
962 | if (c == 0x9a || c == 0x9c || c == 0x9e || c == 0x9f) { |
963 | *d = (c == 0x9f) ? c + 0x60 : c; |
964 | ctype = GRN_CHAR_ALPHA; |
965 | } else { |
966 | *d = c; |
967 | ctype = GRN_CHAR_SYMBOL; |
968 | } |
969 | break; |
970 | case 0x0c : |
971 | *d = c + 0x20; |
972 | ctype = GRN_CHAR_ALPHA; |
973 | break; |
974 | case 0x0d : |
975 | *d = (c == 0xd7 || c == 0xdf) ? c : c + 0x20; |
976 | ctype = (c == 0xd7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA; |
977 | break; |
978 | case 0x0e : |
979 | *d = c; |
980 | ctype = GRN_CHAR_ALPHA; |
981 | break; |
982 | case 0x0f : |
983 | *d = c; |
984 | ctype = (c == 0xf7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA; |
985 | break; |
986 | default : |
987 | *d = c; |
988 | ctype = GRN_CHAR_OTHERS; |
989 | break; |
990 | } |
991 | d++; |
992 | length++; |
993 | if (cp) { *cp++ = ctype; } |
994 | if (ch) { |
995 | *ch++ = (int16_t)(s + 1 - s_); |
996 | s_ = s + 1; |
997 | while (++d_ < d) { *ch++ = 0; } |
998 | } |
999 | } |
1000 | if (cp) { *cp = GRN_CHAR_NULL; } |
1001 | *d = '\0'; |
1002 | nstr->length = length; |
1003 | nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm); |
1004 | return GRN_SUCCESS; |
1005 | } |
1006 | |
1007 | inline static grn_rc |
1008 | normalize_koi8r(grn_ctx *ctx, grn_str *nstr) |
1009 | { |
1010 | int16_t *ch; |
1011 | const unsigned char *s, *s_, *e; |
1012 | unsigned char *d, *d0, *d_; |
1013 | uint_least8_t *cp, *ctypes, ctype; |
1014 | size_t size = strlen(nstr->orig), length = 0; |
1015 | int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK; |
1016 | if (!(nstr->norm = GRN_MALLOC(size + 1))) { |
1017 | return GRN_NO_MEMORY_AVAILABLE; |
1018 | } |
1019 | d0 = (unsigned char *) nstr->norm; |
1020 | if (nstr->flags & GRN_STR_WITH_CHECKS) { |
1021 | if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) { |
1022 | GRN_FREE(nstr->norm); |
1023 | nstr->norm = NULL; |
1024 | return GRN_NO_MEMORY_AVAILABLE; |
1025 | } |
1026 | } |
1027 | ch = nstr->checks; |
1028 | if (nstr->flags & GRN_STR_WITH_CTYPES) { |
1029 | if (!(nstr->ctypes = GRN_MALLOC(size + 1))) { |
1030 | GRN_FREE(nstr->checks); |
1031 | GRN_FREE(nstr->norm); |
1032 | nstr->checks = NULL; |
1033 | nstr->norm = NULL; |
1034 | return GRN_NO_MEMORY_AVAILABLE; |
1035 | } |
1036 | } |
1037 | cp = ctypes = nstr->ctypes; |
1038 | e = (unsigned char *)nstr->orig + size; |
1039 | for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) { |
1040 | unsigned char c = *s; |
1041 | switch (c >> 4) { |
1042 | case 0 : |
1043 | case 1 : |
1044 | /* skip unprintable ascii */ |
1045 | if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; } |
1046 | continue; |
1047 | case 2 : |
1048 | if (c == 0x20) { |
1049 | if (removeblankp) { |
1050 | if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; } |
1051 | continue; |
1052 | } else { |
1053 | *d = ' '; |
1054 | ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL; |
1055 | } |
1056 | } else { |
1057 | *d = c; |
1058 | ctype = GRN_CHAR_SYMBOL; |
1059 | } |
1060 | break; |
1061 | case 3 : |
1062 | *d = c; |
1063 | ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL; |
1064 | break; |
1065 | case 4 : |
1066 | *d = ('A' <= c) ? c + 0x20 : c; |
1067 | ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA; |
1068 | break; |
1069 | case 5 : |
1070 | *d = (c <= 'Z') ? c + 0x20 : c; |
1071 | ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL; |
1072 | break; |
1073 | case 6 : |
1074 | *d = c; |
1075 | ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA; |
1076 | break; |
1077 | case 7 : |
1078 | *d = c; |
1079 | ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL); |
1080 | break; |
1081 | case 0x0a : |
1082 | *d = c; |
1083 | ctype = (c == 0xa3) ? GRN_CHAR_ALPHA : GRN_CHAR_OTHERS; |
1084 | break; |
1085 | case 0x0b : |
1086 | if (c == 0xb3) { |
1087 | *d = c - 0x10; |
1088 | ctype = GRN_CHAR_ALPHA; |
1089 | } else { |
1090 | *d = c; |
1091 | ctype = GRN_CHAR_OTHERS; |
1092 | } |
1093 | break; |
1094 | case 0x0c : |
1095 | case 0x0d : |
1096 | *d = c; |
1097 | ctype = GRN_CHAR_ALPHA; |
1098 | break; |
1099 | case 0x0e : |
1100 | case 0x0f : |
1101 | *d = c - 0x20; |
1102 | ctype = GRN_CHAR_ALPHA; |
1103 | break; |
1104 | default : |
1105 | *d = c; |
1106 | ctype = GRN_CHAR_OTHERS; |
1107 | break; |
1108 | } |
1109 | d++; |
1110 | length++; |
1111 | if (cp) { *cp++ = ctype; } |
1112 | if (ch) { |
1113 | *ch++ = (int16_t)(s + 1 - s_); |
1114 | s_ = s + 1; |
1115 | while (++d_ < d) { *ch++ = 0; } |
1116 | } |
1117 | } |
1118 | if (cp) { *cp = GRN_CHAR_NULL; } |
1119 | *d = '\0'; |
1120 | nstr->length = length; |
1121 | nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm); |
1122 | return GRN_SUCCESS; |
1123 | } |
1124 | |
1125 | static grn_str * |
1126 | grn_fakenstr_open(grn_ctx *ctx, const char *str, size_t str_len, grn_encoding encoding, int flags) |
1127 | { |
1128 | /* TODO: support GRN_STR_REMOVEBLANK flag and ctypes */ |
1129 | grn_str *nstr; |
1130 | if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) { |
1131 | GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_fakenstr_open failed !" ); |
1132 | return NULL; |
1133 | } |
1134 | if (!(nstr->norm = GRN_MALLOC(str_len + 1))) { |
1135 | GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation for keyword on grn_snip_add_cond failed !" ); |
1136 | GRN_FREE(nstr); |
1137 | return NULL; |
1138 | } |
1139 | nstr->orig = str; |
1140 | nstr->orig_blen = str_len; |
1141 | grn_memcpy(nstr->norm, str, str_len); |
1142 | nstr->norm[str_len] = '\0'; |
1143 | nstr->norm_blen = str_len; |
1144 | nstr->ctypes = NULL; |
1145 | nstr->flags = flags; |
1146 | |
1147 | if (flags & GRN_STR_WITH_CHECKS) { |
1148 | int16_t f = 0; |
1149 | unsigned char c; |
1150 | size_t i; |
1151 | if (!(nstr->checks = (int16_t *) GRN_MALLOC(sizeof(int16_t) * str_len))) { |
1152 | GRN_FREE(nstr->norm); |
1153 | GRN_FREE(nstr); |
1154 | return NULL; |
1155 | } |
1156 | switch (encoding) { |
1157 | case GRN_ENC_EUC_JP: |
1158 | for (i = 0; i < str_len; i++) { |
1159 | if (!f) { |
1160 | c = (unsigned char) str[i]; |
1161 | f = ((c >= 0xa1U && c <= 0xfeU) || c == 0x8eU ? 2 : (c == 0x8fU ? 3 : 1) |
1162 | ); |
1163 | nstr->checks[i] = f; |
1164 | } else { |
1165 | nstr->checks[i] = 0; |
1166 | } |
1167 | f--; |
1168 | } |
1169 | break; |
1170 | case GRN_ENC_SJIS: |
1171 | for (i = 0; i < str_len; i++) { |
1172 | if (!f) { |
1173 | c = (unsigned char) str[i]; |
1174 | f = (c >= 0x81U && ((c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU)) ? 2 : 1); |
1175 | nstr->checks[i] = f; |
1176 | } else { |
1177 | nstr->checks[i] = 0; |
1178 | } |
1179 | f--; |
1180 | } |
1181 | break; |
1182 | case GRN_ENC_UTF8: |
1183 | for (i = 0; i < str_len; i++) { |
1184 | if (!f) { |
1185 | c = (unsigned char) str[i]; |
1186 | f = (c & 0x80U ? (c & 0x20U ? (c & 0x10U ? 4 : 3) |
1187 | : 2) |
1188 | : 1); |
1189 | nstr->checks[i] = f; |
1190 | } else { |
1191 | nstr->checks[i] = 0; |
1192 | } |
1193 | f--; |
1194 | } |
1195 | break; |
1196 | default: |
1197 | for (i = 0; i < str_len; i++) { |
1198 | nstr->checks[i] = 1; |
1199 | } |
1200 | break; |
1201 | } |
1202 | } else { |
1203 | nstr->checks = NULL; |
1204 | } |
1205 | return nstr; |
1206 | } |
1207 | |
1208 | grn_str * |
1209 | grn_str_open_(grn_ctx *ctx, const char *str, unsigned int str_len, int flags, grn_encoding encoding) |
1210 | { |
1211 | grn_rc rc; |
1212 | grn_str *nstr; |
1213 | if (!str || !str_len) { return NULL; } |
1214 | |
1215 | if (!(flags & GRN_STR_NORMALIZE)) { |
1216 | return grn_fakenstr_open(ctx, str, str_len, encoding, flags); |
1217 | } |
1218 | |
1219 | if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) { |
1220 | GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_str_open failed !" ); |
1221 | return NULL; |
1222 | } |
1223 | nstr->orig = str; |
1224 | nstr->orig_blen = str_len; |
1225 | nstr->norm = NULL; |
1226 | nstr->norm_blen = 0; |
1227 | nstr->checks = NULL; |
1228 | nstr->ctypes = NULL; |
1229 | nstr->encoding = encoding; |
1230 | nstr->flags = flags; |
1231 | switch (encoding) { |
1232 | case GRN_ENC_EUC_JP : |
1233 | rc = normalize_euc(ctx, nstr); |
1234 | break; |
1235 | case GRN_ENC_UTF8 : |
1236 | #ifdef GRN_WITH_NFKC |
1237 | rc = normalize_utf8(ctx, nstr); |
1238 | #else /* GRN_WITH_NFKC */ |
1239 | rc = normalize_none(ctx, nstr); |
1240 | #endif /* GRN_WITH_NFKC */ |
1241 | break; |
1242 | case GRN_ENC_SJIS : |
1243 | rc = normalize_sjis(ctx, nstr); |
1244 | break; |
1245 | case GRN_ENC_LATIN1 : |
1246 | rc = normalize_latin1(ctx, nstr); |
1247 | break; |
1248 | case GRN_ENC_KOI8R : |
1249 | rc = normalize_koi8r(ctx, nstr); |
1250 | break; |
1251 | default : |
1252 | rc = normalize_none(ctx, nstr); |
1253 | break; |
1254 | } |
1255 | if (rc) { |
1256 | grn_str_close(ctx, nstr); |
1257 | return NULL; |
1258 | } |
1259 | return nstr; |
1260 | } |
1261 | |
1262 | grn_str * |
1263 | grn_str_open(grn_ctx *ctx, const char *str, unsigned int str_len, int flags) |
1264 | { |
1265 | return grn_str_open_(ctx, str, str_len, flags, ctx->encoding); |
1266 | } |
1267 | |
1268 | grn_rc |
1269 | grn_str_close(grn_ctx *ctx, grn_str *nstr) |
1270 | { |
1271 | if (nstr) { |
1272 | if (nstr->norm) { GRN_FREE(nstr->norm); } |
1273 | if (nstr->ctypes) { GRN_FREE(nstr->ctypes); } |
1274 | if (nstr->checks) { GRN_FREE(nstr->checks); } |
1275 | GRN_FREE(nstr); |
1276 | return GRN_SUCCESS; |
1277 | } else { |
1278 | return GRN_INVALID_ARGUMENT; |
1279 | } |
1280 | } |
1281 | |
/*
 * Encoding names, indexed by the numeric value of grn_encoding.
 * grn_encoding_to_string() and grn_encoding_parse() both rely on the
 * position of each entry.
 */
static const char *grn_enc_string[] = {
  "default",
  "none",
  "euc_jp",
  "utf8",
  "sjis",
  "latin1",
  "koi8r"
};
1291 | |
1292 | const char * |
1293 | grn_encoding_to_string(grn_encoding enc) |
1294 | { |
1295 | if (enc < (sizeof(grn_enc_string) / sizeof(char *))) { |
1296 | return grn_enc_string[enc]; |
1297 | } else { |
1298 | return "unknown" ; |
1299 | } |
1300 | } |
1301 | |
1302 | grn_encoding |
1303 | grn_encoding_parse(const char *str) |
1304 | { |
1305 | grn_encoding e = GRN_ENC_UTF8; |
1306 | int i = sizeof(grn_enc_string) / sizeof(grn_enc_string[0]); |
1307 | while (i--) { |
1308 | if (!strcmp(str, grn_enc_string[i])) { |
1309 | e = (grn_encoding)i; |
1310 | } |
1311 | } |
1312 | return e; |
1313 | } |
1314 | |
1315 | size_t |
1316 | grn_str_len(grn_ctx *ctx, const char *str, grn_encoding encoding, const char **last) |
1317 | { |
1318 | size_t len, tlen; |
1319 | const char *p = NULL; |
1320 | for (len = 0; ; len++) { |
1321 | p = str; |
1322 | if (!(tlen = grn_str_charlen(ctx, str, encoding))) { |
1323 | break; |
1324 | } |
1325 | str += tlen; |
1326 | } |
1327 | if (last) { *last = p; } |
1328 | return len; |
1329 | } |
1330 | |
1331 | int |
1332 | grn_isspace(const char *str, grn_encoding encoding) |
1333 | { |
1334 | const unsigned char *s = (const unsigned char *) str; |
1335 | if (!s) { return 0; } |
1336 | switch (s[0]) { |
1337 | case ' ' : |
1338 | case '\f' : |
1339 | case '\n' : |
1340 | case '\r' : |
1341 | case '\t' : |
1342 | case '\v' : |
1343 | return 1; |
1344 | case 0x81 : |
1345 | if (encoding == GRN_ENC_SJIS && s[1] == 0x40) { return 2; } |
1346 | break; |
1347 | case 0xA1 : |
1348 | if (encoding == GRN_ENC_EUC_JP && s[1] == 0xA1) { return 2; } |
1349 | break; |
1350 | case 0xE3 : |
1351 | if (encoding == GRN_ENC_UTF8 && s[1] == 0x80 && s[2] == 0x80) { return 3; } |
1352 | break; |
1353 | default : |
1354 | break; |
1355 | } |
1356 | return 0; |
1357 | } |
1358 | |
/*
 * Parses an optionally '-'-prefixed decimal integer from [nptr, end) into
 * an int8_t.
 *
 * rest (optional) receives the first unconsumed byte.  If only a '-' was
 * seen, rest is set back to nptr.  When the value does not fit in int8_t
 * the result is 0 and rest points at the digit that caused the overflow.
 */
int8_t
grn_atoi8(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int v = 0;          /* accumulated as a non-positive value, in full int width */
  int negative = 0;
  int sign_only = 0;  /* a '-' was read but no digit followed yet */
  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    /* Evaluate in int so that out-of-range values are seen before they
       are narrowed; comparing after truncation misses e.g. "300". */
    int t = v * 10 - (*p - '0');
    if (t < INT8_MIN || (!negative && t == INT8_MIN)) { v = 0; break; }
    v = t;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return (int8_t)(negative ? v : -v);
}
1379 | |
/*
 * Parses an unsigned decimal integer from [nptr, end) into a uint8_t.
 *
 * rest (optional) receives the first unconsumed byte.  When the value
 * exceeds UINT8_MAX the result is 0 and rest points at the digit that
 * caused the overflow.
 */
uint8_t
grn_atoui8(const char *nptr, const char *end, const char **rest)
{
  uint8_t v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    /* Evaluate in unsigned int: a wrapped 8-bit result is not always
       smaller than the previous value (e.g. 30 * 10 wraps to 44). */
    unsigned int t = v * 10u + (unsigned int)(*nptr - '0');
    if (t > UINT8_MAX) { v = 0; break; }
    v = (uint8_t)t;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
1393 | |
/*
 * Parses an optionally '-'-prefixed decimal integer from [nptr, end) into
 * an int16_t.
 *
 * rest (optional) receives the first unconsumed byte.  If only a '-' was
 * seen, rest is set back to nptr.  When the value does not fit in int16_t
 * the result is 0 and rest points at the digit that caused the overflow.
 */
int16_t
grn_atoi16(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int32_t v = 0;      /* accumulated as a non-positive value, wider than the result */
  int negative = 0;
  int sign_only = 0;  /* a '-' was read but no digit followed yet */
  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    /* Range-check before narrowing; comparing truncated values misses
       inputs such as "80000". */
    int32_t t = v * 10 - (*p - '0');
    if (t < INT16_MIN || (!negative && t == INT16_MIN)) { v = 0; break; }
    v = t;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return (int16_t)(negative ? v : -v);
}
1414 | |
/*
 * Parses an unsigned decimal integer from [nptr, end) into a uint16_t.
 *
 * rest (optional) receives the first unconsumed byte.  When the value
 * exceeds UINT16_MAX the result is 0 and rest points at the digit that
 * caused the overflow.
 */
uint16_t
grn_atoui16(const char *nptr, const char *end, const char **rest)
{
  uint16_t v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    /* Evaluate in 32 bits: a wrapped 16-bit result is not always smaller
       than the previous value (e.g. 10000 * 10 wraps to 34464). */
    uint32_t t = (uint32_t)v * 10u + (uint32_t)(*nptr - '0');
    if (t > UINT16_MAX) { v = 0; break; }
    v = (uint16_t)t;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
1428 | |
/*
 * Parses an optionally '-'-prefixed decimal integer from [nptr, end) into
 * an int.
 *
 * rest (optional) receives the first unconsumed byte.  If only a '-' was
 * seen, rest is set back to nptr.  When the value does not fit in int the
 * result is 0 and rest points at the digit that caused the overflow.
 */
int
grn_atoi(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int v = 0;          /* accumulated as a non-positive value */
  int negative = 0;
  int sign_only = 0;  /* a '-' was read but no digit followed yet */
  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    const int digit = *p - '0';
    /* Check the bound before multiplying: signed overflow is undefined
       and comparing the wrapped result does not catch every case. */
    if (v < (INT_MIN + digit) / 10) { v = 0; break; }
    v = v * 10 - digit;
    /* INT_MIN has no positive counterpart. */
    if (!negative && v == INT_MIN) { v = 0; break; }
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return negative ? v : -v;
}
1449 | |
/*
 * Parses an unsigned decimal integer from [nptr, end) into an unsigned int.
 *
 * rest (optional) receives the first unconsumed byte.  When the value
 * exceeds UINT_MAX the result is 0 and rest points at the digit that
 * caused the overflow.
 */
unsigned int
grn_atoui(const char *nptr, const char *end, const char **rest)
{
  unsigned int v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    const unsigned int digit = (unsigned int)(*nptr - '0');
    /* Check the bound before multiplying; a wrapped product can still be
       larger than the previous value and go unnoticed. */
    if (v > (UINT_MAX - digit) / 10) { v = 0; break; }
    v = v * 10 + digit;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
1463 | |
/*
 * Parses an optionally '-'-prefixed decimal integer from [nptr, end) into
 * an int64_t.
 *
 * rest (optional) receives the first unconsumed byte.  If only a '-' was
 * seen, rest is set back to nptr.  When the value does not fit in int64_t
 * the result is 0 and rest points at the digit that caused the overflow.
 */
int64_t
grn_atoll(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int sign_only = 0;  /* a '-' was read but no digit followed yet */
  int64_t v = 0;
  if (p < end && *p == '-') {
    p++;
    sign_only = 1;
    while (p < end && *p >= '0' && *p <= '9') {
      const int digit = *p - '0';
      /* Bound check first: signed overflow is undefined behaviour and a
         wrapped result is not reliably detectable afterwards. */
      if (v < (INT64_MIN + digit) / 10) { v = 0; break; }
      v = v * 10 - digit;
      sign_only = 0;
      p++;
    }
  } else {
    while (p < end && *p >= '0' && *p <= '9') {
      const int digit = *p - '0';
      if (v > (INT64_MAX - digit) / 10) { v = 0; break; }
      v = v * 10 + digit;
      p++;
    }
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return v;
}
1491 | |
/*
 * Parses an unsigned decimal integer from [nptr, end) into a uint64_t.
 *
 * rest (optional) receives the first unconsumed byte.  When the value
 * exceeds UINT64_MAX the result is 0 and rest points at the digit that
 * caused the overflow.
 */
uint64_t
grn_atoull(const char *nptr, const char *end, const char **rest)
{
  uint64_t v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    const uint64_t digit = (uint64_t)(*nptr - '0');
    /* Check the bound before multiplying; a wrapped product can still be
       larger than the previous value and go unnoticed. */
    if (v > (UINT64_MAX - digit) / 10) { v = 0; break; }
    v = v * 10 + digit;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
1505 | |
/*
 * Parses hexadecimal digits (0-9, a-f, A-F) from [nptr, end) into an
 * unsigned int.
 *
 * Any non-hex byte inside the range makes the result 0, with rest left on
 * that byte.  When the value exceeds UINT_MAX the result is 0 and rest
 * points just past the digit that caused the overflow.  Otherwise rest
 * (optional) receives end.
 */
unsigned int
grn_htoui(const char *nptr, const char *end, const char **rest)
{
  unsigned int v = 0;
  while (nptr < end) {
    const char c = *nptr;
    unsigned int digit;
    if (c >= '0' && c <= '9') {
      digit = (unsigned int)(c - '0');
    } else if (c >= 'a' && c <= 'f') {
      digit = (unsigned int)(c - 'a') + 10;
    } else if (c >= 'A' && c <= 'F') {
      digit = (unsigned int)(c - 'A') + 10;
    } else {
      v = 0;
      break;
    }
    nptr++;
    /* The top four bits must be free before shifting in another digit;
       testing the wrapped result afterwards misses most overflows. */
    if (v > (UINT_MAX >> 4)) {
      v = 0;
      break;
    }
    v = (v << 4) | digit;
  }
  if (rest) { *rest = nptr; }
  return v;
}
1550 | |
/*
 * Writes the lowest len hexadecimal digits of i (upper case, most
 * significant first) into p[0 .. len-1].  No terminator is written.
 */
void
grn_itoh(unsigned int i, char *p, unsigned int len)
{
  static const char hex_digits[] = "0123456789ABCDEF";
  unsigned int pos = len;
  /* Fill from the last position backwards, consuming one nibble each. */
  while (pos > 0) {
    pos--;
    p[pos] = hex_digits[i & 0xf];
    i >>= 4;
  }
}
1561 | |
1562 | grn_rc |
1563 | grn_itoa(int i, char *p, char *end, char **rest) |
1564 | { |
1565 | char *q; |
1566 | if (p >= end) { return GRN_INVALID_ARGUMENT; } |
1567 | q = p; |
1568 | if (i < 0) { |
1569 | *p++ = '-'; |
1570 | q = p; |
1571 | if (i == INT_MIN) { |
1572 | if (p >= end) { return GRN_INVALID_ARGUMENT; } |
1573 | *p++ = (-(i % 10)) + '0'; |
1574 | i /= 10; |
1575 | } |
1576 | i = -i; |
1577 | } |
1578 | do { |
1579 | if (p >= end) { return GRN_INVALID_ARGUMENT; } |
1580 | *p++ = i % 10 + '0'; |
1581 | } while ((i /= 10) > 0); |
1582 | if (rest) { *rest = p; } |
1583 | for (p--; q < p; q++, p--) { |
1584 | char t = *q; |
1585 | *q = *p; |
1586 | *p = t; |
1587 | } |
1588 | return GRN_SUCCESS; |
1589 | } |
1590 | |
1591 | grn_rc |
1592 | grn_itoa_padded(int i, char *p, char *end, char ch) |
1593 | { |
1594 | char *q; |
1595 | if (p >= end) { return GRN_INVALID_ARGUMENT; } |
1596 | if (i < 0) { |
1597 | *p++ = '-'; |
1598 | if (i == INT_MIN) { |
1599 | if (p >= end) { return GRN_INVALID_ARGUMENT; } |
1600 | *p++ = (-(i % 10)) + '0'; |
1601 | i /= 10; |
1602 | } |
1603 | i = -i; |
1604 | } |
1605 | q = end - 1; |
1606 | do { |
1607 | if (q < p) { return GRN_INVALID_ARGUMENT; } |
1608 | *q-- = i % 10 + '0'; |
1609 | } while ((i /= 10) > 0); |
1610 | while (q >= p) { |
1611 | *q-- = ch; |
1612 | } |
1613 | return GRN_SUCCESS; |
1614 | } |
1615 | |
1616 | grn_rc |
1617 | grn_lltoa(int64_t i, char *p, char *end, char **rest) |
1618 | { |
1619 | char *q; |
1620 | if (p >= end) { return GRN_INVALID_ARGUMENT; } |
1621 | q = p; |
1622 | if (i < 0) { |
1623 | *p++ = '-'; |
1624 | q = p; |
1625 | if (i == INT64_MIN) { |
1626 | *p++ = (-(i % 10)) + '0'; |
1627 | i /= 10; |
1628 | } |
1629 | i = -i; |
1630 | } |
1631 | do { |
1632 | if (p >= end) { return GRN_INVALID_ARGUMENT; } |
1633 | *p++ = i % 10 + '0'; |
1634 | } while ((i /= 10) > 0); |
1635 | if (rest) { *rest = p; } |
1636 | for (p--; q < p; q++, p--) { |
1637 | char t = *q; |
1638 | *q = *p; |
1639 | *p = t; |
1640 | } |
1641 | return GRN_SUCCESS; |
1642 | } |
1643 | |
1644 | grn_rc |
1645 | grn_ulltoa(uint64_t i, char *p, char *end, char **rest) |
1646 | { |
1647 | char *q; |
1648 | if (p >= end) { return GRN_INVALID_ARGUMENT; } |
1649 | q = p; |
1650 | do { |
1651 | if (p >= end) { return GRN_INVALID_ARGUMENT; } |
1652 | *p++ = i % 10 + '0'; |
1653 | } while ((i /= 10) > 0); |
1654 | if (rest) { *rest = p; } |
1655 | for (p--; q < p; q++, p--) { |
1656 | char t = *q; |
1657 | *q = *p; |
1658 | *p = t; |
1659 | } |
1660 | return GRN_SUCCESS; |
1661 | } |
1662 | |
/* Maps the low 6 bits of i to a character of the 64-symbol alphabet
   A-Z, a-z, 0-9, '+', '/'. */
#define I2B(i) \
 ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(i) & 0x3f])

/* Inverse of I2B: maps a character to its 6-bit value, or 0xff when the
   character is not part of the alphabet.  The lookup table covers the
   character range '+' .. 'z'.  Note that (b) is evaluated more than once. */
#define B2I(b) \
 (((b) < '+' || 'z' < (b)) ? 0xff : "\x3e\xff\xff\xff\x3f\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\xff\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33"[(b) - '+'])

/* Constant XOR-ed into ids by grn_itob() and grn_btoi(). */
#define MASK 0x34d34d34
1670 | |
1671 | char * |
1672 | grn_itob(grn_id id, char *p) |
1673 | { |
1674 | id ^= MASK; |
1675 | *p++ = I2B(id >> 24); |
1676 | *p++ = I2B(id >> 18); |
1677 | *p++ = I2B(id >> 12); |
1678 | *p++ = I2B(id >> 6); |
1679 | *p++ = I2B(id); |
1680 | return p; |
1681 | } |
1682 | |
1683 | grn_id |
1684 | grn_btoi(char *b) |
1685 | { |
1686 | uint8_t i; |
1687 | grn_id id = 0; |
1688 | int len = 5; |
1689 | while (len--) { |
1690 | char c = *b++; |
1691 | if ((i = B2I(c)) == 0xff) { return 0; } |
1692 | id = (id << 6) + i; |
1693 | } |
1694 | return id ^ MASK; |
1695 | } |
1696 | |
/* Maps the low 5 bits of i to a character of the alphabet 0-9, A-V. */
#define I2B32H(i) ("0123456789ABCDEFGHIJKLMNOPQRSTUV"[(i) & 0x1f])

/*
 * Encodes i as 13 characters at p.  The value is first offset by 2^63 so
 * that the signed range maps onto the unsigned range in the same order;
 * the first character carries the top 4 bits and each following character
 * carries 5 bits.  Returns the position just past the written characters.
 */
char *
grn_lltob32h(int64_t i, char *p)
{
  const uint64_t biased = (uint64_t)i + 0x8000000000000000ULL;
  int shift;
  for (shift = 60; shift >= 0; shift -= 5) {
    *p++ = I2B32H(biased >> shift);
  }
  return p;
}
1718 | |
/*
 * Encodes the unsigned value i as 13 characters at p.  2^63 is added to i
 * (wrapping), the first character carries the top 4 bits of the sum and
 * each following character carries 5 bits.  When bit 63 of the original
 * value is set, 0x10 is added to the first character's code.
 * Returns the position just past the written characters.
 */
char *
grn_ulltob32h(uint64_t i, char *p)
{
  /* Bit 63 of the input moved down to bit 4 (0x10 or 0). */
  char high_bit = (i >> 59) & 0x10;
  int shift;
  i += 0x8000000000000000ULL;
  *p++ = high_bit + I2B32H(i >> 60);
  for (shift = 55; shift >= 0; shift -= 5) {
    *p++ = I2B32H(i >> shift);
  }
  return p;
}
1739 | |
1740 | grn_rc |
1741 | grn_aton(grn_ctx *ctx, const char *p, const char *end, const char **rest, |
1742 | grn_obj *res) |
1743 | { |
1744 | if (*p == '+') { |
1745 | p++; |
1746 | } |
1747 | |
1748 | switch (*p) { |
1749 | case '-' : |
1750 | case '0' : case '1' : case '2' : case '3' : case '4' : |
1751 | case '5' : case '6' : case '7' : case '8' : case '9' : |
1752 | { |
1753 | int64_t int64; |
1754 | char rest_char; |
1755 | int64 = grn_atoll(p, end, rest); |
1756 | rest_char = **rest; |
1757 | if (end == *rest) { |
1758 | if ((int64_t)INT32_MIN <= int64 && int64 <= (int64_t)INT32_MAX) { |
1759 | grn_obj_reinit(ctx, res, GRN_DB_INT32, 0); |
1760 | GRN_INT32_SET(ctx, res, int64); |
1761 | } else if ((int64_t)INT32_MAX < int64 && int64 <= (int64_t)UINT32_MAX) { |
1762 | grn_obj_reinit(ctx, res, GRN_DB_UINT32, 0); |
1763 | GRN_UINT32_SET(ctx, res, int64); |
1764 | } else { |
1765 | grn_obj_reinit(ctx, res, GRN_DB_INT64, 0); |
1766 | GRN_INT64_SET(ctx, res, int64); |
1767 | } |
1768 | } else { |
1769 | if (*p != '-' && rest_char >= '0' && rest_char <= '9') { |
1770 | uint64_t uint64 = grn_atoull(p, end, rest); |
1771 | if (end == *rest) { |
1772 | grn_obj_reinit(ctx, res, GRN_DB_UINT64, 0); |
1773 | GRN_UINT64_SET(ctx, res, uint64); |
1774 | } |
1775 | } |
1776 | if (end != *rest) { |
1777 | if (rest_char == '.' || rest_char == 'e' || rest_char == 'E' || |
1778 | (rest_char >= '0' && rest_char <= '9')) { |
1779 | char *rest_float; |
1780 | double d; |
1781 | errno = 0; |
1782 | d = strtod(p, &rest_float); |
1783 | if (!errno && rest_float == end) { |
1784 | grn_obj_reinit(ctx, res, GRN_DB_FLOAT, 0); |
1785 | GRN_FLOAT_SET(ctx, res, d); |
1786 | *rest = rest_float; |
1787 | } else { |
1788 | return GRN_INVALID_ARGUMENT; |
1789 | } |
1790 | } |
1791 | } |
1792 | } |
1793 | } |
1794 | break; |
1795 | default : |
1796 | return GRN_INVALID_ARGUMENT; |
1797 | } |
1798 | |
1799 | return GRN_SUCCESS; |
1800 | } |
1801 | |
/*
 * Splits str (str_len bytes) on delim without modifying it.
 *
 * For each token, the position of the byte that ends it (a delimiter, or
 * the end of the string for the last token) is stored in tokbuf, up to
 * buf_size entries.  rest (optional) receives the position where scanning
 * stopped: the end of the string, or the delimiter that filled tokbuf.
 * Returns the number of entries stored; nothing is scanned when
 * buf_size <= 0.
 */
int
grn_str_tok(const char *str, size_t str_len, char delim, const char **tokbuf, int buf_size, const char **rest)
{
  const char **slot = tokbuf;

  if (buf_size > 0) {
    const char **slot_limit = tokbuf + buf_size;
    const char *limit = str + str_len;

    for (; str != limit; str++) {
      if (*str != delim) {
        continue;
      }
      *slot++ = str;
      if (slot == slot_limit) {
        break;
      }
    }
    /* The end of the string closes the final token, unless the buffer
       was filled by a delimiter before getting there. */
    if (slot != slot_limit) {
      *slot++ = str;
    }
  }
  if (rest) {
    *rest = str;
  }
  return slot - tokbuf;
}
1823 | |
1824 | inline static int |
1825 | op_getopt_flag(int *flags, const grn_str_getopt_opt *o, |
1826 | int argc, char * const argv[], int i, const char *optvalue) |
1827 | { |
1828 | switch (o->op) { |
1829 | case GETOPT_OP_NONE: |
1830 | break; |
1831 | case GETOPT_OP_ON: |
1832 | *flags |= o->flag; |
1833 | break; |
1834 | case GETOPT_OP_OFF: |
1835 | *flags &= ~o->flag; |
1836 | break; |
1837 | case GETOPT_OP_UPDATE: |
1838 | *flags = o->flag; |
1839 | break; |
1840 | default: |
1841 | return i; |
1842 | } |
1843 | if (o->arg) { |
1844 | if (optvalue) { |
1845 | *o->arg = (char *)optvalue; |
1846 | } else if (++i < argc) { |
1847 | *o->arg = argv[i]; |
1848 | } else { |
1849 | return -1; |
1850 | } |
1851 | } |
1852 | return i; |
1853 | } |
1854 | |
/*
 * Parses leading command-line options in argv[1..argc-1] against the
 * table opts, which is terminated by an entry whose opt is '\0' and whose
 * longopt is NULL.
 *
 * "--name" and "--name=value" are matched against longopt; "-abc" is
 * treated as a cluster of single-character options matched against opt.
 * Each match is applied through op_getopt_flag(), which may consume the
 * following argv entry as the option's argument.
 *
 * Returns the index of the first argv entry that does not start with '-'
 * (or argc when there is none).  Returns -1 after printing a message to
 * stderr when an option is unknown or lacks its argument.
 */
int
grn_str_getopt(int argc, char * const argv[], const grn_str_getopt_opt *opts,
               int *flags)
{
  int i;
  for (i = 1; i < argc; i++) {
    const char * v = argv[i];
    if (*v == '-') {
      const grn_str_getopt_opt *o;
      int found;
      if (*++v == '-') {
        /* Long option: compare the text up to '=' (or the end). */
        const char *eq;
        size_t len;
        found = 0;
        v++;
        for (eq = v; *eq != '\0' && *eq != '='; eq++) {}
        len = eq - v;
        for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
          if (o->longopt && strlen(o->longopt) == len &&
              !memcmp(v, o->longopt, len)) {
            /* The text after '=' (if any) is passed as the inline value;
               i advances when the next argv entry is consumed instead. */
            i = op_getopt_flag(flags, o, argc, argv, i,
                               (*eq == '\0' ? NULL : eq + 1));
            if (i < 0) {
              fprintf(stderr, "%s: option '--%s' needs argument.\n", argv[0], o->longopt);
              return -1;
            }
            found = 1;
            break;
          }
        }
        if (!found) { goto exit; }
      } else {
        /* Short options: every character of the cluster must match. */
        const char *p;
        for (p = v; *p; p++) {
          found = 0;
          for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
            if (o->opt && *p == o->opt) {
              i = op_getopt_flag(flags, o, argc, argv, i, NULL);
              if (i < 0) {
                fprintf(stderr, "%s: option '-%c' needs argument.\n", argv[0], *p);
                return -1;
              }
              found = 1;
              break;
            }
          }
          if (!found) { goto exit; }
        }
      }
    } else {
      /* First non-option argument: stop parsing. */
      break;
    }
  }
  return i;
exit:
  fprintf(stderr, "%s: cannot recognize option '%s'.\n", argv[0], argv[i]);
  return -1;
}
1913 | |
/* Allocation granularity for out-of-place bulk buffers (4096 bytes):
   grn_bulk_resize() rounds sizes up to a multiple of UNIT_SIZE. */
#define UNIT_SIZE (1 << 12)
#define UNIT_MASK (UNIT_SIZE - 1)

/* Number of bytes grn_bulk_resize() keeps in front of the head of an
   out-of-place bulk buffer. */
int grn_bulk_margin_size = 0;
1918 | |
1919 | grn_rc |
1920 | grn_bulk_resize(grn_ctx *ctx, grn_obj *buf, unsigned int newsize) |
1921 | { |
1922 | char *head; |
1923 | unsigned int rounded_newsize; |
1924 | newsize += grn_bulk_margin_size + 1; |
1925 | if (GRN_BULK_OUTP(buf)) { |
1926 | rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK; |
1927 | if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; } |
1928 | newsize = rounded_newsize; |
1929 | head = buf->u.b.head - (buf->u.b.head ? grn_bulk_margin_size : 0); |
1930 | if (!(head = GRN_REALLOC(head, newsize))) { return GRN_NO_MEMORY_AVAILABLE; } |
1931 | buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf); |
1932 | buf->u.b.head = head + grn_bulk_margin_size; |
1933 | buf->u.b.tail = head + newsize; |
1934 | } else { |
1935 | if (newsize > GRN_BULK_BUFSIZE) { |
1936 | rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK; |
1937 | if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; } |
1938 | newsize = rounded_newsize; |
1939 | if (!(head = GRN_MALLOC(newsize))) { return GRN_NO_MEMORY_AVAILABLE; } |
1940 | grn_memcpy(head, GRN_BULK_HEAD(buf), GRN_BULK_VSIZE(buf)); |
1941 | buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf); |
1942 | buf->u.b.head = head + grn_bulk_margin_size; |
1943 | buf->u.b.tail = head + newsize; |
1944 | buf->header.impl_flags |= GRN_OBJ_OUTPLACE; |
1945 | } |
1946 | } |
1947 | return GRN_SUCCESS; |
1948 | } |
1949 | |
1950 | grn_rc |
1951 | grn_bulk_reinit(grn_ctx *ctx, grn_obj *buf, unsigned int size) |
1952 | { |
1953 | GRN_BULK_REWIND(buf); |
1954 | return grn_bulk_resize(ctx, buf, size); |
1955 | } |
1956 | |
1957 | grn_rc |
1958 | grn_bulk_write(grn_ctx *ctx, grn_obj *buf, const char *str, unsigned int len) |
1959 | { |
1960 | grn_rc rc = GRN_SUCCESS; |
1961 | char *curr; |
1962 | if (GRN_BULK_REST(buf) < len) { |
1963 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; } |
1964 | } |
1965 | curr = GRN_BULK_CURR(buf); |
1966 | grn_memcpy(curr, str, len); |
1967 | GRN_BULK_INCR_LEN(buf, len); |
1968 | return rc; |
1969 | } |
1970 | |
1971 | grn_rc |
1972 | grn_bulk_write_from(grn_ctx *ctx, grn_obj *bulk, |
1973 | const char *str, unsigned int from, unsigned int len) |
1974 | { |
1975 | grn_rc rc = grn_bulk_truncate(ctx, bulk, from); |
1976 | if (!rc) { rc = grn_bulk_write(ctx, bulk, str, len); } |
1977 | return rc; |
1978 | } |
1979 | |
1980 | grn_rc |
1981 | grn_bulk_reserve(grn_ctx *ctx, grn_obj *buf, unsigned int len) |
1982 | { |
1983 | grn_rc rc = GRN_SUCCESS; |
1984 | if (GRN_BULK_REST(buf) < len) { |
1985 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; } |
1986 | } |
1987 | return rc; |
1988 | } |
1989 | |
1990 | grn_rc |
1991 | grn_bulk_space(grn_ctx *ctx, grn_obj *buf, unsigned int len) |
1992 | { |
1993 | grn_rc rc = grn_bulk_reserve(ctx, buf, len); |
1994 | if (!rc) { |
1995 | GRN_BULK_INCR_LEN(buf, len); |
1996 | } |
1997 | return rc; |
1998 | } |
1999 | |
2000 | static grn_rc |
2001 | grn_bulk_space_clear(grn_ctx *ctx, grn_obj *buf, unsigned int len) |
2002 | { |
2003 | grn_rc rc = grn_bulk_reserve(ctx, buf, len); |
2004 | if (!rc) { |
2005 | memset(GRN_BULK_CURR(buf), 0, len); |
2006 | GRN_BULK_INCR_LEN(buf, len); |
2007 | } |
2008 | return rc; |
2009 | } |
2010 | |
2011 | grn_rc |
2012 | grn_bulk_truncate(grn_ctx *ctx, grn_obj *bulk, unsigned int len) |
2013 | { |
2014 | if (GRN_BULK_OUTP(bulk)) { |
2015 | if ((bulk->u.b.tail - bulk->u.b.head) < len) { |
2016 | return grn_bulk_space_clear(ctx, bulk, len); |
2017 | } else { |
2018 | bulk->u.b.curr = bulk->u.b.head + len; |
2019 | } |
2020 | } else { |
2021 | if (GRN_BULK_BUFSIZE < len) { |
2022 | return grn_bulk_space_clear(ctx, bulk, len); |
2023 | } else { |
2024 | bulk->header.flags &= ~GRN_BULK_BUFSIZE_MAX; |
2025 | bulk->header.flags += len; |
2026 | } |
2027 | } |
2028 | return GRN_SUCCESS; |
2029 | } |
2030 | |
2031 | grn_rc |
2032 | grn_text_itoa(grn_ctx *ctx, grn_obj *buf, int i) |
2033 | { |
2034 | grn_rc rc = GRN_SUCCESS; |
2035 | for (;;) { |
2036 | char *curr = GRN_BULK_CURR(buf); |
2037 | char *tail = GRN_BULK_TAIL(buf); |
2038 | if (grn_itoa(i, curr, tail, &curr)) { |
2039 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; } |
2040 | } else { |
2041 | GRN_BULK_SET_CURR(buf, curr); |
2042 | break; |
2043 | } |
2044 | } |
2045 | return rc; |
2046 | } |
2047 | |
2048 | grn_rc |
2049 | grn_text_itoa_padded(grn_ctx *ctx, grn_obj *buf, int i, char ch, unsigned int len) |
2050 | { |
2051 | grn_rc rc = GRN_SUCCESS; |
2052 | char *curr; |
2053 | if ((rc = grn_bulk_reserve(ctx, buf, len))) { return rc; } |
2054 | curr = GRN_BULK_CURR(buf); |
2055 | if (!grn_itoa_padded(i, curr, curr + len, ch)) { |
2056 | GRN_BULK_SET_CURR(buf, curr + len); |
2057 | } |
2058 | return rc; |
2059 | } |
2060 | |
2061 | grn_rc |
2062 | grn_text_lltoa(grn_ctx *ctx, grn_obj *buf, long long int i) |
2063 | { |
2064 | grn_rc rc = GRN_SUCCESS; |
2065 | for (;;) { |
2066 | char *curr = GRN_BULK_CURR(buf); |
2067 | char *tail = GRN_BULK_TAIL(buf); |
2068 | if (grn_lltoa(i, curr, tail, &curr)) { |
2069 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; } |
2070 | } else { |
2071 | GRN_BULK_SET_CURR(buf, curr); |
2072 | break; |
2073 | } |
2074 | } |
2075 | return rc; |
2076 | } |
2077 | |
2078 | grn_rc |
2079 | grn_text_ulltoa(grn_ctx *ctx, grn_obj *buf, unsigned long long int i) |
2080 | { |
2081 | grn_rc rc = GRN_SUCCESS; |
2082 | for (;;) { |
2083 | char *curr = GRN_BULK_CURR(buf); |
2084 | char *tail = GRN_BULK_TAIL(buf); |
2085 | if (grn_ulltoa(i, curr, tail, &curr)) { |
2086 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; } |
2087 | } else { |
2088 | GRN_BULK_SET_CURR(buf, curr); |
2089 | break; |
2090 | } |
2091 | } |
2092 | return rc; |
2093 | } |
2094 | |
/*
 * Appends a finite double to buf.  The value is printed with "%#.*g" and
 * 16 significant digits, then tidied in place:
 *  - output ending in '.' gets a '0' appended;
 *  - otherwise trailing '0' characters of the mantissa are dropped, always
 *    keeping one digit after the '.', and an exponent part (if present)
 *    is moved up to follow the shortened mantissa.
 *
 * NOTE(review): the results of grn_bulk_reserve() and grn_text_printf()
 * are not checked; if nothing was appended, len is 0 and start[len - 1]
 * reads before the appended region -- confirm this cannot happen.
 */
inline static void
ftoa_(grn_ctx *ctx, grn_obj *buf, double d)
{
  char *start;
  size_t before_size;
  size_t len;
#define DIGIT_NUMBER 16
#define FIRST_BUFFER_SIZE (DIGIT_NUMBER + 4)
  /* Remember the old length so the newly printed text can be located. */
  before_size = GRN_BULK_VSIZE(buf);
  grn_bulk_reserve(ctx, buf, FIRST_BUFFER_SIZE);
  grn_text_printf(ctx, buf, "%#.*g", DIGIT_NUMBER, d);
  len = GRN_BULK_VSIZE(buf) - before_size;
  start = GRN_BULK_CURR(buf) - len;
#undef FIRST_BUFFER_SIZE
#undef DIGIT_NUMBER
  if (start[len - 1] == '.') {
    GRN_TEXT_PUTC(ctx, buf, '0');
  } else {
    char *p, *q;
    /* Terminate temporarily so that strchr() stays inside the new text. */
    start[len] = '\0';
    if ((p = strchr(start, 'e'))) {
      /* Walk back from the 'e' over trailing zeros of the mantissa, but
         stop so that one digit remains after the '.'. */
      for (q = p; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
      /* Close the gap: start + len is the new end, so start + len - q is
         the length of the exponent part. */
      grn_memmove(q, p, start + len - q);
    } else {
      for (q = start + len; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
    }
    /* Shrink the bulk to the tidied length. */
    grn_bulk_truncate(ctx, buf, before_size + len);
  }
}
2124 | |
2125 | grn_rc |
2126 | grn_text_ftoa(grn_ctx *ctx, grn_obj *buf, double d) |
2127 | { |
2128 | grn_rc rc = GRN_SUCCESS; |
2129 | if (GRN_BULK_REST(buf) < 32) { |
2130 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 32))) { return rc; } |
2131 | } |
2132 | #ifdef HAVE_FPCLASSIFY |
2133 | switch (fpclassify(d)) { |
2134 | case FP_NAN : |
2135 | GRN_TEXT_PUTS(ctx, buf, "#<nan>" ); |
2136 | break; |
2137 | case FP_INFINITE : |
2138 | GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0" ); |
2139 | break; |
2140 | default : |
2141 | ftoa_(ctx, buf, d); |
2142 | break; |
2143 | } |
2144 | #else /* HAVE_FPCLASSIFY */ |
2145 | if (d == d) { |
2146 | if (d != 0 && ((d / 2.0) == d)) { |
2147 | GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0" ); |
2148 | } else { |
2149 | ftoa_(ctx, buf, d); |
2150 | } |
2151 | } else { |
2152 | GRN_TEXT_PUTS(ctx, buf, "#<nan>" ); |
2153 | } |
2154 | #endif /* HAVE_FPCLASSIFY */ |
2155 | return rc; |
2156 | } |
2157 | |
2158 | grn_rc |
2159 | grn_text_itoh(grn_ctx *ctx, grn_obj *buf, int i, unsigned int len) |
2160 | { |
2161 | grn_rc rc = GRN_SUCCESS; |
2162 | if (GRN_BULK_REST(buf) < len) { |
2163 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; } |
2164 | } |
2165 | grn_itoh(i, GRN_BULK_CURR(buf), len); |
2166 | GRN_BULK_INCR_LEN(buf, len); |
2167 | return rc; |
2168 | } |
2169 | |
2170 | grn_rc |
2171 | grn_text_itob(grn_ctx *ctx, grn_obj *buf, grn_id id) |
2172 | { |
2173 | size_t len = 5; |
2174 | grn_rc rc = GRN_SUCCESS; |
2175 | if (GRN_BULK_REST(buf) < len) { |
2176 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; } |
2177 | } |
2178 | grn_itob(id, GRN_BULK_CURR(buf)); |
2179 | GRN_BULK_INCR_LEN(buf, len); |
2180 | return rc; |
2181 | } |
2182 | |
2183 | grn_rc |
2184 | grn_text_lltob32h(grn_ctx *ctx, grn_obj *buf, long long int i) |
2185 | { |
2186 | size_t len = 13; |
2187 | grn_rc rc = GRN_SUCCESS; |
2188 | if (GRN_BULK_REST(buf) < len) { |
2189 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; } |
2190 | } |
2191 | grn_lltob32h(i, GRN_BULK_CURR(buf)); |
2192 | GRN_BULK_INCR_LEN(buf, len); |
2193 | return rc; |
2194 | } |
2195 | |
2196 | grn_rc |
2197 | grn_text_esc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len) |
2198 | { |
2199 | const char *e; |
2200 | unsigned int l; |
2201 | grn_rc rc = GRN_SUCCESS; |
2202 | |
2203 | GRN_TEXT_PUTC(ctx, buf, '"'); |
2204 | for (e = s + len; s < e; s += l) { |
2205 | if (!(l = grn_charlen(ctx, s, e))) { break; } |
2206 | if (l == 1) { |
2207 | switch (*s) { |
2208 | case '"' : |
2209 | grn_bulk_write(ctx, buf, "\\\"" , 2); |
2210 | break; |
2211 | case '\\' : |
2212 | grn_bulk_write(ctx, buf, "\\\\" , 2); |
2213 | break; |
2214 | case '\b' : |
2215 | grn_bulk_write(ctx, buf, "\\b" , 2); |
2216 | break; |
2217 | case '\f' : |
2218 | grn_bulk_write(ctx, buf, "\\f" , 2); |
2219 | break; |
2220 | case '\n' : |
2221 | grn_bulk_write(ctx, buf, "\\n" , 2); |
2222 | break; |
2223 | case '\r' : |
2224 | grn_bulk_write(ctx, buf, "\\r" , 2); |
2225 | break; |
2226 | case '\t' : |
2227 | grn_bulk_write(ctx, buf, "\\t" , 2); |
2228 | break; |
2229 | case '\x00': case '\x01': case '\x02': case '\x03': case '\x04': case '\x05': |
2230 | case '\x06': case '\x07': case '\x0b': case '\x0e': case '\x0f': case '\x10': |
2231 | case '\x11': case '\x12': case '\x13': case '\x14': case '\x15': case '\x16': |
2232 | case '\x17': case '\x18': case '\x19': case '\x1a': case '\x1b': case '\x1c': |
2233 | case '\x1d': case '\x1e': case '\x1f': case '\x7f': |
2234 | if (!(rc = grn_bulk_write(ctx, buf, "\\u" , 2))) { |
2235 | if ((rc = grn_text_itoh(ctx, buf, *s, 4))) { |
2236 | GRN_BULK_INCR_LEN(buf, -2); |
2237 | return rc; |
2238 | } |
2239 | } else { |
2240 | return rc; |
2241 | } |
2242 | break; |
2243 | default : |
2244 | GRN_TEXT_PUTC(ctx, buf, *s); |
2245 | } |
2246 | } else if (l == 3) { |
2247 | if (*s == '\xe2' && *(s + 1) == '\x80') { |
2248 | switch (*(s + 2)) { |
2249 | case '\xa8': /* \u2028 */ |
2250 | grn_bulk_write(ctx, buf, "\\u2028" , 6); |
2251 | break; |
2252 | case '\xa9': /* \u2029 */ |
2253 | grn_bulk_write(ctx, buf, "\\u2029" , 6); |
2254 | break; |
2255 | default: |
2256 | grn_bulk_write(ctx, buf, s, l); |
2257 | } |
2258 | } else { |
2259 | grn_bulk_write(ctx, buf, s, l); |
2260 | } |
2261 | } else { |
2262 | grn_bulk_write(ctx, buf, s, l); |
2263 | } |
2264 | } |
2265 | GRN_TEXT_PUTC(ctx, buf, '"'); |
2266 | return rc; |
2267 | } |
2268 | |
2269 | grn_rc |
2270 | grn_text_escape_xml(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len) |
2271 | { |
2272 | const char *e; |
2273 | unsigned int l; |
2274 | grn_rc rc = GRN_SUCCESS; |
2275 | |
2276 | for (e = s + len; s < e; s += l) { |
2277 | if (!(l = grn_charlen(ctx, s, e))) { break; } |
2278 | if (l == 1) { |
2279 | switch (*s) { |
2280 | case '"' : |
2281 | grn_bulk_write(ctx, buf, """ , 6); |
2282 | break; |
2283 | case '<' : |
2284 | grn_bulk_write(ctx, buf, "<" , 4); |
2285 | break; |
2286 | case '>' : |
2287 | grn_bulk_write(ctx, buf, ">" , 4); |
2288 | break; |
2289 | case '&' : |
2290 | grn_bulk_write(ctx, buf, "&" , 5); |
2291 | break; |
2292 | default : |
2293 | GRN_TEXT_PUTC(ctx, buf, *s); |
2294 | } |
2295 | } else { |
2296 | grn_bulk_write(ctx, buf, s, l); |
2297 | } |
2298 | } |
2299 | return rc; |
2300 | } |
2301 | |
2302 | #define TOK_ESC (0x80) |
2303 | |
2304 | const char * |
2305 | grn_text_unesc_tok(grn_ctx *ctx, grn_obj *buf, const char *s, const char *e, char *tok_type) |
2306 | { |
2307 | const char *p; |
2308 | unsigned int len; |
2309 | uint8_t stat = GRN_TOK_VOID; |
2310 | for (p = s; p < e; p += len) { |
2311 | if (!(len = grn_charlen(ctx, p, e))) { |
2312 | p = e; |
2313 | stat &= ~TOK_ESC; |
2314 | goto exit; |
2315 | } |
2316 | switch (stat) { |
2317 | case GRN_TOK_VOID : |
2318 | if (*p == ' ') { continue; } |
2319 | switch (*p) { |
2320 | case '"' : |
2321 | stat = GRN_TOK_STRING; |
2322 | break; |
2323 | case '\'' : |
2324 | stat = GRN_TOK_QUOTE; |
2325 | break; |
2326 | case ')' : |
2327 | case '(' : |
2328 | GRN_TEXT_PUT(ctx, buf, p, len); |
2329 | p += len; |
2330 | stat = GRN_TOK_SYMBOL; |
2331 | goto exit; |
2332 | case '\\' : |
2333 | stat = GRN_TOK_SYMBOL|TOK_ESC; |
2334 | break; |
2335 | default : |
2336 | stat = GRN_TOK_SYMBOL; |
2337 | GRN_TEXT_PUT(ctx, buf, p, len); |
2338 | break; |
2339 | } |
2340 | break; |
2341 | case GRN_TOK_SYMBOL : |
2342 | if (*p == ' ') { goto exit; } |
2343 | switch (*p) { |
2344 | case '\'' : |
2345 | case '"' : |
2346 | case ')' : |
2347 | case '(' : |
2348 | goto exit; |
2349 | case '\\' : |
2350 | stat |= TOK_ESC; |
2351 | break; |
2352 | default : |
2353 | GRN_TEXT_PUT(ctx, buf, p, len); |
2354 | break; |
2355 | } |
2356 | break; |
2357 | case GRN_TOK_STRING : |
2358 | switch (*p) { |
2359 | case '"' : |
2360 | p += len; |
2361 | goto exit; |
2362 | case '\\' : |
2363 | stat |= TOK_ESC; |
2364 | break; |
2365 | default : |
2366 | GRN_TEXT_PUT(ctx, buf, p, len); |
2367 | break; |
2368 | } |
2369 | break; |
2370 | case GRN_TOK_QUOTE : |
2371 | switch (*p) { |
2372 | case '\'' : |
2373 | p += len; |
2374 | goto exit; |
2375 | case '\\' : |
2376 | stat |= TOK_ESC; |
2377 | break; |
2378 | default : |
2379 | GRN_TEXT_PUT(ctx, buf, p, len); |
2380 | break; |
2381 | } |
2382 | break; |
2383 | case GRN_TOK_SYMBOL|TOK_ESC : |
2384 | case GRN_TOK_STRING|TOK_ESC : |
2385 | case GRN_TOK_QUOTE|TOK_ESC : |
2386 | switch (*p) { |
2387 | case 'b' : |
2388 | GRN_TEXT_PUTC(ctx, buf, '\b'); |
2389 | break; |
2390 | case 'f' : |
2391 | GRN_TEXT_PUTC(ctx, buf, '\f'); |
2392 | break; |
2393 | case 'n' : |
2394 | GRN_TEXT_PUTC(ctx, buf, '\n'); |
2395 | break; |
2396 | case 'r' : |
2397 | GRN_TEXT_PUTC(ctx, buf, '\r'); |
2398 | break; |
2399 | case 't' : |
2400 | GRN_TEXT_PUTC(ctx, buf, '\t'); |
2401 | break; |
2402 | default : |
2403 | GRN_TEXT_PUT(ctx, buf, p, len); |
2404 | break; |
2405 | } |
2406 | stat &= ~TOK_ESC; |
2407 | break; |
2408 | } |
2409 | } |
2410 | exit : |
2411 | *tok_type = stat; |
2412 | return p; |
2413 | } |
2414 | |
2415 | grn_rc |
2416 | grn_text_benc(grn_ctx *ctx, grn_obj *buf, unsigned int v) |
2417 | { |
2418 | grn_rc rc = GRN_SUCCESS; |
2419 | uint8_t *p; |
2420 | if (GRN_BULK_REST(buf) < 5) { |
2421 | if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 5))) { return rc; } |
2422 | } |
2423 | p = (uint8_t *)GRN_BULK_CURR(buf); |
2424 | GRN_B_ENC(v, p); |
2425 | GRN_BULK_SET_CURR(buf, (char *)p); |
2426 | return rc; |
2427 | } |
2428 | |
2429 | /* 0x00 - 0x7f */ |
2430 | static const int_least8_t urlenc_tbl[] = { |
2431 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2432 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2433 | 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, |
2434 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, |
2435 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2436 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2437 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2438 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 |
2439 | }; |
2440 | |
2441 | grn_rc |
2442 | grn_text_urlenc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len) |
2443 | { |
2444 | const char *e, c = '%'; |
2445 | for (e = s + len; s < e; s++) { |
2446 | if ((signed char)*s < 0 || urlenc_tbl[(int)*s]) { |
2447 | if (!grn_bulk_write(ctx, buf, &c, 1)) { |
2448 | if (grn_text_itoh(ctx, buf, *s, 2)) { |
2449 | GRN_BULK_INCR_LEN(buf, -1); |
2450 | } |
2451 | } |
2452 | } else { |
2453 | GRN_TEXT_PUTC(ctx, buf, *s); |
2454 | } |
2455 | } |
2456 | return GRN_SUCCESS; |
2457 | } |
2458 | |
2459 | static const char *weekdays[7] = {"Sun" , "Mon" , "Tue" , "Wed" , "Thu" , "Fri" , "Sat" }; |
2460 | static const char *months[12] = { |
2461 | "Jan" , "Feb" , "Mar" , "Apr" , "May" , "Jun" , |
2462 | "Jul" , "Aug" , "Sep" , "Oct" , "Nov" , "Dec" }; |
2463 | |
2464 | grn_rc |
2465 | grn_text_time2rfc1123(grn_ctx *ctx, grn_obj *bulk, int sec) |
2466 | { |
2467 | time_t tsec; |
2468 | struct tm *t; |
2469 | #ifdef HAVE__GMTIME64_S |
2470 | struct tm tm; |
2471 | tsec = (time_t)sec; |
2472 | t = (gmtime_s(&tm, &tsec) == 0) ? &tm : NULL; |
2473 | #else /* HAVE__GMTIME64_S */ |
2474 | # ifdef HAVE_GMTIME_R |
2475 | struct tm tm; |
2476 | tsec = (time_t)sec; |
2477 | t = gmtime_r(&tsec, &tm); |
2478 | # else /* HAVE_GMTIME_R */ |
2479 | tsec = (time_t)sec; |
2480 | t = gmtime(&tsec); |
2481 | # endif /* HAVE_GMTIME_R */ |
2482 | #endif /* HAVE__GMTIME64_S */ |
2483 | if (t) { |
2484 | GRN_TEXT_SET(ctx, bulk, weekdays[t->tm_wday], 3); |
2485 | GRN_TEXT_PUTS(ctx, bulk, ", " ); |
2486 | grn_text_itoa_padded(ctx, bulk, t->tm_mday, '0', 2); |
2487 | GRN_TEXT_PUTS(ctx, bulk, " " ); |
2488 | GRN_TEXT_PUT(ctx, bulk, months[t->tm_mon], 3); |
2489 | GRN_TEXT_PUTS(ctx, bulk, " " ); |
2490 | grn_text_itoa(ctx, bulk, t->tm_year + 1900); |
2491 | GRN_TEXT_PUTS(ctx, bulk, " " ); |
2492 | grn_text_itoa_padded(ctx, bulk, t->tm_hour, '0', 2); |
2493 | GRN_TEXT_PUTS(ctx, bulk, ":" ); |
2494 | grn_text_itoa_padded(ctx, bulk, t->tm_min, '0', 2); |
2495 | GRN_TEXT_PUTS(ctx, bulk, ":" ); |
2496 | grn_text_itoa_padded(ctx, bulk, t->tm_sec, '0', 2); |
2497 | GRN_TEXT_PUTS(ctx, bulk, " GMT" ); |
2498 | } else { |
2499 | GRN_TEXT_SETS(ctx, bulk, "Mon, 16 Mar 1980 20:40:00 GMT" ); |
2500 | } |
2501 | return GRN_SUCCESS; |
2502 | } |
2503 | |
2504 | grn_rc |
2505 | grn_text_printf(grn_ctx *ctx, grn_obj *bulk, const char *format, ...) |
2506 | { |
2507 | va_list args; |
2508 | |
2509 | va_start(args, format); |
2510 | grn_text_vprintf(ctx, bulk, format, args); |
2511 | va_end(args); |
2512 | |
2513 | return GRN_SUCCESS; |
2514 | } |
2515 | |
2516 | grn_rc |
2517 | grn_text_vprintf(grn_ctx *ctx, grn_obj *bulk, const char *format, va_list args) |
2518 | { |
2519 | grn_bool is_written = GRN_FALSE; |
2520 | int written_size; |
2521 | |
2522 | { |
2523 | int rest_size; |
2524 | va_list copied_args; |
2525 | |
2526 | rest_size = GRN_BULK_REST(bulk); |
2527 | va_copy(copied_args, args); |
2528 | written_size = vsnprintf(GRN_BULK_CURR(bulk), rest_size, |
2529 | format, copied_args); |
2530 | va_end(copied_args); |
2531 | |
2532 | if (0 <= written_size && written_size < rest_size) { |
2533 | is_written = GRN_TRUE; |
2534 | } |
2535 | } |
2536 | |
2537 | if (!is_written) { |
2538 | #ifdef WIN32 |
2539 | # define N_NEW_SIZES 3 |
2540 | int i; |
2541 | int new_sizes[N_NEW_SIZES]; |
2542 | |
2543 | new_sizes[0] = GRN_BULK_REST(bulk) + strlen(format) * 2; |
2544 | new_sizes[1] = new_sizes[0] + 4096; |
2545 | new_sizes[2] = new_sizes[0] + 65536; |
2546 | |
2547 | for (i = 0; i < N_NEW_SIZES; i++) { |
2548 | grn_rc rc; |
2549 | int new_size = new_sizes[i]; |
2550 | va_list copied_args; |
2551 | |
2552 | rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + new_size); |
2553 | if (rc) { |
2554 | return rc; |
2555 | } |
2556 | va_copy(copied_args, args); |
2557 | written_size = vsnprintf(GRN_BULK_CURR(bulk), new_size, |
2558 | format, copied_args); |
2559 | va_end(copied_args); |
2560 | if (written_size != -1) { |
2561 | break; |
2562 | } |
2563 | } |
2564 | # undef N_NEW_SIZES |
2565 | #else /* WIN32 */ |
2566 | grn_rc rc; |
2567 | int required_size = written_size + 1; /* "+ 1" for terminate '\0'. */ |
2568 | |
2569 | rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + required_size); |
2570 | if (rc) { |
2571 | return rc; |
2572 | } |
2573 | written_size = vsnprintf(GRN_BULK_CURR(bulk), required_size, |
2574 | format, args); |
2575 | #endif /* WIN32 */ |
2576 | } |
2577 | |
2578 | if (written_size < 0) { |
2579 | return GRN_INVALID_ARGUMENT; |
2580 | } |
2581 | |
2582 | GRN_BULK_INCR_LEN(bulk, written_size); |
2583 | return GRN_SUCCESS; |
2584 | } |
2585 | |
2586 | grn_rc |
2587 | grn_bulk_fin(grn_ctx *ctx, grn_obj *buf) |
2588 | { |
2589 | if (!(buf->header.impl_flags & GRN_OBJ_REFER)) { |
2590 | if (GRN_BULK_OUTP(buf) && buf->u.b.head) { |
2591 | GRN_REALLOC(buf->u.b.head - grn_bulk_margin_size, 0); |
2592 | } |
2593 | } |
2594 | buf->header.flags = 0; |
2595 | buf->header.impl_flags &= ~GRN_OBJ_DO_SHALLOW_COPY; |
2596 | buf->u.b.head = NULL; |
2597 | buf->u.b.curr = NULL; |
2598 | buf->u.b.tail = NULL; |
2599 | return GRN_SUCCESS; |
2600 | } |
2601 | |
2602 | grn_rc |
2603 | grn_substring(grn_ctx *ctx, char **str, char **str_end, int start, int end, grn_encoding encoding) |
2604 | { |
2605 | int i; |
2606 | size_t l; |
2607 | char *s = *str, *e = *str_end; |
2608 | for (i = 0; s < e; i++, s += l) { |
2609 | if (i == start) { *str = s; } |
2610 | if (!(l = grn_charlen(ctx, s, e))) { |
2611 | return GRN_INVALID_ARGUMENT; |
2612 | } |
2613 | if (i == end) { |
2614 | *str_end = s; |
2615 | break; |
2616 | } |
2617 | } |
2618 | return GRN_SUCCESS; |
2619 | } |
2620 | |
2621 | static void |
2622 | grn_text_atoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_id id) |
2623 | { |
2624 | uint32_t vs; |
2625 | grn_obj buf; |
2626 | if (obj->header.type == GRN_ACCESSOR) { |
2627 | grn_accessor *a = (grn_accessor *)obj; |
2628 | GRN_TEXT_INIT(&buf, 0); |
2629 | for (;;) { |
2630 | GRN_BULK_REWIND(&buf); |
2631 | switch (a->action) { |
2632 | case GRN_ACCESSOR_GET_ID : |
2633 | GRN_UINT32_PUT(ctx, &buf, id); |
2634 | buf.header.domain = GRN_DB_UINT32; |
2635 | break; |
2636 | case GRN_ACCESSOR_GET_KEY : |
2637 | grn_table_get_key2(ctx, a->obj, id, &buf); |
2638 | buf.header.domain = DB_OBJ(a->obj)->header.domain; |
2639 | break; |
2640 | case GRN_ACCESSOR_GET_VALUE : |
2641 | grn_obj_get_value(ctx, a->obj, id, &buf); |
2642 | buf.header.domain = GRN_DB_INT32; /* fix me */ |
2643 | break; |
2644 | case GRN_ACCESSOR_GET_SCORE : |
2645 | { |
2646 | grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs); |
2647 | int32_t int32_score = ri->score; |
2648 | GRN_INT32_PUT(ctx, &buf, int32_score); |
2649 | } |
2650 | buf.header.domain = GRN_DB_INT32; |
2651 | break; |
2652 | case GRN_ACCESSOR_GET_NSUBRECS : |
2653 | { |
2654 | grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs); |
2655 | GRN_INT32_PUT(ctx, &buf, ri->n_subrecs); |
2656 | } |
2657 | buf.header.domain = GRN_DB_INT32; |
2658 | break; |
2659 | case GRN_ACCESSOR_GET_COLUMN_VALUE : |
2660 | if ((a->obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) { |
2661 | if (a->next) { |
2662 | grn_id *idp; |
2663 | grn_obj_get_value(ctx, a->obj, id, &buf); |
2664 | idp = (grn_id *)GRN_BULK_HEAD(&buf); |
2665 | GRN_TEXT_PUTC(ctx, bulk, '['); |
2666 | for (vs = GRN_BULK_VSIZE(&buf) / sizeof(grn_id); vs--; idp++) { |
2667 | grn_text_atoj(ctx, bulk, (grn_obj *)a->next, *idp); |
2668 | if (vs) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
2669 | } |
2670 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
2671 | } else { |
2672 | grn_text_atoj(ctx, bulk, a->obj, id); |
2673 | } |
2674 | goto exit; |
2675 | } else { |
2676 | grn_obj_get_value(ctx, a->obj, id, &buf); |
2677 | } |
2678 | break; |
2679 | case GRN_ACCESSOR_GET_DB_OBJ : |
2680 | /* todo */ |
2681 | break; |
2682 | case GRN_ACCESSOR_LOOKUP : |
2683 | /* todo */ |
2684 | break; |
2685 | case GRN_ACCESSOR_FUNCALL : |
2686 | /* todo */ |
2687 | break; |
2688 | } |
2689 | if (a->next) { |
2690 | a = a->next; |
2691 | id = *((grn_id *)GRN_BULK_HEAD(&buf)); |
2692 | } else { |
2693 | break; |
2694 | } |
2695 | } |
2696 | } else { |
2697 | switch (obj->header.type) { |
2698 | case GRN_COLUMN_FIX_SIZE : |
2699 | GRN_VALUE_FIX_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range); |
2700 | break; |
2701 | case GRN_COLUMN_VAR_SIZE : |
2702 | if ((obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) { |
2703 | grn_obj *range = grn_ctx_at(ctx, DB_OBJ(obj)->range); |
2704 | if (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) { |
2705 | GRN_VALUE_VAR_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range); |
2706 | } else { |
2707 | GRN_VALUE_FIX_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range); |
2708 | } |
2709 | } else { |
2710 | GRN_VALUE_VAR_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range); |
2711 | } |
2712 | break; |
2713 | case GRN_COLUMN_INDEX : |
2714 | GRN_UINT32_INIT(&buf, 0); |
2715 | break; |
2716 | default: |
2717 | GRN_TEXT_INIT(&buf, 0); |
2718 | break; |
2719 | } |
2720 | grn_obj_get_value(ctx, obj, id, &buf); |
2721 | } |
2722 | grn_text_otoj(ctx, bulk, &buf, NULL); |
2723 | exit : |
2724 | grn_obj_close(ctx, &buf); |
2725 | } |
2726 | |
2727 | grn_rc |
2728 | grn_text_otoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_obj_format *format) |
2729 | { |
2730 | grn_obj buf; |
2731 | GRN_TEXT_INIT(&buf, 0); |
2732 | switch (obj->header.type) { |
2733 | case GRN_BULK : |
2734 | switch (obj->header.domain) { |
2735 | case GRN_DB_VOID : |
2736 | case GRN_DB_SHORT_TEXT : |
2737 | case GRN_DB_TEXT : |
2738 | case GRN_DB_LONG_TEXT : |
2739 | grn_text_esc(ctx, bulk, GRN_BULK_HEAD(obj), GRN_BULK_VSIZE(obj)); |
2740 | break; |
2741 | case GRN_DB_BOOL : |
2742 | if (*((unsigned char *)GRN_BULK_HEAD(obj))) { |
2743 | GRN_TEXT_PUTS(ctx, bulk, "true" ); |
2744 | } else { |
2745 | GRN_TEXT_PUTS(ctx, bulk, "false" ); |
2746 | } |
2747 | break; |
2748 | case GRN_DB_INT8 : |
2749 | grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT8_VALUE(obj) : 0); |
2750 | break; |
2751 | case GRN_DB_UINT8 : |
2752 | grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT8_VALUE(obj) : 0); |
2753 | break; |
2754 | case GRN_DB_INT16 : |
2755 | grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT16_VALUE(obj) : 0); |
2756 | break; |
2757 | case GRN_DB_UINT16 : |
2758 | grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT16_VALUE(obj) : 0); |
2759 | break; |
2760 | case GRN_DB_INT32 : |
2761 | grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT32_VALUE(obj) : 0); |
2762 | break; |
2763 | case GRN_DB_UINT32 : |
2764 | grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT32_VALUE(obj) : 0); |
2765 | break; |
2766 | case GRN_DB_INT64 : |
2767 | grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT64_VALUE(obj) : 0); |
2768 | break; |
2769 | case GRN_DB_UINT64 : |
2770 | grn_text_ulltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT64_VALUE(obj) : 0); |
2771 | break; |
2772 | case GRN_DB_FLOAT : |
2773 | grn_text_ftoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_FLOAT_VALUE(obj) : 0); |
2774 | break; |
2775 | case GRN_DB_TIME : |
2776 | { |
2777 | double dv = *((int64_t *)GRN_BULK_HEAD(obj)); |
2778 | dv /= 1000000.0; |
2779 | grn_text_ftoa(ctx, bulk, dv); |
2780 | } |
2781 | break; |
2782 | case GRN_DB_TOKYO_GEO_POINT : |
2783 | case GRN_DB_WGS84_GEO_POINT : |
2784 | if (GRN_BULK_VSIZE(obj) == sizeof(grn_geo_point)) { |
2785 | grn_geo_point *gp = (grn_geo_point *)GRN_BULK_HEAD(obj); |
2786 | GRN_TEXT_PUTC(ctx, bulk, '"'); |
2787 | grn_text_itoa(ctx, bulk, gp->latitude); |
2788 | GRN_TEXT_PUTC(ctx, bulk, 'x'); |
2789 | grn_text_itoa(ctx, bulk, gp->longitude); |
2790 | GRN_TEXT_PUTC(ctx, bulk, '"'); |
2791 | } else { |
2792 | GRN_TEXT_PUTS(ctx, bulk, "\"\"" ); |
2793 | } |
2794 | break; |
2795 | default : |
2796 | if (format) { |
2797 | int j; |
2798 | int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *); |
2799 | grn_id id = GRN_RECORD_VALUE(obj); |
2800 | grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns); |
2801 | if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) { |
2802 | GRN_TEXT_PUTS(ctx, bulk, "[" ); |
2803 | for (j = 0; j < ncolumns; j++) { |
2804 | grn_id range_id; |
2805 | if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
2806 | GRN_TEXT_PUTS(ctx, bulk, "[" ); |
2807 | GRN_BULK_REWIND(&buf); |
2808 | grn_column_name_(ctx, columns[j], &buf); |
2809 | grn_text_otoj(ctx, bulk, &buf, NULL); |
2810 | GRN_TEXT_PUTC(ctx, bulk, ','); |
2811 | /* column range */ |
2812 | range_id = grn_obj_get_range(ctx, columns[j]); |
2813 | if (range_id == GRN_ID_NIL) { |
2814 | GRN_TEXT_PUTS(ctx, bulk, "null" ); |
2815 | } else { |
2816 | int name_len; |
2817 | grn_obj *range_obj; |
2818 | char name_buf[GRN_TABLE_MAX_KEY_SIZE]; |
2819 | |
2820 | range_obj = grn_ctx_at(ctx, range_id); |
2821 | name_len = grn_obj_name(ctx, range_obj, name_buf, |
2822 | GRN_TABLE_MAX_KEY_SIZE); |
2823 | GRN_BULK_REWIND(&buf); |
2824 | GRN_TEXT_PUT(ctx, &buf, name_buf, name_len); |
2825 | grn_text_otoj(ctx, bulk, &buf, NULL); |
2826 | } |
2827 | GRN_TEXT_PUTS(ctx, bulk, "]" ); |
2828 | } |
2829 | GRN_TEXT_PUTS(ctx, bulk, "]," ); |
2830 | } |
2831 | GRN_TEXT_PUTC(ctx, bulk, '['); |
2832 | for (j = 0; j < ncolumns; j++) { |
2833 | if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
2834 | grn_text_atoj(ctx, bulk, columns[j], id); |
2835 | } |
2836 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
2837 | } else { |
2838 | if (GRN_BULK_VSIZE(obj) == 0) { |
2839 | GRN_TEXT_PUTS(ctx, bulk, "null" ); |
2840 | } else { |
2841 | grn_obj *table = grn_ctx_at(ctx, obj->header.domain); |
2842 | grn_id id = GRN_RECORD_VALUE(obj); |
2843 | if (table && table->header.type != GRN_TABLE_NO_KEY) { |
2844 | /* todo : temporal patch. grn_table_at() is kinda costful... */ |
2845 | if (grn_table_at(ctx, table, id)) { |
2846 | grn_obj *accessor = grn_obj_column(ctx, table, |
2847 | GRN_COLUMN_NAME_KEY, |
2848 | GRN_COLUMN_NAME_KEY_LEN); |
2849 | if (accessor) { |
2850 | grn_obj_get_value(ctx, accessor, id, &buf); |
2851 | grn_obj_unlink(ctx, accessor); |
2852 | } |
2853 | } |
2854 | grn_text_otoj(ctx, bulk, &buf, format); |
2855 | } else { |
2856 | grn_text_lltoa(ctx, bulk, id); |
2857 | } |
2858 | } |
2859 | } |
2860 | } |
2861 | break; |
2862 | case GRN_UVECTOR : |
2863 | if (format) { |
2864 | if (format->flags & GRN_OBJ_FORMAT_WITH_WEIGHT) { |
2865 | int i, n; |
2866 | grn_obj *domain; |
2867 | |
2868 | n = grn_uvector_size(ctx, obj); |
2869 | domain = grn_ctx_at(ctx, obj->header.domain); |
2870 | GRN_TEXT_PUTS(ctx, bulk, "{" ); |
2871 | for (i = 0; i < n; i++) { |
2872 | grn_id id; |
2873 | unsigned int weight; |
2874 | |
2875 | if (i > 0) { |
2876 | GRN_TEXT_PUTC(ctx, bulk, ','); |
2877 | } |
2878 | id = grn_uvector_get_element(ctx, obj, i, &weight); |
2879 | if (domain) { |
2880 | if (domain->header.type == GRN_TABLE_NO_KEY) { |
2881 | GRN_TEXT_PUTC(ctx, bulk, '"'); |
2882 | grn_text_ulltoa(ctx, bulk, id); |
2883 | GRN_TEXT_PUTC(ctx, bulk, '"'); |
2884 | } else { |
2885 | GRN_BULK_REWIND(&buf); |
2886 | grn_table_get_key2(ctx, domain, id, &buf); |
2887 | grn_text_otoj(ctx, bulk, &buf, NULL); |
2888 | } |
2889 | } else { |
2890 | GRN_TEXT_PUTC(ctx, bulk, '"'); |
2891 | grn_text_ulltoa(ctx, bulk, id); |
2892 | GRN_TEXT_PUTC(ctx, bulk, '"'); |
2893 | } |
2894 | GRN_TEXT_PUTC(ctx, bulk, ':'); |
2895 | grn_text_ulltoa(ctx, bulk, weight); |
2896 | } |
2897 | GRN_TEXT_PUTS(ctx, bulk, "}" ); |
2898 | } else { |
2899 | /* TODO: Does we still need this code? If we don't need this, we should |
2900 | remove this. */ |
2901 | int i, j; |
2902 | grn_id *v = (grn_id *)GRN_BULK_HEAD(obj), *ve = (grn_id *)GRN_BULK_CURR(obj); |
2903 | int ncolumns = GRN_BULK_VSIZE(&format->columns) / sizeof(grn_obj *); |
2904 | grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns); |
2905 | GRN_TEXT_PUTS(ctx, bulk, "[[" ); |
2906 | grn_text_itoa(ctx, bulk, ve - v); |
2907 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
2908 | if (v < ve) { |
2909 | if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) { |
2910 | GRN_TEXT_PUTS(ctx, bulk, ",[" ); |
2911 | for (j = 0; j < ncolumns; j++) { |
2912 | grn_id range_id; |
2913 | if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
2914 | GRN_TEXT_PUTS(ctx, bulk, "[" ); |
2915 | GRN_BULK_REWIND(&buf); |
2916 | grn_column_name_(ctx, columns[j], &buf); |
2917 | grn_text_otoj(ctx, bulk, &buf, NULL); |
2918 | GRN_TEXT_PUTC(ctx, bulk, ','); |
2919 | /* column range */ |
2920 | range_id = grn_obj_get_range(ctx, columns[j]); |
2921 | if (range_id == GRN_ID_NIL) { |
2922 | GRN_TEXT_PUTS(ctx, bulk, "null" ); |
2923 | } else { |
2924 | int name_len; |
2925 | grn_obj *range_obj; |
2926 | char name_buf[GRN_TABLE_MAX_KEY_SIZE]; |
2927 | |
2928 | range_obj = grn_ctx_at(ctx, range_id); |
2929 | name_len = grn_obj_name(ctx, range_obj, name_buf, |
2930 | GRN_TABLE_MAX_KEY_SIZE); |
2931 | GRN_BULK_REWIND(&buf); |
2932 | GRN_TEXT_PUT(ctx, &buf, name_buf, name_len); |
2933 | grn_text_otoj(ctx, bulk, &buf, NULL); |
2934 | } |
2935 | GRN_TEXT_PUTS(ctx, bulk, "]" ); |
2936 | } |
2937 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
2938 | } |
2939 | for (i = 0;; i++) { |
2940 | GRN_TEXT_PUTS(ctx, bulk, ",[" ); |
2941 | for (j = 0; j < ncolumns; j++) { |
2942 | if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
2943 | GRN_BULK_REWIND(&buf); |
2944 | grn_obj_get_value(ctx, columns[j], *v, &buf); |
2945 | grn_text_otoj(ctx, bulk, &buf, NULL); |
2946 | } |
2947 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
2948 | v++; |
2949 | if (v < ve) { |
2950 | GRN_TEXT_PUTC(ctx, bulk, ','); |
2951 | } else { |
2952 | break; |
2953 | } |
2954 | } |
2955 | } |
2956 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
2957 | } |
2958 | } else { |
2959 | grn_obj *range = grn_ctx_at(ctx, obj->header.domain); |
2960 | if (range && range->header.type == GRN_TYPE) { |
2961 | grn_id value_size = ((struct _grn_type *)range)->obj.range; |
2962 | char *v = (char *)GRN_BULK_HEAD(obj), |
2963 | *ve = (char *)GRN_BULK_CURR(obj); |
2964 | GRN_TEXT_PUTC(ctx, bulk, '['); |
2965 | if (v < ve) { |
2966 | for (;;) { |
2967 | grn_obj value; |
2968 | GRN_OBJ_INIT(&value, GRN_BULK, 0, obj->header.domain); |
2969 | grn_bulk_write_from(ctx, &value, v, 0, value_size); |
2970 | grn_text_otoj(ctx, bulk, &value, NULL); |
2971 | |
2972 | v += value_size; |
2973 | if (v < ve) { |
2974 | GRN_TEXT_PUTC(ctx, bulk, ','); |
2975 | } else { |
2976 | break; |
2977 | } |
2978 | } |
2979 | } |
2980 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
2981 | } else { |
2982 | grn_id *v = (grn_id *)GRN_BULK_HEAD(obj), |
2983 | *ve = (grn_id *)GRN_BULK_CURR(obj); |
2984 | GRN_TEXT_PUTC(ctx, bulk, '['); |
2985 | if (v < ve) { |
2986 | for (;;) { |
2987 | if (range->header.type != GRN_TABLE_NO_KEY) { |
2988 | grn_obj key; |
2989 | GRN_OBJ_INIT(&key, GRN_BULK, 0, range->header.domain); |
2990 | grn_table_get_key2(ctx, range, *v, &key); |
2991 | grn_text_otoj(ctx, bulk, &key, NULL); |
2992 | GRN_OBJ_FIN(ctx, &key); |
2993 | } else { |
2994 | grn_text_lltoa(ctx, bulk, *v); |
2995 | } |
2996 | v++; |
2997 | if (v < ve) { |
2998 | GRN_TEXT_PUTC(ctx, bulk, ','); |
2999 | } else { |
3000 | break; |
3001 | } |
3002 | } |
3003 | } |
3004 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
3005 | } |
3006 | } |
3007 | break; |
3008 | case GRN_VECTOR : |
3009 | if (obj->header.domain == GRN_DB_VOID) { |
3010 | ERR(GRN_INVALID_ARGUMENT, "invalid obj->header.domain" ); |
3011 | } else { |
3012 | unsigned int i, n; |
3013 | grn_obj value; |
3014 | grn_obj weight; |
3015 | grn_bool with_weight; |
3016 | |
3017 | GRN_VOID_INIT(&value); |
3018 | GRN_UINT32_INIT(&weight, 0); |
3019 | with_weight = (format && format->flags & GRN_OBJ_FORMAT_WITH_WEIGHT); |
3020 | n = grn_vector_size(ctx, obj); |
3021 | if (with_weight) { |
3022 | GRN_TEXT_PUTC(ctx, bulk, '{'); |
3023 | } else { |
3024 | GRN_TEXT_PUTC(ctx, bulk, '['); |
3025 | } |
3026 | for (i = 0; i < n; i++) { |
3027 | const char *_value; |
3028 | unsigned int _weight, length; |
3029 | grn_id domain; |
3030 | if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
3031 | |
3032 | length = grn_vector_get_element(ctx, obj, i, |
3033 | &_value, &_weight, &domain); |
3034 | if (domain != GRN_DB_VOID) { |
3035 | grn_obj_reinit(ctx, &value, domain, 0); |
3036 | } else { |
3037 | grn_obj_reinit(ctx, &value, obj->header.domain, 0); |
3038 | } |
3039 | grn_bulk_write(ctx, &value, _value, length); |
3040 | grn_text_otoj(ctx, bulk, &value, NULL); |
3041 | if (with_weight) { |
3042 | GRN_TEXT_PUTC(ctx, bulk, ':'); |
3043 | GRN_UINT32_SET(ctx, &weight, _weight); |
3044 | grn_text_otoj(ctx, bulk, &weight, NULL); |
3045 | } |
3046 | } |
3047 | if (with_weight) { |
3048 | GRN_TEXT_PUTC(ctx, bulk, '}'); |
3049 | } else { |
3050 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
3051 | } |
3052 | GRN_OBJ_FIN(ctx, &value); |
3053 | GRN_OBJ_FIN(ctx, &weight); |
3054 | } |
3055 | break; |
3056 | case GRN_PVECTOR : |
3057 | if (format) { |
3058 | ERR(GRN_FUNCTION_NOT_IMPLEMENTED, |
3059 | "cannot print GRN_PVECTOR using grn_obj_format" ); |
3060 | } else { |
3061 | unsigned int i, n; |
3062 | GRN_TEXT_PUTC(ctx, bulk, '['); |
3063 | n = GRN_BULK_VSIZE(obj) / sizeof(grn_obj *); |
3064 | for (i = 0; i < n; i++) { |
3065 | grn_obj *value; |
3066 | |
3067 | if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
3068 | value = GRN_PTR_VALUE_AT(obj, i); |
3069 | grn_text_otoj(ctx, bulk, value, NULL); |
3070 | } |
3071 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
3072 | } |
3073 | break; |
3074 | case GRN_TABLE_HASH_KEY : |
3075 | case GRN_TABLE_PAT_KEY : |
3076 | case GRN_TABLE_NO_KEY : |
3077 | if (format) { |
3078 | int i, j; |
3079 | int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *); |
3080 | grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns); |
3081 | grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0, |
3082 | format->offset, format->limit, |
3083 | GRN_CURSOR_ASCENDING); |
3084 | if (!tc) { ERRCLR(ctx); } |
3085 | GRN_TEXT_PUTS(ctx, bulk, "[[" ); |
3086 | grn_text_itoa(ctx, bulk, format->nhits); |
3087 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
3088 | if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) { |
3089 | GRN_TEXT_PUTS(ctx, bulk, ",[" ); |
3090 | for (j = 0; j < ncolumns; j++) { |
3091 | grn_id range_id; |
3092 | if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
3093 | GRN_TEXT_PUTS(ctx, bulk, "[" ); |
3094 | GRN_BULK_REWIND(&buf); |
3095 | grn_column_name_(ctx, columns[j], &buf); |
3096 | grn_text_otoj(ctx, bulk, &buf, NULL); |
3097 | GRN_TEXT_PUTC(ctx, bulk, ','); |
3098 | /* column range */ |
3099 | range_id = grn_obj_get_range(ctx, columns[j]); |
3100 | if (range_id == GRN_ID_NIL) { |
3101 | GRN_TEXT_PUTS(ctx, bulk, "null" ); |
3102 | } else { |
3103 | int name_len; |
3104 | grn_obj *range_obj; |
3105 | char name_buf[GRN_TABLE_MAX_KEY_SIZE]; |
3106 | |
3107 | range_obj = grn_ctx_at(ctx, range_id); |
3108 | name_len = grn_obj_name(ctx, range_obj, name_buf, |
3109 | GRN_TABLE_MAX_KEY_SIZE); |
3110 | GRN_BULK_REWIND(&buf); |
3111 | GRN_TEXT_PUT(ctx, &buf, name_buf, name_len); |
3112 | grn_text_otoj(ctx, bulk, &buf, NULL); |
3113 | } |
3114 | GRN_TEXT_PUTS(ctx, bulk, "]" ); |
3115 | } |
3116 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
3117 | } |
3118 | if (tc) { |
3119 | grn_id id; |
3120 | for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) { |
3121 | GRN_TEXT_PUTS(ctx, bulk, ",[" ); |
3122 | for (j = 0; j < ncolumns; j++) { |
3123 | if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
3124 | grn_text_atoj(ctx, bulk, columns[j], id); |
3125 | } |
3126 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
3127 | } |
3128 | grn_table_cursor_close(ctx, tc); |
3129 | } |
3130 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
3131 | } else { |
3132 | int i; |
3133 | grn_id id; |
3134 | grn_obj *column = grn_obj_column(ctx, obj, |
3135 | GRN_COLUMN_NAME_KEY, |
3136 | GRN_COLUMN_NAME_KEY_LEN); |
3137 | grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0, |
3138 | 0, -1, GRN_CURSOR_ASCENDING); |
3139 | GRN_TEXT_PUTC(ctx, bulk, '['); |
3140 | if (tc) { |
3141 | for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) { |
3142 | if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); } |
3143 | GRN_BULK_REWIND(&buf); |
3144 | grn_obj_get_value(ctx, column, id, &buf); |
3145 | grn_text_esc(ctx, bulk, GRN_BULK_HEAD(&buf), GRN_BULK_VSIZE(&buf)); |
3146 | } |
3147 | grn_table_cursor_close(ctx, tc); |
3148 | } |
3149 | GRN_TEXT_PUTC(ctx, bulk, ']'); |
3150 | grn_obj_unlink(ctx, column); |
3151 | } |
3152 | break; |
3153 | } |
3154 | grn_obj_close(ctx, &buf); |
3155 | return GRN_SUCCESS; |
3156 | } |
3157 | |
3158 | const char * |
3159 | grn_text_urldec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e, char d) |
3160 | { |
3161 | while (p < e) { |
3162 | if (*p == d) { |
3163 | p++; break; |
3164 | } else if (*p == '%' && p + 3 <= e) { |
3165 | const char *r; |
3166 | unsigned int c = grn_htoui(p + 1, p + 3, &r); |
3167 | if (p + 3 == r) { |
3168 | GRN_TEXT_PUTC(ctx, buf, c); |
3169 | p += 3; |
3170 | } else { |
3171 | GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)" , p[1], p[2]); |
3172 | GRN_TEXT_PUTC(ctx, buf, '%'); |
3173 | p += 1; |
3174 | } |
3175 | } else { |
3176 | GRN_TEXT_PUTC(ctx, buf, *p); |
3177 | p++; |
3178 | } |
3179 | } |
3180 | return p; |
3181 | } |
3182 | |
3183 | const char * |
3184 | grn_text_cgidec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e, |
3185 | const char *delimiters) |
3186 | { |
3187 | while (p < e) { |
3188 | grn_bool found_delimiter = GRN_FALSE; |
3189 | const char *delimiter; |
3190 | for (delimiter = delimiters; *delimiter; delimiter++) { |
3191 | if (*p == *delimiter) { |
3192 | found_delimiter = GRN_TRUE; |
3193 | break; |
3194 | } |
3195 | } |
3196 | if (found_delimiter) { |
3197 | p++; |
3198 | break; |
3199 | } |
3200 | |
3201 | if (*p == '+') { |
3202 | GRN_TEXT_PUTC(ctx, buf, ' '); |
3203 | p++; |
3204 | } else if (*p == '%' && p + 3 <= e) { |
3205 | const char *r; |
3206 | unsigned int c = grn_htoui(p + 1, p + 3, &r); |
3207 | if (p + 3 == r) { |
3208 | GRN_TEXT_PUTC(ctx, buf, c); |
3209 | p += 3; |
3210 | } else { |
3211 | GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)" , p[1], p[2]); |
3212 | GRN_TEXT_PUTC(ctx, buf, '%'); |
3213 | p += 1; |
3214 | } |
3215 | } else { |
3216 | GRN_TEXT_PUTC(ctx, buf, *p); |
3217 | p++; |
3218 | } |
3219 | } |
3220 | return p; |
3221 | } |
3222 | |
3223 | void |
3224 | grn_str_url_path_normalize(grn_ctx *ctx, const char *path, size_t path_len, |
3225 | char *buf, size_t buf_len) |
3226 | { |
3227 | char *b = buf, *be = buf + buf_len - 1; |
3228 | const char *p = path, *pe = path + path_len, *pc; |
3229 | |
3230 | if (buf_len < 2) { return; } |
3231 | |
3232 | while (p < pe) { |
3233 | for (pc = p; pc < pe && *pc != '/'; pc++) {} |
3234 | if (*p == '.') { |
3235 | if (pc == p + 2 && *(p + 1) == '.') { |
3236 | /* '..' */ |
3237 | if (b - buf >= 2) { |
3238 | for (b -= 2; *b != '/' && b >= buf; b--) {} |
3239 | } |
3240 | if (*b == '/') { |
3241 | b++; |
3242 | ERR(GRN_INVALID_ARGUMENT, "parent path doesn't exist." ); |
3243 | } |
3244 | p = pc + 1; |
3245 | continue; |
3246 | } else if (pc == p + 1) { |
3247 | /* '.' */ |
3248 | p = pc + 1; |
3249 | continue; |
3250 | } |
3251 | } |
3252 | if (be - b >= pc - p) { |
3253 | grn_memcpy(b, p, (pc - p)); |
3254 | b += pc - p; |
3255 | p = pc; |
3256 | if (p < pe && *pc == '/' && be > b) { |
3257 | *b++ = '/'; |
3258 | p++; |
3259 | } |
3260 | } |
3261 | } |
3262 | *b = '\0'; |
3263 | } |
3264 | |
3265 | grn_bool |
3266 | grn_bulk_is_zero(grn_ctx *ctx, grn_obj *obj) |
3267 | { |
3268 | const char *v = GRN_BULK_HEAD(obj); |
3269 | unsigned int s = GRN_BULK_VSIZE(obj); |
3270 | for (; s; s--, v++) { |
3271 | if (*v) { return GRN_FALSE; } |
3272 | } |
3273 | return GRN_TRUE; |
3274 | } |
3275 | |
3276 | |