1/* -*- c-basic-offset: 2 -*- */
2/* Copyright(C) 2009-2016 Brazil
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License version 2.1 as published by the Free Software Foundation.
7
8 This library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Lesser General Public License for more details.
12
13 You should have received a copy of the GNU Lesser General Public
14 License along with this library; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16*/
17#include "grn.h"
18#include <limits.h>
19#include <stdarg.h>
20#include <string.h>
21#include "grn_db.h"
22#include "grn_str.h"
23#include "grn_nfkc.h"
24
25#ifndef _ISOC99_SOURCE
26#define _ISOC99_SOURCE
27#endif /* _ISOC99_SOURCE */
28#include <math.h>
29
/*
 * When HAVE__GMTIME64_S is defined and the compiler identifies itself as
 * GCC-compatible, map gmtime_s() onto the explicitly sized variant:
 * _gmtime64_s() if _WIN64 is defined, _gmtime32_s() otherwise.
 */
#if defined(HAVE__GMTIME64_S) && defined(__GNUC__)
# ifdef _WIN64
# define gmtime_s(tm, time) _gmtime64_s(tm, time)
# else /* _WIN64 */
# define gmtime_s(tm, time) _gmtime32_s(tm, time)
# endif /* _WIN64 */
#endif /* defined(HAVE__GMTIME64_S) && defined(__GNUC__) */
37
38inline static int
39grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char *end)
40{
41 /* MEMO: This function allows non-null-terminated string as str. */
42 /* But requires the end of string. */
43 if (end <= str || !*str) {
44 return 0;
45 }
46 if (*str & 0x80) {
47 int i;
48 int len;
49 GRN_BIT_SCAN_REV(~(*str << 24), len);
50 len = 31 - len;
51 if ((unsigned int)(len - 2) >= 3) { /* (len == 1 || len >= 5) */
52 GRN_LOG(ctx, GRN_LOG_WARNING,
53 "grn_str_charlen_utf8(): first byte is invalid");
54 return 0;
55 }
56 if (str + len > end) {
57 GRN_LOG(ctx, GRN_LOG_WARNING,
58 "grn_str_charlen_utf8(): incomplete character");
59 return 0;
60 }
61 for (i = 1; i < len; ++i) {
62 if ((str[i] & 0xc0) != 0x80) {
63 GRN_LOG(ctx, GRN_LOG_WARNING,
64 "grn_str_charlen_utf8(): <%d>th byte is invalid",
65 i + 1);
66 return 0;
67 }
68 }
69 return len;
70 } else {
71 return 1;
72 }
73}
74
75unsigned int
76grn_str_charlen(grn_ctx *ctx, const char *str, grn_encoding encoding)
77{
78 /* MEMO: This function requires null-terminated string as str.*/
79 unsigned char *p = (unsigned char *) str;
80 if (!*p) { return 0; }
81 switch (encoding) {
82 case GRN_ENC_EUC_JP :
83 if (*p & 0x80) {
84 if (*(p + 1)) {
85 return 2;
86 } else {
87 /* This is invalid character */
88 GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_str_charlen");
89 return 0;
90 }
91 }
92 return 1;
93 case GRN_ENC_UTF8 :
94 if (*p & 0x80) {
95 int b, w;
96 size_t size;
97 for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++);
98 if (!w) {
99 GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(1) on grn_str_charlen");
100 return 0;
101 }
102 for (size = 1; w--; size++) {
103 if (!*++p || (*p & 0xc0) != 0x80) {
104 GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen");
105 return 0;
106 }
107 }
108 return size;
109 } else {
110 return 1;
111 }
112 case GRN_ENC_SJIS :
113 if (*p & 0x80) {
114 /* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */
115 if (0xa0 <= *p && *p <= 0xdf) {
116 /* hankaku-kana */
117 return 1;
118 } else if (!(*(p + 1))) {
119 /* This is invalid character */
120 GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_str_charlen");
121 return 0;
122 } else {
123 return 2;
124 }
125 } else {
126 return 1;
127 }
128 default :
129 return 1;
130 }
131 return 0;
132}
133
134int
135grn_charlen_(grn_ctx *ctx, const char *str, const char *end, grn_encoding encoding)
136{
137 /* MEMO: This function allows non-null-terminated string as str. */
138 /* But requires the end of string. */
139 unsigned char *p = (unsigned char *) str;
140 if (p >= (unsigned char *)end) { return 0; }
141 switch (encoding) {
142 case GRN_ENC_EUC_JP :
143 if (*p & 0x80) {
144 if ((p + 1) < (unsigned char *)end) {
145 return 2;
146 } else {
147 /* This is invalid character */
148 GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_charlen");
149 return 0;
150 }
151 }
152 return 1;
153 case GRN_ENC_UTF8 :
154 return grn_str_charlen_utf8(ctx, p, (unsigned char *)end);
155 case GRN_ENC_SJIS :
156 if (*p & 0x80) {
157 /* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */
158 if (0xa0 <= *p && *p <= 0xdf) {
159 /* hankaku-kana */
160 return 1;
161 } else if (++p >= (unsigned char *)end) {
162 /* This is invalid character */
163 GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_charlen");
164 return 0;
165 } else {
166 return 2;
167 }
168 } else {
169 return 1;
170 }
171 default :
172 return 1;
173 }
174 return 0;
175}
176
177int
178grn_charlen(grn_ctx *ctx, const char *str, const char *end)
179{
180 return grn_charlen_(ctx, str, end, ctx->encoding);
181}
182
/*
 * ASCII replacements for two-byte symbol characters, shared by
 * normalize_euc() and normalize_sjis(). The table is indexed by the second
 * byte of the character minus an encoding-specific base (0xa4 for EUC-JP,
 * 0x43 or 0x44 for Shift_JIS, see the callers). An entry of 0 means "no
 * ASCII replacement": the callers then copy the original two bytes.
 */
static unsigned char symbol[] = {
  ',', '.', 0, ':', ';', '?', '!', 0, 0, 0, '`', 0, '^', '~', '_', 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, '-', '-', '/', '\\', 0, 0, '|', 0, 0, 0, '\'', 0,
  '"', '(', ')', 0, 0, '[', ']', '{', '}', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  '+', '-', 0, 0, 0, '=', 0, '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  '$', 0, 0, '%', '#', '&', '*', '@', 0, 0, 0, 0, 0, 0, 0, 0
};
190
191inline static grn_rc
192normalize_euc(grn_ctx *ctx, grn_str *nstr)
193{
194 static uint16_t hankana[] = {
195 0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
196 0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
197 0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
198 0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
199 0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
200 0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
201 0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
202 0xa1eb
203 };
204 static unsigned char dakuten[] = {
205 0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
206 0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
207 0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
208 0, 0xdc
209 };
210 static unsigned char handaku[] = {
211 0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
212 };
213 int16_t *ch;
214 const unsigned char *s, *s_, *e;
215 unsigned char *d, *d0, *d_, b;
216 uint_least8_t *cp, *ctypes, ctype;
217 size_t size = nstr->orig_blen, length = 0;
218 int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
219 if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
220 return GRN_NO_MEMORY_AVAILABLE;
221 }
222 d0 = (unsigned char *) nstr->norm;
223 if (nstr->flags & GRN_STR_WITH_CHECKS) {
224 if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
225 GRN_FREE(nstr->norm);
226 nstr->norm = NULL;
227 return GRN_NO_MEMORY_AVAILABLE;
228 }
229 }
230 ch = nstr->checks;
231 if (nstr->flags & GRN_STR_WITH_CTYPES) {
232 if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
233 GRN_FREE(nstr->checks);
234 GRN_FREE(nstr->norm);
235 nstr->checks = NULL;
236 nstr->norm = NULL;
237 return GRN_NO_MEMORY_AVAILABLE;
238 }
239 }
240 cp = ctypes = nstr->ctypes;
241 e = (unsigned char *)nstr->orig + size;
242 for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
243 if ((*s & 0x80)) {
244 if (((s + 1) < e) && (*(s + 1) & 0x80)) {
245 unsigned char c1 = *s++, c2 = *s, c3 = 0;
246 switch (c1 >> 4) {
247 case 0x08 :
248 if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
249 uint16_t c = hankana[c2 - 0xa0];
250 switch (c) {
251 case 0xa1ab :
252 if (d > d0 + 1 && d[-2] == 0xa5
253 && 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
254 *(d - 1) = b;
255 if (ch) { ch[-1] += 2; s_ += 2; }
256 continue;
257 } else {
258 *d++ = c >> 8; *d = c & 0xff;
259 }
260 break;
261 case 0xa1eb :
262 if (d > d0 + 1 && d[-2] == 0xa5
263 && 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
264 *(d - 1) = b;
265 if (ch) { ch[-1] += 2; s_ += 2; }
266 continue;
267 } else {
268 *d++ = c >> 8; *d = c & 0xff;
269 }
270 break;
271 default :
272 *d++ = c >> 8; *d = c & 0xff;
273 break;
274 }
275 ctype = GRN_CHAR_KATAKANA;
276 } else {
277 *d++ = c1; *d = c2;
278 ctype = GRN_CHAR_OTHERS;
279 }
280 break;
281 case 0x09 :
282 *d++ = c1; *d = c2;
283 ctype = GRN_CHAR_OTHERS;
284 break;
285 case 0x0a :
286 switch (c1 & 0x0f) {
287 case 1 :
288 switch (c2) {
289 case 0xbc :
290 *d++ = c1; *d = c2;
291 ctype = GRN_CHAR_KATAKANA;
292 break;
293 case 0xb9 :
294 *d++ = c1; *d = c2;
295 ctype = GRN_CHAR_KANJI;
296 break;
297 case 0xa1 :
298 if (removeblankp) {
299 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
300 continue;
301 } else {
302 *d = ' ';
303 ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
304 }
305 break;
306 default :
307 if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
308 *d = c3;
309 ctype = GRN_CHAR_SYMBOL;
310 } else {
311 *d++ = c1; *d = c2;
312 ctype = GRN_CHAR_OTHERS;
313 }
314 break;
315 }
316 break;
317 case 2 :
318 *d++ = c1; *d = c2;
319 ctype = GRN_CHAR_SYMBOL;
320 break;
321 case 3 :
322 c3 = c2 - 0x80;
323 if ('a' <= c3 && c3 <= 'z') {
324 ctype = GRN_CHAR_ALPHA;
325 *d = c3;
326 } else if ('A' <= c3 && c3 <= 'Z') {
327 ctype = GRN_CHAR_ALPHA;
328 *d = c3 + 0x20;
329 } else if ('0' <= c3 && c3 <= '9') {
330 ctype = GRN_CHAR_DIGIT;
331 *d = c3;
332 } else {
333 ctype = GRN_CHAR_OTHERS;
334 *d++ = c1; *d = c2;
335 }
336 break;
337 case 4 :
338 *d++ = c1; *d = c2;
339 ctype = GRN_CHAR_HIRAGANA;
340 break;
341 case 5 :
342 *d++ = c1; *d = c2;
343 ctype = GRN_CHAR_KATAKANA;
344 break;
345 case 6 :
346 case 7 :
347 case 8 :
348 *d++ = c1; *d = c2;
349 ctype = GRN_CHAR_SYMBOL;
350 break;
351 default :
352 *d++ = c1; *d = c2;
353 ctype = GRN_CHAR_OTHERS;
354 break;
355 }
356 break;
357 default :
358 *d++ = c1; *d = c2;
359 ctype = GRN_CHAR_KANJI;
360 break;
361 }
362 } else {
363 /* skip invalid character */
364 continue;
365 }
366 } else {
367 unsigned char c = *s;
368 switch (c >> 4) {
369 case 0 :
370 case 1 :
371 /* skip unprintable ascii */
372 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
373 continue;
374 case 2 :
375 if (c == 0x20) {
376 if (removeblankp) {
377 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
378 continue;
379 } else {
380 *d = ' ';
381 ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
382 }
383 } else {
384 *d = c;
385 ctype = GRN_CHAR_SYMBOL;
386 }
387 break;
388 case 3 :
389 *d = c;
390 ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
391 break;
392 case 4 :
393 *d = ('A' <= c) ? c + 0x20 : c;
394 ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
395 break;
396 case 5 :
397 *d = (c <= 'Z') ? c + 0x20 : c;
398 ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
399 break;
400 case 6 :
401 *d = c;
402 ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
403 break;
404 case 7 :
405 *d = c;
406 ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
407 break;
408 default :
409 *d = c;
410 ctype = GRN_CHAR_OTHERS;
411 break;
412 }
413 }
414 d++;
415 length++;
416 if (cp) { *cp++ = ctype; }
417 if (ch) {
418 *ch++ = (int16_t)(s + 1 - s_);
419 s_ = s + 1;
420 while (++d_ < d) { *ch++ = 0; }
421 }
422 }
423 if (cp) { *cp = GRN_CHAR_NULL; }
424 *d = '\0';
425 nstr->length = length;
426 nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
427 return GRN_SUCCESS;
428}
429
430#ifdef GRN_WITH_NFKC
431inline static grn_rc
432normalize_utf8(grn_ctx *ctx, grn_str *nstr)
433{
434 int16_t *ch;
435 const unsigned char *s, *s_, *s__ = NULL, *p, *p2, *pe, *e;
436 unsigned char *d, *d_, *de;
437 uint_least8_t *cp;
438 size_t length = 0, ls, lp, size = nstr->orig_blen, ds = size * 3;
439 int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
440 if (!(nstr->norm = GRN_MALLOC(ds + 1))) {
441 return GRN_NO_MEMORY_AVAILABLE;
442 }
443 if (nstr->flags & GRN_STR_WITH_CHECKS) {
444 if (!(nstr->checks = GRN_MALLOC(ds * sizeof(int16_t) + 1))) {
445 GRN_FREE(nstr->norm); nstr->norm = NULL;
446 return GRN_NO_MEMORY_AVAILABLE;
447 }
448 }
449 ch = nstr->checks;
450 if (nstr->flags & GRN_STR_WITH_CTYPES) {
451 if (!(nstr->ctypes = GRN_MALLOC(ds + 1))) {
452 if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
453 GRN_FREE(nstr->norm); nstr->norm = NULL;
454 return GRN_NO_MEMORY_AVAILABLE;
455 }
456 }
457 cp = nstr->ctypes;
458 d = (unsigned char *)nstr->norm;
459 de = d + ds;
460 d_ = NULL;
461 e = (unsigned char *)nstr->orig + size;
462 for (s = s_ = (unsigned char *)nstr->orig; ; s += ls) {
463 if (!(ls = grn_str_charlen_utf8(ctx, s, e))) {
464 break;
465 }
466 if ((p = (unsigned char *)grn_nfkc_decompose(s))) {
467 pe = p + strlen((char *)p);
468 } else {
469 p = s;
470 pe = p + ls;
471 }
472 if (d_ && (p2 = (unsigned char *)grn_nfkc_compose(d_, p))) {
473 p = p2;
474 pe = p + strlen((char *)p);
475 if (cp) { cp--; }
476 if (ch) {
477 ch -= (d - d_);
478 s_ = s__;
479 }
480 d = d_;
481 length--;
482 }
483 for (; ; p += lp) {
484 if (!(lp = grn_str_charlen_utf8(ctx, p, pe))) {
485 break;
486 }
487 if ((*p == ' ' && removeblankp) || *p < 0x20 /* skip unprintable ascii */ ) {
488 if (cp > nstr->ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
489 } else {
490 if (de <= d + lp) {
491 unsigned char *norm;
492 ds += (ds >> 1) + lp;
493 if (!(norm = GRN_REALLOC(nstr->norm, ds + 1))) {
494 if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
495 if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
496 GRN_FREE(nstr->norm); nstr->norm = NULL;
497 return GRN_NO_MEMORY_AVAILABLE;
498 }
499 de = norm + ds;
500 d = norm + (d - (unsigned char *)nstr->norm);
501 nstr->norm = (char *)norm;
502 if (ch) {
503 int16_t *checks;
504 if (!(checks = GRN_REALLOC(nstr->checks, ds * sizeof(int16_t)+ 1))) {
505 if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
506 GRN_FREE(nstr->checks); nstr->checks = NULL;
507 GRN_FREE(nstr->norm); nstr->norm = NULL;
508 return GRN_NO_MEMORY_AVAILABLE;
509 }
510 ch = checks + (ch - nstr->checks);
511 nstr->checks = checks;
512 }
513 if (cp) {
514 uint_least8_t *ctypes;
515 if (!(ctypes = GRN_REALLOC(nstr->ctypes, ds + 1))) {
516 GRN_FREE(nstr->ctypes); nstr->ctypes = NULL;
517 if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
518 GRN_FREE(nstr->norm); nstr->norm = NULL;
519 return GRN_NO_MEMORY_AVAILABLE;
520 }
521 cp = ctypes + (cp - nstr->ctypes);
522 nstr->ctypes = ctypes;
523 }
524 }
525 grn_memcpy(d, p, lp);
526 d_ = d;
527 d += lp;
528 length++;
529 if (cp) { *cp++ = grn_nfkc_char_type(p); }
530 if (ch) {
531 size_t i;
532 if (s_ == s + ls) {
533 *ch++ = -1;
534 } else {
535 *ch++ = (int16_t)(s + ls - s_);
536 s__ = s_;
537 s_ = s + ls;
538 }
539 for (i = lp; i > 1; i--) { *ch++ = 0; }
540 }
541 }
542 }
543 }
544 if (cp) { *cp = GRN_CHAR_NULL; }
545 *d = '\0';
546 nstr->length = length;
547 nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
548 return GRN_SUCCESS;
549}
550#endif /* GRN_WITH_NFKC */
551
552inline static grn_rc
553normalize_sjis(grn_ctx *ctx, grn_str *nstr)
554{
555 static uint16_t hankana[] = {
556 0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342,
557 0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 0x815b, 0x8341,
558 0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 0x834e, 0x8350, 0x8352,
559 0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 0x835e, 0x8360, 0x8363, 0x8365,
560 0x8367, 0x8369, 0x836a, 0x836b, 0x836c, 0x836d, 0x836e, 0x8371, 0x8374,
561 0x8377, 0x837a, 0x837d, 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386,
562 0x8388, 0x8389, 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, 0x814a,
563 0x814b
564 };
565 static unsigned char dakuten[] = {
566 0x94, 0, 0, 0, 0, 0x4b, 0, 0x4d, 0, 0x4f, 0, 0x51, 0, 0x53, 0, 0x55, 0,
567 0x57, 0, 0x59, 0, 0x5b, 0, 0x5d, 0, 0x5f, 0, 0x61, 0, 0, 0x64, 0, 0x66,
568 0, 0x68, 0, 0, 0, 0, 0, 0, 0x6f, 0, 0, 0x72, 0, 0, 0x75, 0, 0, 0x78, 0,
569 0, 0x7b
570 };
571 static unsigned char handaku[] = {
572 0x70, 0, 0, 0x73, 0, 0, 0x76, 0, 0, 0x79, 0, 0, 0x7c
573 };
574 int16_t *ch;
575 const unsigned char *s, *s_;
576 unsigned char *d, *d0, *d_, b, *e;
577 uint_least8_t *cp, *ctypes, ctype;
578 size_t size = nstr->orig_blen, length = 0;
579 int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
580 if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
581 return GRN_NO_MEMORY_AVAILABLE;
582 }
583 d0 = (unsigned char *) nstr->norm;
584 if (nstr->flags & GRN_STR_WITH_CHECKS) {
585 if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
586 GRN_FREE(nstr->norm);
587 nstr->norm = NULL;
588 return GRN_NO_MEMORY_AVAILABLE;
589 }
590 }
591 ch = nstr->checks;
592 if (nstr->flags & GRN_STR_WITH_CTYPES) {
593 if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
594 GRN_FREE(nstr->checks);
595 GRN_FREE(nstr->norm);
596 nstr->checks = NULL;
597 nstr->norm = NULL;
598 return GRN_NO_MEMORY_AVAILABLE;
599 }
600 }
601 cp = ctypes = nstr->ctypes;
602 e = (unsigned char *)nstr->orig + size;
603 for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
604 if ((*s & 0x80)) {
605 if (0xa0 <= *s && *s <= 0xdf) {
606 uint16_t c = hankana[*s - 0xa0];
607 switch (c) {
608 case 0x814a :
609 if (d > d0 + 1 && d[-2] == 0x83
610 && 0x45 <= d[-1] && d[-1] <= 0x7a && (b = dakuten[d[-1] - 0x45])) {
611 *(d - 1) = b;
612 if (ch) { ch[-1]++; s_++; }
613 continue;
614 } else {
615 *d++ = c >> 8; *d = c & 0xff;
616 }
617 break;
618 case 0x814b :
619 if (d > d0 + 1 && d[-2] == 0x83
620 && 0x6e <= d[-1] && d[-1] <= 0x7a && (b = handaku[d[-1] - 0x6e])) {
621 *(d - 1) = b;
622 if (ch) { ch[-1]++; s_++; }
623 continue;
624 } else {
625 *d++ = c >> 8; *d = c & 0xff;
626 }
627 break;
628 default :
629 *d++ = c >> 8; *d = c & 0xff;
630 break;
631 }
632 ctype = GRN_CHAR_KATAKANA;
633 } else {
634 if ((s + 1) < e && 0x40 <= *(s + 1) && *(s + 1) <= 0xfc) {
635 unsigned char c1 = *s++, c2 = *s, c3 = 0;
636 if (0x81 <= c1 && c1 <= 0x87) {
637 switch (c1 & 0x0f) {
638 case 1 :
639 switch (c2) {
640 case 0x5b :
641 *d++ = c1; *d = c2;
642 ctype = GRN_CHAR_KATAKANA;
643 break;
644 case 0x58 :
645 *d++ = c1; *d = c2;
646 ctype = GRN_CHAR_KANJI;
647 break;
648 case 0x40 :
649 if (removeblankp) {
650 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
651 continue;
652 } else {
653 *d = ' ';
654 ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
655 }
656 break;
657 default :
658 if (0x43 <= c2 && c2 <= 0x7e && (c3 = symbol[c2 - 0x43])) {
659 *d = c3;
660 ctype = GRN_CHAR_SYMBOL;
661 } else if (0x7f <= c2 && c2 <= 0x97 && (c3 = symbol[c2 - 0x44])) {
662 *d = c3;
663 ctype = GRN_CHAR_SYMBOL;
664 } else {
665 *d++ = c1; *d = c2;
666 ctype = GRN_CHAR_OTHERS;
667 }
668 break;
669 }
670 break;
671 case 2 :
672 c3 = c2 - 0x1f;
673 if (0x4f <= c2 && c2 <= 0x58) {
674 ctype = GRN_CHAR_DIGIT;
675 *d = c2 - 0x1f;
676 } else if (0x60 <= c2 && c2 <= 0x79) {
677 ctype = GRN_CHAR_ALPHA;
678 *d = c2 + 0x01;
679 } else if (0x81 <= c2 && c2 <= 0x9a) {
680 ctype = GRN_CHAR_ALPHA;
681 *d = c2 - 0x20;
682 } else if (0x9f <= c2 && c2 <= 0xf1) {
683 *d++ = c1; *d = c2;
684 ctype = GRN_CHAR_HIRAGANA;
685 } else {
686 *d++ = c1; *d = c2;
687 ctype = GRN_CHAR_OTHERS;
688 }
689 break;
690 case 3 :
691 if (0x40 <= c2 && c2 <= 0x96) {
692 *d++ = c1; *d = c2;
693 ctype = GRN_CHAR_KATAKANA;
694 } else {
695 *d++ = c1; *d = c2;
696 ctype = GRN_CHAR_SYMBOL;
697 }
698 break;
699 case 4 :
700 case 7 :
701 *d++ = c1; *d = c2;
702 ctype = GRN_CHAR_SYMBOL;
703 break;
704 default :
705 *d++ = c1; *d = c2;
706 ctype = GRN_CHAR_OTHERS;
707 break;
708 }
709 } else {
710 *d++ = c1; *d = c2;
711 ctype = GRN_CHAR_KANJI;
712 }
713 } else {
714 /* skip invalid character */
715 continue;
716 }
717 }
718 } else {
719 unsigned char c = *s;
720 switch (c >> 4) {
721 case 0 :
722 case 1 :
723 /* skip unprintable ascii */
724 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
725 continue;
726 case 2 :
727 if (c == 0x20) {
728 if (removeblankp) {
729 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
730 continue;
731 } else {
732 *d = ' ';
733 ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
734 }
735 } else {
736 *d = c;
737 ctype = GRN_CHAR_SYMBOL;
738 }
739 break;
740 case 3 :
741 *d = c;
742 ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
743 break;
744 case 4 :
745 *d = ('A' <= c) ? c + 0x20 : c;
746 ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
747 break;
748 case 5 :
749 *d = (c <= 'Z') ? c + 0x20 : c;
750 ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
751 break;
752 case 6 :
753 *d = c;
754 ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
755 break;
756 case 7 :
757 *d = c;
758 ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
759 break;
760 default :
761 *d = c;
762 ctype = GRN_CHAR_OTHERS;
763 break;
764 }
765 }
766 d++;
767 length++;
768 if (cp) { *cp++ = ctype; }
769 if (ch) {
770 *ch++ = (int16_t)(s + 1 - s_);
771 s_ = s + 1;
772 while (++d_ < d) { *ch++ = 0; }
773 }
774 }
775 if (cp) { *cp = GRN_CHAR_NULL; }
776 *d = '\0';
777 nstr->length = length;
778 nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
779 return GRN_SUCCESS;
780}
781
782inline static grn_rc
783normalize_none(grn_ctx *ctx, grn_str *nstr)
784{
785 int16_t *ch;
786 const unsigned char *s, *s_, *e;
787 unsigned char *d, *d0, *d_;
788 uint_least8_t *cp, *ctypes, ctype;
789 size_t size = nstr->orig_blen, length = 0;
790 int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
791 if (!(nstr->norm = GRN_MALLOC(size + 1))) {
792 return GRN_NO_MEMORY_AVAILABLE;
793 }
794 d0 = (unsigned char *) nstr->norm;
795 if (nstr->flags & GRN_STR_WITH_CHECKS) {
796 if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
797 GRN_FREE(nstr->norm);
798 nstr->norm = NULL;
799 return GRN_NO_MEMORY_AVAILABLE;
800 }
801 }
802 ch = nstr->checks;
803 if (nstr->flags & GRN_STR_WITH_CTYPES) {
804 if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
805 GRN_FREE(nstr->checks);
806 GRN_FREE(nstr->norm);
807 nstr->checks = NULL;
808 nstr->norm = NULL;
809 return GRN_NO_MEMORY_AVAILABLE;
810 }
811 }
812 cp = ctypes = nstr->ctypes;
813 e = (unsigned char *)nstr->orig + size;
814 for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
815 unsigned char c = *s;
816 switch (c >> 4) {
817 case 0 :
818 case 1 :
819 /* skip unprintable ascii */
820 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
821 continue;
822 case 2 :
823 if (c == 0x20) {
824 if (removeblankp) {
825 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
826 continue;
827 } else {
828 *d = ' ';
829 ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
830 }
831 } else {
832 *d = c;
833 ctype = GRN_CHAR_SYMBOL;
834 }
835 break;
836 case 3 :
837 *d = c;
838 ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
839 break;
840 case 4 :
841 *d = ('A' <= c) ? c + 0x20 : c;
842 ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
843 break;
844 case 5 :
845 *d = (c <= 'Z') ? c + 0x20 : c;
846 ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
847 break;
848 case 6 :
849 *d = c;
850 ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
851 break;
852 case 7 :
853 *d = c;
854 ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
855 break;
856 default :
857 *d = c;
858 ctype = GRN_CHAR_OTHERS;
859 break;
860 }
861 d++;
862 length++;
863 if (cp) { *cp++ = ctype; }
864 if (ch) {
865 *ch++ = (int16_t)(s + 1 - s_);
866 s_ = s + 1;
867 while (++d_ < d) { *ch++ = 0; }
868 }
869 }
870 if (cp) { *cp = GRN_CHAR_NULL; }
871 *d = '\0';
872 nstr->length = length;
873 nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
874 return GRN_SUCCESS;
875}
876
877/* use cp1252 as latin1 */
878inline static grn_rc
879normalize_latin1(grn_ctx *ctx, grn_str *nstr)
880{
881 int16_t *ch;
882 const unsigned char *s, *s_, *e;
883 unsigned char *d, *d0, *d_;
884 uint_least8_t *cp, *ctypes, ctype;
885 size_t size = nstr->orig_blen, length = 0;
886 int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
887 if (!(nstr->norm = GRN_MALLOC(size + 1))) {
888 return GRN_NO_MEMORY_AVAILABLE;
889 }
890 d0 = (unsigned char *) nstr->norm;
891 if (nstr->flags & GRN_STR_WITH_CHECKS) {
892 if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
893 GRN_FREE(nstr->norm);
894 nstr->norm = NULL;
895 return GRN_NO_MEMORY_AVAILABLE;
896 }
897 }
898 ch = nstr->checks;
899 if (nstr->flags & GRN_STR_WITH_CTYPES) {
900 if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
901 GRN_FREE(nstr->checks);
902 GRN_FREE(nstr->norm);
903 nstr->checks = NULL;
904 nstr->norm = NULL;
905 return GRN_NO_MEMORY_AVAILABLE;
906 }
907 }
908 cp = ctypes = nstr->ctypes;
909 e = (unsigned char *)nstr->orig + size;
910 for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
911 unsigned char c = *s;
912 switch (c >> 4) {
913 case 0 :
914 case 1 :
915 /* skip unprintable ascii */
916 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
917 continue;
918 case 2 :
919 if (c == 0x20) {
920 if (removeblankp) {
921 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
922 continue;
923 } else {
924 *d = ' ';
925 ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
926 }
927 } else {
928 *d = c;
929 ctype = GRN_CHAR_SYMBOL;
930 }
931 break;
932 case 3 :
933 *d = c;
934 ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
935 break;
936 case 4 :
937 *d = ('A' <= c) ? c + 0x20 : c;
938 ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
939 break;
940 case 5 :
941 *d = (c <= 'Z') ? c + 0x20 : c;
942 ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
943 break;
944 case 6 :
945 *d = c;
946 ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
947 break;
948 case 7 :
949 *d = c;
950 ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
951 break;
952 case 8 :
953 if (c == 0x8a || c == 0x8c || c == 0x8e) {
954 *d = c + 0x10;
955 ctype = GRN_CHAR_ALPHA;
956 } else {
957 *d = c;
958 ctype = GRN_CHAR_SYMBOL;
959 }
960 break;
961 case 9 :
962 if (c == 0x9a || c == 0x9c || c == 0x9e || c == 0x9f) {
963 *d = (c == 0x9f) ? c + 0x60 : c;
964 ctype = GRN_CHAR_ALPHA;
965 } else {
966 *d = c;
967 ctype = GRN_CHAR_SYMBOL;
968 }
969 break;
970 case 0x0c :
971 *d = c + 0x20;
972 ctype = GRN_CHAR_ALPHA;
973 break;
974 case 0x0d :
975 *d = (c == 0xd7 || c == 0xdf) ? c : c + 0x20;
976 ctype = (c == 0xd7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
977 break;
978 case 0x0e :
979 *d = c;
980 ctype = GRN_CHAR_ALPHA;
981 break;
982 case 0x0f :
983 *d = c;
984 ctype = (c == 0xf7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
985 break;
986 default :
987 *d = c;
988 ctype = GRN_CHAR_OTHERS;
989 break;
990 }
991 d++;
992 length++;
993 if (cp) { *cp++ = ctype; }
994 if (ch) {
995 *ch++ = (int16_t)(s + 1 - s_);
996 s_ = s + 1;
997 while (++d_ < d) { *ch++ = 0; }
998 }
999 }
1000 if (cp) { *cp = GRN_CHAR_NULL; }
1001 *d = '\0';
1002 nstr->length = length;
1003 nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
1004 return GRN_SUCCESS;
1005}
1006
1007inline static grn_rc
1008normalize_koi8r(grn_ctx *ctx, grn_str *nstr)
1009{
1010 int16_t *ch;
1011 const unsigned char *s, *s_, *e;
1012 unsigned char *d, *d0, *d_;
1013 uint_least8_t *cp, *ctypes, ctype;
1014 size_t size = strlen(nstr->orig), length = 0;
1015 int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
1016 if (!(nstr->norm = GRN_MALLOC(size + 1))) {
1017 return GRN_NO_MEMORY_AVAILABLE;
1018 }
1019 d0 = (unsigned char *) nstr->norm;
1020 if (nstr->flags & GRN_STR_WITH_CHECKS) {
1021 if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
1022 GRN_FREE(nstr->norm);
1023 nstr->norm = NULL;
1024 return GRN_NO_MEMORY_AVAILABLE;
1025 }
1026 }
1027 ch = nstr->checks;
1028 if (nstr->flags & GRN_STR_WITH_CTYPES) {
1029 if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
1030 GRN_FREE(nstr->checks);
1031 GRN_FREE(nstr->norm);
1032 nstr->checks = NULL;
1033 nstr->norm = NULL;
1034 return GRN_NO_MEMORY_AVAILABLE;
1035 }
1036 }
1037 cp = ctypes = nstr->ctypes;
1038 e = (unsigned char *)nstr->orig + size;
1039 for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
1040 unsigned char c = *s;
1041 switch (c >> 4) {
1042 case 0 :
1043 case 1 :
1044 /* skip unprintable ascii */
1045 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
1046 continue;
1047 case 2 :
1048 if (c == 0x20) {
1049 if (removeblankp) {
1050 if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
1051 continue;
1052 } else {
1053 *d = ' ';
1054 ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
1055 }
1056 } else {
1057 *d = c;
1058 ctype = GRN_CHAR_SYMBOL;
1059 }
1060 break;
1061 case 3 :
1062 *d = c;
1063 ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
1064 break;
1065 case 4 :
1066 *d = ('A' <= c) ? c + 0x20 : c;
1067 ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
1068 break;
1069 case 5 :
1070 *d = (c <= 'Z') ? c + 0x20 : c;
1071 ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
1072 break;
1073 case 6 :
1074 *d = c;
1075 ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
1076 break;
1077 case 7 :
1078 *d = c;
1079 ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
1080 break;
1081 case 0x0a :
1082 *d = c;
1083 ctype = (c == 0xa3) ? GRN_CHAR_ALPHA : GRN_CHAR_OTHERS;
1084 break;
1085 case 0x0b :
1086 if (c == 0xb3) {
1087 *d = c - 0x10;
1088 ctype = GRN_CHAR_ALPHA;
1089 } else {
1090 *d = c;
1091 ctype = GRN_CHAR_OTHERS;
1092 }
1093 break;
1094 case 0x0c :
1095 case 0x0d :
1096 *d = c;
1097 ctype = GRN_CHAR_ALPHA;
1098 break;
1099 case 0x0e :
1100 case 0x0f :
1101 *d = c - 0x20;
1102 ctype = GRN_CHAR_ALPHA;
1103 break;
1104 default :
1105 *d = c;
1106 ctype = GRN_CHAR_OTHERS;
1107 break;
1108 }
1109 d++;
1110 length++;
1111 if (cp) { *cp++ = ctype; }
1112 if (ch) {
1113 *ch++ = (int16_t)(s + 1 - s_);
1114 s_ = s + 1;
1115 while (++d_ < d) { *ch++ = 0; }
1116 }
1117 }
1118 if (cp) { *cp = GRN_CHAR_NULL; }
1119 *d = '\0';
1120 nstr->length = length;
1121 nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
1122 return GRN_SUCCESS;
1123}
1124
1125static grn_str *
1126grn_fakenstr_open(grn_ctx *ctx, const char *str, size_t str_len, grn_encoding encoding, int flags)
1127{
1128 /* TODO: support GRN_STR_REMOVEBLANK flag and ctypes */
1129 grn_str *nstr;
1130 if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
1131 GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_fakenstr_open failed !");
1132 return NULL;
1133 }
1134 if (!(nstr->norm = GRN_MALLOC(str_len + 1))) {
1135 GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation for keyword on grn_snip_add_cond failed !");
1136 GRN_FREE(nstr);
1137 return NULL;
1138 }
1139 nstr->orig = str;
1140 nstr->orig_blen = str_len;
1141 grn_memcpy(nstr->norm, str, str_len);
1142 nstr->norm[str_len] = '\0';
1143 nstr->norm_blen = str_len;
1144 nstr->ctypes = NULL;
1145 nstr->flags = flags;
1146
1147 if (flags & GRN_STR_WITH_CHECKS) {
1148 int16_t f = 0;
1149 unsigned char c;
1150 size_t i;
1151 if (!(nstr->checks = (int16_t *) GRN_MALLOC(sizeof(int16_t) * str_len))) {
1152 GRN_FREE(nstr->norm);
1153 GRN_FREE(nstr);
1154 return NULL;
1155 }
1156 switch (encoding) {
1157 case GRN_ENC_EUC_JP:
1158 for (i = 0; i < str_len; i++) {
1159 if (!f) {
1160 c = (unsigned char) str[i];
1161 f = ((c >= 0xa1U && c <= 0xfeU) || c == 0x8eU ? 2 : (c == 0x8fU ? 3 : 1)
1162 );
1163 nstr->checks[i] = f;
1164 } else {
1165 nstr->checks[i] = 0;
1166 }
1167 f--;
1168 }
1169 break;
1170 case GRN_ENC_SJIS:
1171 for (i = 0; i < str_len; i++) {
1172 if (!f) {
1173 c = (unsigned char) str[i];
1174 f = (c >= 0x81U && ((c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU)) ? 2 : 1);
1175 nstr->checks[i] = f;
1176 } else {
1177 nstr->checks[i] = 0;
1178 }
1179 f--;
1180 }
1181 break;
1182 case GRN_ENC_UTF8:
1183 for (i = 0; i < str_len; i++) {
1184 if (!f) {
1185 c = (unsigned char) str[i];
1186 f = (c & 0x80U ? (c & 0x20U ? (c & 0x10U ? 4 : 3)
1187 : 2)
1188 : 1);
1189 nstr->checks[i] = f;
1190 } else {
1191 nstr->checks[i] = 0;
1192 }
1193 f--;
1194 }
1195 break;
1196 default:
1197 for (i = 0; i < str_len; i++) {
1198 nstr->checks[i] = 1;
1199 }
1200 break;
1201 }
1202 } else {
1203 nstr->checks = NULL;
1204 }
1205 return nstr;
1206}
1207
1208grn_str *
1209grn_str_open_(grn_ctx *ctx, const char *str, unsigned int str_len, int flags, grn_encoding encoding)
1210{
1211 grn_rc rc;
1212 grn_str *nstr;
1213 if (!str || !str_len) { return NULL; }
1214
1215 if (!(flags & GRN_STR_NORMALIZE)) {
1216 return grn_fakenstr_open(ctx, str, str_len, encoding, flags);
1217 }
1218
1219 if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
1220 GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_str_open failed !");
1221 return NULL;
1222 }
1223 nstr->orig = str;
1224 nstr->orig_blen = str_len;
1225 nstr->norm = NULL;
1226 nstr->norm_blen = 0;
1227 nstr->checks = NULL;
1228 nstr->ctypes = NULL;
1229 nstr->encoding = encoding;
1230 nstr->flags = flags;
1231 switch (encoding) {
1232 case GRN_ENC_EUC_JP :
1233 rc = normalize_euc(ctx, nstr);
1234 break;
1235 case GRN_ENC_UTF8 :
1236#ifdef GRN_WITH_NFKC
1237 rc = normalize_utf8(ctx, nstr);
1238#else /* GRN_WITH_NFKC */
1239 rc = normalize_none(ctx, nstr);
1240#endif /* GRN_WITH_NFKC */
1241 break;
1242 case GRN_ENC_SJIS :
1243 rc = normalize_sjis(ctx, nstr);
1244 break;
1245 case GRN_ENC_LATIN1 :
1246 rc = normalize_latin1(ctx, nstr);
1247 break;
1248 case GRN_ENC_KOI8R :
1249 rc = normalize_koi8r(ctx, nstr);
1250 break;
1251 default :
1252 rc = normalize_none(ctx, nstr);
1253 break;
1254 }
1255 if (rc) {
1256 grn_str_close(ctx, nstr);
1257 return NULL;
1258 }
1259 return nstr;
1260}
1261
1262grn_str *
1263grn_str_open(grn_ctx *ctx, const char *str, unsigned int str_len, int flags)
1264{
1265 return grn_str_open_(ctx, str, str_len, flags, ctx->encoding);
1266}
1267
1268grn_rc
1269grn_str_close(grn_ctx *ctx, grn_str *nstr)
1270{
1271 if (nstr) {
1272 if (nstr->norm) { GRN_FREE(nstr->norm); }
1273 if (nstr->ctypes) { GRN_FREE(nstr->ctypes); }
1274 if (nstr->checks) { GRN_FREE(nstr->checks); }
1275 GRN_FREE(nstr);
1276 return GRN_SUCCESS;
1277 } else {
1278 return GRN_INVALID_ARGUMENT;
1279 }
1280}
1281
/*
 * Encoding names.  grn_encoding_to_string() indexes this table with a
 * grn_encoding value and grn_encoding_parse() casts the index of a matching
 * name back to grn_encoding, so the order of the entries is significant.
 */
static const char *grn_enc_string[] = {
  "default",
  "none",
  "euc_jp",
  "utf8",
  "sjis",
  "latin1",
  "koi8r"
};
1291
1292const char *
1293grn_encoding_to_string(grn_encoding enc)
1294{
1295 if (enc < (sizeof(grn_enc_string) / sizeof(char *))) {
1296 return grn_enc_string[enc];
1297 } else {
1298 return "unknown";
1299 }
1300}
1301
1302grn_encoding
1303grn_encoding_parse(const char *str)
1304{
1305 grn_encoding e = GRN_ENC_UTF8;
1306 int i = sizeof(grn_enc_string) / sizeof(grn_enc_string[0]);
1307 while (i--) {
1308 if (!strcmp(str, grn_enc_string[i])) {
1309 e = (grn_encoding)i;
1310 }
1311 }
1312 return e;
1313}
1314
1315size_t
1316grn_str_len(grn_ctx *ctx, const char *str, grn_encoding encoding, const char **last)
1317{
1318 size_t len, tlen;
1319 const char *p = NULL;
1320 for (len = 0; ; len++) {
1321 p = str;
1322 if (!(tlen = grn_str_charlen(ctx, str, encoding))) {
1323 break;
1324 }
1325 str += tlen;
1326 }
1327 if (last) { *last = p; }
1328 return len;
1329}
1330
1331int
1332grn_isspace(const char *str, grn_encoding encoding)
1333{
1334 const unsigned char *s = (const unsigned char *) str;
1335 if (!s) { return 0; }
1336 switch (s[0]) {
1337 case ' ' :
1338 case '\f' :
1339 case '\n' :
1340 case '\r' :
1341 case '\t' :
1342 case '\v' :
1343 return 1;
1344 case 0x81 :
1345 if (encoding == GRN_ENC_SJIS && s[1] == 0x40) { return 2; }
1346 break;
1347 case 0xA1 :
1348 if (encoding == GRN_ENC_EUC_JP && s[1] == 0xA1) { return 2; }
1349 break;
1350 case 0xE3 :
1351 if (encoding == GRN_ENC_UTF8 && s[1] == 0x80 && s[2] == 0x80) { return 3; }
1352 break;
1353 default :
1354 break;
1355 }
1356 return 0;
1357}
1358
/*
 * Parse an optionally '-'-prefixed decimal number from [nptr, end) into an
 * int8_t.
 *
 * Parsing stops at the first non-digit.  If the value does not fit into
 * int8_t, 0 is returned and *rest points at the digit that caused the
 * overflow.  Otherwise *rest points just past the last digit consumed; a
 * lone '-' consumes nothing (*rest == nptr).  `rest` may be NULL.
 */
int8_t
grn_atoi8(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  /* Accumulate as a non-positive value in a wider type so that the range
     check sees the real result instead of a truncated one. */
  int acc = 0;
  int negative = 0;
  int sign_only = 0;

  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    int next = acc * 10 - (*p - '0');
    /* -INT8_MIN is not representable, so it is only valid with a '-' sign. */
    if (next < INT8_MIN || (!negative && next == INT8_MIN)) {
      acc = 0;
      break;
    }
    acc = next;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return (int8_t)(negative ? acc : -acc);
}
1379
/*
 * Parse an unsigned decimal number from [nptr, end) into a uint8_t.
 *
 * Parsing stops at the first non-digit.  If the value exceeds UINT8_MAX,
 * 0 is returned and *rest points at the digit that caused the overflow;
 * otherwise *rest points just past the last digit.  `rest` may be NULL.
 */
uint8_t
grn_atoui8(const char *nptr, const char *end, const char **rest)
{
  /* A wider accumulator makes the range check exact; checking after
     truncation to 8 bits would miss some wrapped values. */
  unsigned int acc = 0;

  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    unsigned int next = acc * 10 + (unsigned int)(*nptr - '0');
    if (next > UINT8_MAX) {
      acc = 0;
      break;
    }
    acc = next;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return (uint8_t)acc;
}
1393
/*
 * Parse an optionally '-'-prefixed decimal number from [nptr, end) into an
 * int16_t.
 *
 * Parsing stops at the first non-digit.  If the value does not fit into
 * int16_t, 0 is returned and *rest points at the digit that caused the
 * overflow.  Otherwise *rest points just past the last digit consumed; a
 * lone '-' consumes nothing (*rest == nptr).  `rest` may be NULL.
 */
int16_t
grn_atoi16(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  /* Accumulate as a non-positive value in 32 bits so the range check is
     performed on the untruncated result. */
  int32_t acc = 0;
  int negative = 0;
  int sign_only = 0;

  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    int32_t next = acc * 10 - (*p - '0');
    /* -INT16_MIN is not representable, so it is only valid with a '-' sign. */
    if (next < INT16_MIN || (!negative && next == INT16_MIN)) {
      acc = 0;
      break;
    }
    acc = next;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return (int16_t)(negative ? acc : -acc);
}
1414
/*
 * Parse an unsigned decimal number from [nptr, end) into a uint16_t.
 *
 * Parsing stops at the first non-digit.  If the value exceeds UINT16_MAX,
 * 0 is returned and *rest points at the digit that caused the overflow;
 * otherwise *rest points just past the last digit.  `rest` may be NULL.
 */
uint16_t
grn_atoui16(const char *nptr, const char *end, const char **rest)
{
  /* A 32-bit accumulator makes the range check exact; checking after
     truncation to 16 bits would miss some wrapped values. */
  uint32_t acc = 0;

  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    uint32_t next = acc * 10 + (uint32_t)(*nptr - '0');
    if (next > UINT16_MAX) {
      acc = 0;
      break;
    }
    acc = next;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return (uint16_t)acc;
}
1428
/*
 * Parse an optionally '-'-prefixed decimal number from [nptr, end) into an
 * int.
 *
 * Parsing stops at the first non-digit.  If the value does not fit into
 * int, 0 is returned and *rest points at the digit that caused the
 * overflow.  Otherwise *rest points just past the last digit consumed; a
 * lone '-' consumes nothing (*rest == nptr).  `rest` may be NULL.
 */
int
grn_atoi(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int acc = 0;        /* accumulated as a non-positive value */
  int negative = 0;
  int sign_only = 0;

  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    const int digit = *p - '0';
    /* Reject before multiplying: signed overflow is undefined behaviour,
       so "acc * 10 - digit < INT_MIN" is tested in a form that cannot
       overflow itself. */
    if (acc < (INT_MIN + digit) / 10) {
      acc = 0;
      break;
    }
    acc = acc * 10 - digit;
    /* -INT_MIN is not representable, so it is only valid with a '-' sign. */
    if (!negative && acc == INT_MIN) {
      acc = 0;
      break;
    }
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return negative ? acc : -acc;
}
1449
/*
 * Parse an unsigned decimal number from [nptr, end) into an unsigned int.
 *
 * Parsing stops at the first non-digit.  If the value exceeds UINT_MAX,
 * 0 is returned and *rest points at the digit that caused the overflow;
 * otherwise *rest points just past the last digit.  `rest` may be NULL.
 */
unsigned int
grn_atoui(const char *nptr, const char *end, const char **rest)
{
  unsigned int acc = 0;

  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    const unsigned int digit = (unsigned int)(*nptr - '0');
    /* Test "acc * 10 + digit > UINT_MAX" before computing it: comparing the
       wrapped result with the old value does not catch every overflow. */
    if (acc > (UINT_MAX - digit) / 10) {
      acc = 0;
      break;
    }
    acc = acc * 10 + digit;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return acc;
}
1463
/*
 * Parse an optionally '-'-prefixed decimal number from [nptr, end) into an
 * int64_t.
 *
 * Parsing stops at the first non-digit.  If the value does not fit into
 * int64_t, 0 is returned and *rest points at the digit that caused the
 * overflow.  Otherwise *rest points just past the last digit consumed; a
 * lone '-' consumes nothing (*rest == nptr).  `rest` may be NULL.
 */
int64_t
grn_atoll(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int sign_only = 0;
  int64_t acc = 0;

  /* Both loops reject a digit before multiplying, because signed overflow
     is undefined behaviour and cannot be detected reliably afterwards. */
  if (p < end && *p == '-') {
    p++;
    sign_only = 1;
    while (p < end && *p >= '0' && *p <= '9') {
      const int digit = *p - '0';
      if (acc < (INT64_MIN + digit) / 10) {
        acc = 0;
        break;
      }
      acc = acc * 10 - digit;
      sign_only = 0;
      p++;
    }
  } else {
    while (p < end && *p >= '0' && *p <= '9') {
      const int digit = *p - '0';
      if (acc > (INT64_MAX - digit) / 10) {
        acc = 0;
        break;
      }
      acc = acc * 10 + digit;
      p++;
    }
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return acc;
}
1491
/*
 * Parse an unsigned decimal number from [nptr, end) into a uint64_t.
 *
 * Parsing stops at the first non-digit.  If the value exceeds UINT64_MAX,
 * 0 is returned and *rest points at the digit that caused the overflow;
 * otherwise *rest points just past the last digit.  `rest` may be NULL.
 */
uint64_t
grn_atoull(const char *nptr, const char *end, const char **rest)
{
  uint64_t acc = 0;

  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    const uint64_t digit = (uint64_t)(*nptr - '0');
    /* Test "acc * 10 + digit > UINT64_MAX" before computing it: comparing
       the wrapped result with the old value does not catch every overflow. */
    if (acc > (UINT64_MAX - digit) / 10) {
      acc = 0;
      break;
    }
    acc = acc * 10 + digit;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return acc;
}
1505
/*
 * Parse a hexadecimal number (digits 0-9, a-f, A-F, no prefix) from
 * [nptr, end) into an unsigned int.
 *
 * Unlike the decimal parsers, any non-hex character inside the range makes
 * the whole result 0, with *rest pointing at that character.  If the value
 * does not fit into unsigned int, 0 is returned and *rest points at the
 * digit that caused the overflow.  On success *rest == end.  `rest` may be
 * NULL.
 */
unsigned int
grn_htoui(const char *nptr, const char *end, const char **rest)
{
  unsigned int v = 0;

  while (nptr < end) {
    const char c = *nptr;
    unsigned int digit;

    if (c >= '0' && c <= '9') {
      digit = (unsigned int)(c - '0');
    } else if (c >= 'a' && c <= 'f') {
      digit = (unsigned int)(c - 'a') + 10;
    } else if (c >= 'A' && c <= 'F') {
      digit = (unsigned int)(c - 'A') + 10;
    } else {
      v = 0;
      break;
    }
    /* The top four bits must be clear before shifting; comparing the
       wrapped result with the old value would miss some overflows. */
    if (v > (UINT_MAX >> 4)) {
      v = 0;
      break;
    }
    v = (v << 4) | digit;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
1550
/*
 * Write the low `len` hexadecimal digits of `i` (upper case, zero padded,
 * most significant first) into p[0 .. len-1].  No terminator is written.
 */
void
grn_itoh(unsigned int i, char *p, unsigned int len)
{
  static const char digits[] = "0123456789ABCDEF";
  unsigned int pos;

  /* Fill from the right so the least significant nibble lands last. */
  for (pos = len; pos > 0; pos--) {
    p[pos - 1] = digits[i & 0xf];
    i >>= 4;
  }
}
1561
1562grn_rc
1563grn_itoa(int i, char *p, char *end, char **rest)
1564{
1565 char *q;
1566 if (p >= end) { return GRN_INVALID_ARGUMENT; }
1567 q = p;
1568 if (i < 0) {
1569 *p++ = '-';
1570 q = p;
1571 if (i == INT_MIN) {
1572 if (p >= end) { return GRN_INVALID_ARGUMENT; }
1573 *p++ = (-(i % 10)) + '0';
1574 i /= 10;
1575 }
1576 i = -i;
1577 }
1578 do {
1579 if (p >= end) { return GRN_INVALID_ARGUMENT; }
1580 *p++ = i % 10 + '0';
1581 } while ((i /= 10) > 0);
1582 if (rest) { *rest = p; }
1583 for (p--; q < p; q++, p--) {
1584 char t = *q;
1585 *q = *p;
1586 *p = t;
1587 }
1588 return GRN_SUCCESS;
1589}
1590
1591grn_rc
1592grn_itoa_padded(int i, char *p, char *end, char ch)
1593{
1594 char *q;
1595 if (p >= end) { return GRN_INVALID_ARGUMENT; }
1596 if (i < 0) {
1597 *p++ = '-';
1598 if (i == INT_MIN) {
1599 if (p >= end) { return GRN_INVALID_ARGUMENT; }
1600 *p++ = (-(i % 10)) + '0';
1601 i /= 10;
1602 }
1603 i = -i;
1604 }
1605 q = end - 1;
1606 do {
1607 if (q < p) { return GRN_INVALID_ARGUMENT; }
1608 *q-- = i % 10 + '0';
1609 } while ((i /= 10) > 0);
1610 while (q >= p) {
1611 *q-- = ch;
1612 }
1613 return GRN_SUCCESS;
1614}
1615
1616grn_rc
1617grn_lltoa(int64_t i, char *p, char *end, char **rest)
1618{
1619 char *q;
1620 if (p >= end) { return GRN_INVALID_ARGUMENT; }
1621 q = p;
1622 if (i < 0) {
1623 *p++ = '-';
1624 q = p;
1625 if (i == INT64_MIN) {
1626 *p++ = (-(i % 10)) + '0';
1627 i /= 10;
1628 }
1629 i = -i;
1630 }
1631 do {
1632 if (p >= end) { return GRN_INVALID_ARGUMENT; }
1633 *p++ = i % 10 + '0';
1634 } while ((i /= 10) > 0);
1635 if (rest) { *rest = p; }
1636 for (p--; q < p; q++, p--) {
1637 char t = *q;
1638 *q = *p;
1639 *p = t;
1640 }
1641 return GRN_SUCCESS;
1642}
1643
1644grn_rc
1645grn_ulltoa(uint64_t i, char *p, char *end, char **rest)
1646{
1647 char *q;
1648 if (p >= end) { return GRN_INVALID_ARGUMENT; }
1649 q = p;
1650 do {
1651 if (p >= end) { return GRN_INVALID_ARGUMENT; }
1652 *p++ = i % 10 + '0';
1653 } while ((i /= 10) > 0);
1654 if (rest) { *rest = p; }
1655 for (p--; q < p; q++, p--) {
1656 char t = *q;
1657 *q = *p;
1658 *p = t;
1659 }
1660 return GRN_SUCCESS;
1661}
1662
/* Map the low 6 bits of `i` to one character of the 64-character alphabet
   A-Z, a-z, 0-9, '+', '/'. */
#define I2B(i) \
  ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(i) & 0x3f])

/* Inverse of I2B: map character `b` back to its 6-bit value through a table
   indexed by (b) - '+'.  Characters below '+' or above 'z' give 0xff, and
   the \xff table entries mark characters in between that are not part of
   the alphabet.  Note that `b` is evaluated more than once. */
#define B2I(b) \
  (((b) < '+' || 'z' < (b)) ? 0xff : "\x3e\xff\xff\xff\x3f\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\xff\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33"[(b) - '+'])

/* Constant XOR-ed with an id before encoding (grn_itob) and after decoding
   (grn_btoi). */
#define MASK 0x34d34d34
1670
1671char *
1672grn_itob(grn_id id, char *p)
1673{
1674 id ^= MASK;
1675 *p++ = I2B(id >> 24);
1676 *p++ = I2B(id >> 18);
1677 *p++ = I2B(id >> 12);
1678 *p++ = I2B(id >> 6);
1679 *p++ = I2B(id);
1680 return p;
1681}
1682
1683grn_id
1684grn_btoi(char *b)
1685{
1686 uint8_t i;
1687 grn_id id = 0;
1688 int len = 5;
1689 while (len--) {
1690 char c = *b++;
1691 if ((i = B2I(c)) == 0xff) { return 0; }
1692 id = (id << 6) + i;
1693 }
1694 return id ^ MASK;
1695}
1696
/* Map the low 5 bits of `i` to one character of the alphabet 0-9, A-V. */
#define I2B32H(i) ("0123456789ABCDEFGHIJKLMNOPQRSTUV"[(i) & 0x1f])

/*
 * Encode `i` as 13 characters at `p`.
 *
 * The value is first biased by 2^63, which turns the signed range into an
 * unsigned one with the same ordering; the top 4 bits and then twelve 5-bit
 * groups are translated with I2B32H, most significant first.  Returns the
 * position just past the last character written; no terminator is written.
 */
char *
grn_lltob32h(int64_t i, char *p)
{
  const uint64_t biased = (uint64_t)i + 0x8000000000000000ULL;
  int shift;

  for (shift = 60; shift >= 0; shift -= 5) {
    *p++ = I2B32H(biased >> shift);
  }
  return p;
}
1718
/*
 * Encode `i` as 13 characters at `p` using the alphabet 0-9, A-V.
 *
 * The value is biased by 2^63.  For inputs with the top bit set that
 * addition carries out of 64 bits; the carry is worth 16 in the leading
 * digit (which otherwise holds only the top 4 bits), so it is added to the
 * digit *index* before the index is translated to a character.  This keeps
 * the encoding unique and ordered like the input.  Returns the position
 * just past the last character written; no terminator is written.
 */
char *
grn_ulltob32h(uint64_t i, char *p)
{
  static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
  /* 16 when bit 63 of the input is set, i.e. when the bias overflows. */
  const unsigned int carry = (unsigned int)((i >> 59) & 0x10);
  int shift;

  i += 0x8000000000000000ULL;
  *p++ = digits[(carry + (unsigned int)(i >> 60)) & 0x1f];
  for (shift = 55; shift >= 0; shift -= 5) {
    *p++ = digits[(i >> shift) & 0x1f];
  }
  return p;
}
1739
1740grn_rc
1741grn_aton(grn_ctx *ctx, const char *p, const char *end, const char **rest,
1742 grn_obj *res)
1743{
1744 if (*p == '+') {
1745 p++;
1746 }
1747
1748 switch (*p) {
1749 case '-' :
1750 case '0' : case '1' : case '2' : case '3' : case '4' :
1751 case '5' : case '6' : case '7' : case '8' : case '9' :
1752 {
1753 int64_t int64;
1754 char rest_char;
1755 int64 = grn_atoll(p, end, rest);
1756 rest_char = **rest;
1757 if (end == *rest) {
1758 if ((int64_t)INT32_MIN <= int64 && int64 <= (int64_t)INT32_MAX) {
1759 grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
1760 GRN_INT32_SET(ctx, res, int64);
1761 } else if ((int64_t)INT32_MAX < int64 && int64 <= (int64_t)UINT32_MAX) {
1762 grn_obj_reinit(ctx, res, GRN_DB_UINT32, 0);
1763 GRN_UINT32_SET(ctx, res, int64);
1764 } else {
1765 grn_obj_reinit(ctx, res, GRN_DB_INT64, 0);
1766 GRN_INT64_SET(ctx, res, int64);
1767 }
1768 } else {
1769 if (*p != '-' && rest_char >= '0' && rest_char <= '9') {
1770 uint64_t uint64 = grn_atoull(p, end, rest);
1771 if (end == *rest) {
1772 grn_obj_reinit(ctx, res, GRN_DB_UINT64, 0);
1773 GRN_UINT64_SET(ctx, res, uint64);
1774 }
1775 }
1776 if (end != *rest) {
1777 if (rest_char == '.' || rest_char == 'e' || rest_char == 'E' ||
1778 (rest_char >= '0' && rest_char <= '9')) {
1779 char *rest_float;
1780 double d;
1781 errno = 0;
1782 d = strtod(p, &rest_float);
1783 if (!errno && rest_float == end) {
1784 grn_obj_reinit(ctx, res, GRN_DB_FLOAT, 0);
1785 GRN_FLOAT_SET(ctx, res, d);
1786 *rest = rest_float;
1787 } else {
1788 return GRN_INVALID_ARGUMENT;
1789 }
1790 }
1791 }
1792 }
1793 }
1794 break;
1795 default :
1796 return GRN_INVALID_ARGUMENT;
1797 }
1798
1799 return GRN_SUCCESS;
1800}
1801
/*
 * Split the `str_len` bytes at `str` on `delim` without modifying them.
 *
 * Each entry stored in `tokbuf` is the END of a token: the position of a
 * delimiter, or the end of the input for the final token.  At most
 * `buf_size` entries are stored; scanning stops on the delimiter that fills
 * the buffer.  Nothing is scanned when `buf_size` is not positive.
 *
 * When `rest` is non-NULL it receives the position where scanning stopped.
 * Returns the number of entries stored.
 */
int
grn_str_tok(const char *str, size_t str_len, char delim, const char **tokbuf, int buf_size, const char **rest)
{
  const char **slot = tokbuf;

  if (buf_size > 0) {
    const char **const slot_limit = tokbuf + buf_size;
    const char *const limit = str + str_len;

    for (; str != limit; str++) {
      if (*str != delim) {
        continue;
      }
      *slot++ = str;
      if (slot == slot_limit) {
        break;
      }
    }
    /* The loop ran to the end of the input: close the final token there. */
    if (slot != slot_limit) {
      *slot++ = str;
    }
  }
  if (rest) {
    *rest = str;
  }
  return slot - tokbuf;
}
1823
1824inline static int
1825op_getopt_flag(int *flags, const grn_str_getopt_opt *o,
1826 int argc, char * const argv[], int i, const char *optvalue)
1827{
1828 switch (o->op) {
1829 case GETOPT_OP_NONE:
1830 break;
1831 case GETOPT_OP_ON:
1832 *flags |= o->flag;
1833 break;
1834 case GETOPT_OP_OFF:
1835 *flags &= ~o->flag;
1836 break;
1837 case GETOPT_OP_UPDATE:
1838 *flags = o->flag;
1839 break;
1840 default:
1841 return i;
1842 }
1843 if (o->arg) {
1844 if (optvalue) {
1845 *o->arg = (char *)optvalue;
1846 } else if (++i < argc) {
1847 *o->arg = argv[i];
1848 } else {
1849 return -1;
1850 }
1851 }
1852 return i;
1853}
1854
/*
 * Process the leading options in argv[1 .. argc-1].
 *
 * `opts` is a table terminated by an entry whose `opt` is '\0' and whose
 * `longopt` is NULL.  Arguments starting with "--" are matched against
 * `longopt` (an inline value may follow '='); arguments starting with a
 * single '-' are treated as a cluster of one-character options matched
 * against `opt`.  Matching options are applied through op_getopt_flag().
 *
 * Processing stops at the first argument that does not start with '-'.
 * Returns the index of that argument (argc when there is none), or -1 after
 * printing a message to stderr when an option is unknown or lacks its
 * argument.
 */
int
grn_str_getopt(int argc, char * const argv[], const grn_str_getopt_opt *opts,
               int *flags)
{
  int i;
  for (i = 1; i < argc; i++) {
    const char * v = argv[i];
    if (*v == '-') {
      const grn_str_getopt_opt *o;
      int found;
      if (*++v == '-') {
        /* Long option: "--name" or "--name=value". */
        const char *eq;
        size_t len;
        found = 0;
        v++;
        /* Find the end of the option name ('=' or end of string). */
        for (eq = v; *eq != '\0' && *eq != '='; eq++) {}
        len = eq - v;
        for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
          if (o->longopt && strlen(o->longopt) == len &&
              !memcmp(v, o->longopt, len)) {
            /* May advance i when the value is taken from the next argv
               element. */
            i = op_getopt_flag(flags, o, argc, argv, i,
                               (*eq == '\0' ? NULL : eq + 1));
            if (i < 0) {
              fprintf(stderr, "%s: option '--%s' needs argument.\n", argv[0], o->longopt);
              return -1;
            }
            found = 1;
            break;
          }
        }
        if (!found) { goto exit; }
      } else {
        /* Short options: every character after '-' is looked up. */
        const char *p;
        for (p = v; *p; p++) {
          found = 0;
          for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
            if (o->opt && *p == o->opt) {
              /* May advance i past a consumed argument; p keeps walking the
                 original cluster string. */
              i = op_getopt_flag(flags, o, argc, argv, i, NULL);
              if (i < 0) {
                fprintf(stderr, "%s: option '-%c' needs argument.\n", argv[0], *p);
                return -1;
              }
              found = 1;
              break;
            }
          }
          if (!found) { goto exit; }
        }
      }
    } else {
      break;
    }
  }
  return i;
exit:
  /* Unknown option: report the argument being processed. */
  fprintf(stderr, "%s: cannot recognize option '%s'.\n", argv[0], argv[i]);
  return -1;
}
1913
/* Allocation granularity (4096 bytes): grn_bulk_resize() rounds buffer
   sizes up to a multiple of UNIT_SIZE. */
#define UNIT_SIZE (1 << 12)
/* Low-bit mask used for that rounding. */
#define UNIT_MASK (UNIT_SIZE - 1)

/* Number of bytes grn_bulk_resize() keeps in front of the data area of an
   out-of-place buffer. */
int grn_bulk_margin_size = 0;
1918
1919grn_rc
1920grn_bulk_resize(grn_ctx *ctx, grn_obj *buf, unsigned int newsize)
1921{
1922 char *head;
1923 unsigned int rounded_newsize;
1924 newsize += grn_bulk_margin_size + 1;
1925 if (GRN_BULK_OUTP(buf)) {
1926 rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK;
1927 if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; }
1928 newsize = rounded_newsize;
1929 head = buf->u.b.head - (buf->u.b.head ? grn_bulk_margin_size : 0);
1930 if (!(head = GRN_REALLOC(head, newsize))) { return GRN_NO_MEMORY_AVAILABLE; }
1931 buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf);
1932 buf->u.b.head = head + grn_bulk_margin_size;
1933 buf->u.b.tail = head + newsize;
1934 } else {
1935 if (newsize > GRN_BULK_BUFSIZE) {
1936 rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK;
1937 if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; }
1938 newsize = rounded_newsize;
1939 if (!(head = GRN_MALLOC(newsize))) { return GRN_NO_MEMORY_AVAILABLE; }
1940 grn_memcpy(head, GRN_BULK_HEAD(buf), GRN_BULK_VSIZE(buf));
1941 buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf);
1942 buf->u.b.head = head + grn_bulk_margin_size;
1943 buf->u.b.tail = head + newsize;
1944 buf->header.impl_flags |= GRN_OBJ_OUTPLACE;
1945 }
1946 }
1947 return GRN_SUCCESS;
1948}
1949
1950grn_rc
1951grn_bulk_reinit(grn_ctx *ctx, grn_obj *buf, unsigned int size)
1952{
1953 GRN_BULK_REWIND(buf);
1954 return grn_bulk_resize(ctx, buf, size);
1955}
1956
1957grn_rc
1958grn_bulk_write(grn_ctx *ctx, grn_obj *buf, const char *str, unsigned int len)
1959{
1960 grn_rc rc = GRN_SUCCESS;
1961 char *curr;
1962 if (GRN_BULK_REST(buf) < len) {
1963 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
1964 }
1965 curr = GRN_BULK_CURR(buf);
1966 grn_memcpy(curr, str, len);
1967 GRN_BULK_INCR_LEN(buf, len);
1968 return rc;
1969}
1970
1971grn_rc
1972grn_bulk_write_from(grn_ctx *ctx, grn_obj *bulk,
1973 const char *str, unsigned int from, unsigned int len)
1974{
1975 grn_rc rc = grn_bulk_truncate(ctx, bulk, from);
1976 if (!rc) { rc = grn_bulk_write(ctx, bulk, str, len); }
1977 return rc;
1978}
1979
1980grn_rc
1981grn_bulk_reserve(grn_ctx *ctx, grn_obj *buf, unsigned int len)
1982{
1983 grn_rc rc = GRN_SUCCESS;
1984 if (GRN_BULK_REST(buf) < len) {
1985 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
1986 }
1987 return rc;
1988}
1989
1990grn_rc
1991grn_bulk_space(grn_ctx *ctx, grn_obj *buf, unsigned int len)
1992{
1993 grn_rc rc = grn_bulk_reserve(ctx, buf, len);
1994 if (!rc) {
1995 GRN_BULK_INCR_LEN(buf, len);
1996 }
1997 return rc;
1998}
1999
2000static grn_rc
2001grn_bulk_space_clear(grn_ctx *ctx, grn_obj *buf, unsigned int len)
2002{
2003 grn_rc rc = grn_bulk_reserve(ctx, buf, len);
2004 if (!rc) {
2005 memset(GRN_BULK_CURR(buf), 0, len);
2006 GRN_BULK_INCR_LEN(buf, len);
2007 }
2008 return rc;
2009}
2010
2011grn_rc
2012grn_bulk_truncate(grn_ctx *ctx, grn_obj *bulk, unsigned int len)
2013{
2014 if (GRN_BULK_OUTP(bulk)) {
2015 if ((bulk->u.b.tail - bulk->u.b.head) < len) {
2016 return grn_bulk_space_clear(ctx, bulk, len);
2017 } else {
2018 bulk->u.b.curr = bulk->u.b.head + len;
2019 }
2020 } else {
2021 if (GRN_BULK_BUFSIZE < len) {
2022 return grn_bulk_space_clear(ctx, bulk, len);
2023 } else {
2024 bulk->header.flags &= ~GRN_BULK_BUFSIZE_MAX;
2025 bulk->header.flags += len;
2026 }
2027 }
2028 return GRN_SUCCESS;
2029}
2030
2031grn_rc
2032grn_text_itoa(grn_ctx *ctx, grn_obj *buf, int i)
2033{
2034 grn_rc rc = GRN_SUCCESS;
2035 for (;;) {
2036 char *curr = GRN_BULK_CURR(buf);
2037 char *tail = GRN_BULK_TAIL(buf);
2038 if (grn_itoa(i, curr, tail, &curr)) {
2039 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; }
2040 } else {
2041 GRN_BULK_SET_CURR(buf, curr);
2042 break;
2043 }
2044 }
2045 return rc;
2046}
2047
2048grn_rc
2049grn_text_itoa_padded(grn_ctx *ctx, grn_obj *buf, int i, char ch, unsigned int len)
2050{
2051 grn_rc rc = GRN_SUCCESS;
2052 char *curr;
2053 if ((rc = grn_bulk_reserve(ctx, buf, len))) { return rc; }
2054 curr = GRN_BULK_CURR(buf);
2055 if (!grn_itoa_padded(i, curr, curr + len, ch)) {
2056 GRN_BULK_SET_CURR(buf, curr + len);
2057 }
2058 return rc;
2059}
2060
2061grn_rc
2062grn_text_lltoa(grn_ctx *ctx, grn_obj *buf, long long int i)
2063{
2064 grn_rc rc = GRN_SUCCESS;
2065 for (;;) {
2066 char *curr = GRN_BULK_CURR(buf);
2067 char *tail = GRN_BULK_TAIL(buf);
2068 if (grn_lltoa(i, curr, tail, &curr)) {
2069 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; }
2070 } else {
2071 GRN_BULK_SET_CURR(buf, curr);
2072 break;
2073 }
2074 }
2075 return rc;
2076}
2077
2078grn_rc
2079grn_text_ulltoa(grn_ctx *ctx, grn_obj *buf, unsigned long long int i)
2080{
2081 grn_rc rc = GRN_SUCCESS;
2082 for (;;) {
2083 char *curr = GRN_BULK_CURR(buf);
2084 char *tail = GRN_BULK_TAIL(buf);
2085 if (grn_ulltoa(i, curr, tail, &curr)) {
2086 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; }
2087 } else {
2088 GRN_BULK_SET_CURR(buf, curr);
2089 break;
2090 }
2091 }
2092 return rc;
2093}
2094
/*
 * Append a finite double to `buf`.
 *
 * The value is printed with "%#.*g" (16 significant digits; the '#' flag
 * keeps the decimal point and trailing zeros) and the output is then
 * tidied: a result ending in '.' gets a '0' appended, otherwise trailing
 * zeros of the mantissa are removed while keeping at least one digit after
 * the decimal point.
 *
 * NOTE(review): the results of grn_bulk_reserve() and grn_text_printf() are
 * not checked; if nothing was written, `len` is 0 and start[len - 1] reads
 * before the output -- confirm this cannot happen.
 */
inline static void
ftoa_(grn_ctx *ctx, grn_obj *buf, double d)
{
  char *start;
  size_t before_size;
  size_t len;
#define DIGIT_NUMBER 16
#define FIRST_BUFFER_SIZE (DIGIT_NUMBER + 4)
  before_size = GRN_BULK_VSIZE(buf);
  grn_bulk_reserve(ctx, buf, FIRST_BUFFER_SIZE);
  grn_text_printf(ctx, buf, "%#.*g", DIGIT_NUMBER, d);
  /* Locate the text that was just appended. */
  len = GRN_BULK_VSIZE(buf) - before_size;
  start = GRN_BULK_CURR(buf) - len;
#undef FIRST_BUFFER_SIZE
#undef DIGIT_NUMBER
  if (start[len - 1] == '.') {
    GRN_TEXT_PUTC(ctx, buf, '0');
  } else {
    char *p, *q;
    /* Terminate the text so strchr() stops at its end.
       NOTE(review): writes one byte past the current length; presumably
       covered by the spare byte grn_bulk_resize() adds -- confirm. */
    start[len] = '\0';
    if ((p = strchr(start, 'e'))) {
      /* Exponent form: walk back from the 'e' over trailing mantissa zeros,
         then move the exponent part down over them. */
      for (q = p; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
      grn_memmove(q, p, start + len - q);
    } else {
      /* Plain form: drop trailing zeros from the end of the text. */
      for (q = start + len; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
    }
    /* Shrink the bulk to the trimmed length. */
    grn_bulk_truncate(ctx, buf, before_size + len);
  }
}
2124
2125grn_rc
2126grn_text_ftoa(grn_ctx *ctx, grn_obj *buf, double d)
2127{
2128 grn_rc rc = GRN_SUCCESS;
2129 if (GRN_BULK_REST(buf) < 32) {
2130 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 32))) { return rc; }
2131 }
2132#ifdef HAVE_FPCLASSIFY
2133 switch (fpclassify(d)) {
2134 case FP_NAN :
2135 GRN_TEXT_PUTS(ctx, buf, "#<nan>");
2136 break;
2137 case FP_INFINITE :
2138 GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0");
2139 break;
2140 default :
2141 ftoa_(ctx, buf, d);
2142 break;
2143 }
2144#else /* HAVE_FPCLASSIFY */
2145 if (d == d) {
2146 if (d != 0 && ((d / 2.0) == d)) {
2147 GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0");
2148 } else {
2149 ftoa_(ctx, buf, d);
2150 }
2151 } else {
2152 GRN_TEXT_PUTS(ctx, buf, "#<nan>");
2153 }
2154#endif /* HAVE_FPCLASSIFY */
2155 return rc;
2156}
2157
2158grn_rc
2159grn_text_itoh(grn_ctx *ctx, grn_obj *buf, int i, unsigned int len)
2160{
2161 grn_rc rc = GRN_SUCCESS;
2162 if (GRN_BULK_REST(buf) < len) {
2163 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
2164 }
2165 grn_itoh(i, GRN_BULK_CURR(buf), len);
2166 GRN_BULK_INCR_LEN(buf, len);
2167 return rc;
2168}
2169
2170grn_rc
2171grn_text_itob(grn_ctx *ctx, grn_obj *buf, grn_id id)
2172{
2173 size_t len = 5;
2174 grn_rc rc = GRN_SUCCESS;
2175 if (GRN_BULK_REST(buf) < len) {
2176 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
2177 }
2178 grn_itob(id, GRN_BULK_CURR(buf));
2179 GRN_BULK_INCR_LEN(buf, len);
2180 return rc;
2181}
2182
2183grn_rc
2184grn_text_lltob32h(grn_ctx *ctx, grn_obj *buf, long long int i)
2185{
2186 size_t len = 13;
2187 grn_rc rc = GRN_SUCCESS;
2188 if (GRN_BULK_REST(buf) < len) {
2189 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
2190 }
2191 grn_lltob32h(i, GRN_BULK_CURR(buf));
2192 GRN_BULK_INCR_LEN(buf, len);
2193 return rc;
2194}
2195
/*
 * Append the `len` bytes at `s` to `buf` as a double-quoted, escaped string.
 *
 * The input is walked character by character using grn_charlen(); the walk
 * stops early when grn_charlen() returns 0.  Single-byte characters:
 * '"' and '\\' get a backslash, \b \f \n \r \t use their two-character
 * escapes, the remaining control characters and 0x7f become \uXXXX.
 * The three-byte sequences E2 80 A8 and E2 80 A9 are written as \u2028 and
 * \u2029.  Everything else is copied unchanged.
 *
 * Only the \uXXXX path for control characters checks for write errors: it
 * returns the error immediately (without the closing quote).  All other
 * write results are ignored and GRN_SUCCESS is returned.
 */
grn_rc
grn_text_esc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
{
  const char *e;
  unsigned int l;
  grn_rc rc = GRN_SUCCESS;

  GRN_TEXT_PUTC(ctx, buf, '"');
  for (e = s + len; s < e; s += l) {
    /* l is the byte length of the character at s; 0 ends the walk. */
    if (!(l = grn_charlen(ctx, s, e))) { break; }
    if (l == 1) {
      switch (*s) {
      case '"' :
        grn_bulk_write(ctx, buf, "\\\"", 2);
        break;
      case '\\' :
        grn_bulk_write(ctx, buf, "\\\\", 2);
        break;
      case '\b' :
        grn_bulk_write(ctx, buf, "\\b", 2);
        break;
      case '\f' :
        grn_bulk_write(ctx, buf, "\\f", 2);
        break;
      case '\n' :
        grn_bulk_write(ctx, buf, "\\n", 2);
        break;
      case '\r' :
        grn_bulk_write(ctx, buf, "\\r", 2);
        break;
      case '\t' :
        grn_bulk_write(ctx, buf, "\\t", 2);
        break;
      /* Control characters without a short escape, plus DEL. */
      case '\x00': case '\x01': case '\x02': case '\x03': case '\x04': case '\x05':
      case '\x06': case '\x07': case '\x0b': case '\x0e': case '\x0f': case '\x10':
      case '\x11': case '\x12': case '\x13': case '\x14': case '\x15': case '\x16':
      case '\x17': case '\x18': case '\x19': case '\x1a': case '\x1b': case '\x1c':
      case '\x1d': case '\x1e': case '\x1f': case '\x7f':
        if (!(rc = grn_bulk_write(ctx, buf, "\\u", 2))) {
          if ((rc = grn_text_itoh(ctx, buf, *s, 4))) {
            /* Take back the "\u" prefix when the digits could not be
               written. */
            GRN_BULK_INCR_LEN(buf, -2);
            return rc;
          }
        } else {
          return rc;
        }
        break;
      default :
        GRN_TEXT_PUTC(ctx, buf, *s);
      }
    } else if (l == 3) {
      if (*s == '\xe2' && *(s + 1) == '\x80') {
        switch (*(s + 2)) {
        case '\xa8': /* \u2028 */
          grn_bulk_write(ctx, buf, "\\u2028", 6);
          break;
        case '\xa9': /* \u2029 */
          grn_bulk_write(ctx, buf, "\\u2029", 6);
          break;
        default:
          grn_bulk_write(ctx, buf, s, l);
        }
      } else {
        grn_bulk_write(ctx, buf, s, l);
      }
    } else {
      grn_bulk_write(ctx, buf, s, l);
    }
  }
  GRN_TEXT_PUTC(ctx, buf, '"');
  return rc;
}
2268
2269grn_rc
2270grn_text_escape_xml(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
2271{
2272 const char *e;
2273 unsigned int l;
2274 grn_rc rc = GRN_SUCCESS;
2275
2276 for (e = s + len; s < e; s += l) {
2277 if (!(l = grn_charlen(ctx, s, e))) { break; }
2278 if (l == 1) {
2279 switch (*s) {
2280 case '"' :
2281 grn_bulk_write(ctx, buf, "&quot;", 6);
2282 break;
2283 case '<' :
2284 grn_bulk_write(ctx, buf, "&lt;", 4);
2285 break;
2286 case '>' :
2287 grn_bulk_write(ctx, buf, "&gt;", 4);
2288 break;
2289 case '&' :
2290 grn_bulk_write(ctx, buf, "&amp;", 5);
2291 break;
2292 default :
2293 GRN_TEXT_PUTC(ctx, buf, *s);
2294 }
2295 } else {
2296 grn_bulk_write(ctx, buf, s, l);
2297 }
2298 }
2299 return rc;
2300}
2301
/* Flag OR-ed into the tokenizer state while a backslash is pending. */
#define TOK_ESC (0x80)

/*
 * Read one token from [s, e), append its unescaped text to `buf` and store
 * the token type in *tok_type.
 *
 * Leading spaces are skipped.  A token is either
 *  - a single '(' or ')' (GRN_TOK_SYMBOL),
 *  - a run of characters up to a space, quote or parenthesis
 *    (GRN_TOK_SYMBOL; the terminating character is not consumed),
 *  - text enclosed in double quotes (GRN_TOK_STRING), or
 *  - text enclosed in single quotes (GRN_TOK_QUOTE).
 * The enclosing quotes are consumed but not copied.  After a backslash,
 * b f n r t are translated to the corresponding control character and any
 * other character is copied literally.
 *
 * *tok_type is GRN_TOK_VOID when the range holds only spaces.  When
 * grn_charlen() returns 0 the rest of the input is abandoned (the returned
 * position is `e`).  Returns the position just after the token.
 */
const char *
grn_text_unesc_tok(grn_ctx *ctx, grn_obj *buf, const char *s, const char *e, char *tok_type)
{
  const char *p;
  unsigned int len;
  uint8_t stat = GRN_TOK_VOID;
  for (p = s; p < e; p += len) {
    if (!(len = grn_charlen(ctx, p, e))) {
      /* Undecodable input: give up and drop any pending escape. */
      p = e;
      stat &= ~TOK_ESC;
      goto exit;
    }
    switch (stat) {
    case GRN_TOK_VOID :
      /* No token started yet. */
      if (*p == ' ') { continue; }
      switch (*p) {
      case '"' :
        stat = GRN_TOK_STRING;
        break;
      case '\'' :
        stat = GRN_TOK_QUOTE;
        break;
      case ')' :
      case '(' :
        /* A parenthesis is a complete token on its own. */
        GRN_TEXT_PUT(ctx, buf, p, len);
        p += len;
        stat = GRN_TOK_SYMBOL;
        goto exit;
      case '\\' :
        stat = GRN_TOK_SYMBOL|TOK_ESC;
        break;
      default :
        stat = GRN_TOK_SYMBOL;
        GRN_TEXT_PUT(ctx, buf, p, len);
        break;
      }
      break;
    case GRN_TOK_SYMBOL :
      /* A delimiter ends the symbol and is left for the next call. */
      if (*p == ' ') { goto exit; }
      switch (*p) {
      case '\'' :
      case '"' :
      case ')' :
      case '(' :
        goto exit;
      case '\\' :
        stat |= TOK_ESC;
        break;
      default :
        GRN_TEXT_PUT(ctx, buf, p, len);
        break;
      }
      break;
    case GRN_TOK_STRING :
      switch (*p) {
      case '"' :
        /* Closing quote: consume it and finish. */
        p += len;
        goto exit;
      case '\\' :
        stat |= TOK_ESC;
        break;
      default :
        GRN_TEXT_PUT(ctx, buf, p, len);
        break;
      }
      break;
    case GRN_TOK_QUOTE :
      switch (*p) {
      case '\'' :
        /* Closing quote: consume it and finish. */
        p += len;
        goto exit;
      case '\\' :
        stat |= TOK_ESC;
        break;
      default :
        GRN_TEXT_PUT(ctx, buf, p, len);
        break;
      }
      break;
    case GRN_TOK_SYMBOL|TOK_ESC :
    case GRN_TOK_STRING|TOK_ESC :
    case GRN_TOK_QUOTE|TOK_ESC :
      /* Character following a backslash. */
      switch (*p) {
      case 'b' :
        GRN_TEXT_PUTC(ctx, buf, '\b');
        break;
      case 'f' :
        GRN_TEXT_PUTC(ctx, buf, '\f');
        break;
      case 'n' :
        GRN_TEXT_PUTC(ctx, buf, '\n');
        break;
      case 'r' :
        GRN_TEXT_PUTC(ctx, buf, '\r');
        break;
      case 't' :
        GRN_TEXT_PUTC(ctx, buf, '\t');
        break;
      default :
        GRN_TEXT_PUT(ctx, buf, p, len);
        break;
      }
      /* Back to the enclosing state. */
      stat &= ~TOK_ESC;
      break;
    }
  }
exit :
  *tok_type = stat;
  return p;
}
2414
2415grn_rc
2416grn_text_benc(grn_ctx *ctx, grn_obj *buf, unsigned int v)
2417{
2418 grn_rc rc = GRN_SUCCESS;
2419 uint8_t *p;
2420 if (GRN_BULK_REST(buf) < 5) {
2421 if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 5))) { return rc; }
2422 }
2423 p = (uint8_t *)GRN_BULK_CURR(buf);
2424 GRN_B_ENC(v, p);
2425 GRN_BULK_SET_CURR(buf, (char *)p);
2426 return rc;
2427}
2428
/* 0x00 - 0x7f */
/* Per-byte flags used by grn_text_urlenc(): a non-zero entry means the byte
   is written as %XX, a zero entry means it is copied unchanged.  One row
   covers 16 byte values. */
static const int_least8_t urlenc_tbl[] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
};
2440
2441grn_rc
2442grn_text_urlenc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
2443{
2444 const char *e, c = '%';
2445 for (e = s + len; s < e; s++) {
2446 if ((signed char)*s < 0 || urlenc_tbl[(int)*s]) {
2447 if (!grn_bulk_write(ctx, buf, &c, 1)) {
2448 if (grn_text_itoh(ctx, buf, *s, 2)) {
2449 GRN_BULK_INCR_LEN(buf, -1);
2450 }
2451 }
2452 } else {
2453 GRN_TEXT_PUTC(ctx, buf, *s);
2454 }
2455 }
2456 return GRN_SUCCESS;
2457}
2458
/* Three-letter day names, indexed by tm_wday in grn_text_time2rfc1123(). */
static const char *weekdays[7] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
/* Three-letter month names, indexed by tm_mon in grn_text_time2rfc1123(). */
static const char *months[12] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
2463
/*
 * Format `sec` (seconds, converted to time_t and broken down as UTC) into
 * `bulk` in the form "Sun, 06 Nov 1994 08:49:37 GMT".
 *
 * The conversion uses gmtime_s(), gmtime_r() or gmtime(), depending on
 * which is available at build time.  When the conversion fails a fixed
 * placeholder date is stored instead.  Write results are not checked and
 * GRN_SUCCESS is always returned.
 */
grn_rc
grn_text_time2rfc1123(grn_ctx *ctx, grn_obj *bulk, int sec)
{
  time_t tsec;
  struct tm *t;
#ifdef HAVE__GMTIME64_S
  struct tm tm;
  tsec = (time_t)sec;
  /* gmtime_s() returns 0 on success. */
  t = (gmtime_s(&tm, &tsec) == 0) ? &tm : NULL;
#else /* HAVE__GMTIME64_S */
# ifdef HAVE_GMTIME_R
  struct tm tm;
  tsec = (time_t)sec;
  t = gmtime_r(&tsec, &tm);
# else /* HAVE_GMTIME_R */
  tsec = (time_t)sec;
  /* gmtime() returns a pointer to shared static storage. */
  t = gmtime(&tsec);
# endif /* HAVE_GMTIME_R */
#endif /* HAVE__GMTIME64_S */
  if (t) {
    /* GRN_TEXT_SET presumably replaces the current contents of bulk; the
       following calls append to it. */
    GRN_TEXT_SET(ctx, bulk, weekdays[t->tm_wday], 3);
    GRN_TEXT_PUTS(ctx, bulk, ", ");
    grn_text_itoa_padded(ctx, bulk, t->tm_mday, '0', 2);
    GRN_TEXT_PUTS(ctx, bulk, " ");
    GRN_TEXT_PUT(ctx, bulk, months[t->tm_mon], 3);
    GRN_TEXT_PUTS(ctx, bulk, " ");
    /* tm_year counts years since 1900. */
    grn_text_itoa(ctx, bulk, t->tm_year + 1900);
    GRN_TEXT_PUTS(ctx, bulk, " ");
    grn_text_itoa_padded(ctx, bulk, t->tm_hour, '0', 2);
    GRN_TEXT_PUTS(ctx, bulk, ":");
    grn_text_itoa_padded(ctx, bulk, t->tm_min, '0', 2);
    GRN_TEXT_PUTS(ctx, bulk, ":");
    grn_text_itoa_padded(ctx, bulk, t->tm_sec, '0', 2);
    GRN_TEXT_PUTS(ctx, bulk, " GMT");
  } else {
    /* NOTE(review): fixed placeholder; its weekday does not appear to match
       the calendar date -- confirm whether that matters to consumers. */
    GRN_TEXT_SETS(ctx, bulk, "Mon, 16 Mar 1980 20:40:00 GMT");
  }
  return GRN_SUCCESS;
}
2503
2504grn_rc
2505grn_text_printf(grn_ctx *ctx, grn_obj *bulk, const char *format, ...)
2506{
2507 va_list args;
2508
2509 va_start(args, format);
2510 grn_text_vprintf(ctx, bulk, format, args);
2511 va_end(args);
2512
2513 return GRN_SUCCESS;
2514}
2515
2516grn_rc
2517grn_text_vprintf(grn_ctx *ctx, grn_obj *bulk, const char *format, va_list args)
2518{
2519 grn_bool is_written = GRN_FALSE;
2520 int written_size;
2521
2522 {
2523 int rest_size;
2524 va_list copied_args;
2525
2526 rest_size = GRN_BULK_REST(bulk);
2527 va_copy(copied_args, args);
2528 written_size = vsnprintf(GRN_BULK_CURR(bulk), rest_size,
2529 format, copied_args);
2530 va_end(copied_args);
2531
2532 if (0 <= written_size && written_size < rest_size) {
2533 is_written = GRN_TRUE;
2534 }
2535 }
2536
2537 if (!is_written) {
2538#ifdef WIN32
2539# define N_NEW_SIZES 3
2540 int i;
2541 int new_sizes[N_NEW_SIZES];
2542
2543 new_sizes[0] = GRN_BULK_REST(bulk) + strlen(format) * 2;
2544 new_sizes[1] = new_sizes[0] + 4096;
2545 new_sizes[2] = new_sizes[0] + 65536;
2546
2547 for (i = 0; i < N_NEW_SIZES; i++) {
2548 grn_rc rc;
2549 int new_size = new_sizes[i];
2550 va_list copied_args;
2551
2552 rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + new_size);
2553 if (rc) {
2554 return rc;
2555 }
2556 va_copy(copied_args, args);
2557 written_size = vsnprintf(GRN_BULK_CURR(bulk), new_size,
2558 format, copied_args);
2559 va_end(copied_args);
2560 if (written_size != -1) {
2561 break;
2562 }
2563 }
2564# undef N_NEW_SIZES
2565#else /* WIN32 */
2566 grn_rc rc;
2567 int required_size = written_size + 1; /* "+ 1" for terminate '\0'. */
2568
2569 rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + required_size);
2570 if (rc) {
2571 return rc;
2572 }
2573 written_size = vsnprintf(GRN_BULK_CURR(bulk), required_size,
2574 format, args);
2575#endif /* WIN32 */
2576 }
2577
2578 if (written_size < 0) {
2579 return GRN_INVALID_ARGUMENT;
2580 }
2581
2582 GRN_BULK_INCR_LEN(bulk, written_size);
2583 return GRN_SUCCESS;
2584}
2585
2586grn_rc
2587grn_bulk_fin(grn_ctx *ctx, grn_obj *buf)
2588{
2589 if (!(buf->header.impl_flags & GRN_OBJ_REFER)) {
2590 if (GRN_BULK_OUTP(buf) && buf->u.b.head) {
2591 GRN_REALLOC(buf->u.b.head - grn_bulk_margin_size, 0);
2592 }
2593 }
2594 buf->header.flags = 0;
2595 buf->header.impl_flags &= ~GRN_OBJ_DO_SHALLOW_COPY;
2596 buf->u.b.head = NULL;
2597 buf->u.b.curr = NULL;
2598 buf->u.b.tail = NULL;
2599 return GRN_SUCCESS;
2600}
2601
2602grn_rc
2603grn_substring(grn_ctx *ctx, char **str, char **str_end, int start, int end, grn_encoding encoding)
2604{
2605 int i;
2606 size_t l;
2607 char *s = *str, *e = *str_end;
2608 for (i = 0; s < e; i++, s += l) {
2609 if (i == start) { *str = s; }
2610 if (!(l = grn_charlen(ctx, s, e))) {
2611 return GRN_INVALID_ARGUMENT;
2612 }
2613 if (i == end) {
2614 *str_end = s;
2615 break;
2616 }
2617 }
2618 return GRN_SUCCESS;
2619}
2620
2621static void
2622grn_text_atoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_id id)
2623{
2624 uint32_t vs;
2625 grn_obj buf;
2626 if (obj->header.type == GRN_ACCESSOR) {
2627 grn_accessor *a = (grn_accessor *)obj;
2628 GRN_TEXT_INIT(&buf, 0);
2629 for (;;) {
2630 GRN_BULK_REWIND(&buf);
2631 switch (a->action) {
2632 case GRN_ACCESSOR_GET_ID :
2633 GRN_UINT32_PUT(ctx, &buf, id);
2634 buf.header.domain = GRN_DB_UINT32;
2635 break;
2636 case GRN_ACCESSOR_GET_KEY :
2637 grn_table_get_key2(ctx, a->obj, id, &buf);
2638 buf.header.domain = DB_OBJ(a->obj)->header.domain;
2639 break;
2640 case GRN_ACCESSOR_GET_VALUE :
2641 grn_obj_get_value(ctx, a->obj, id, &buf);
2642 buf.header.domain = GRN_DB_INT32; /* fix me */
2643 break;
2644 case GRN_ACCESSOR_GET_SCORE :
2645 {
2646 grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
2647 int32_t int32_score = ri->score;
2648 GRN_INT32_PUT(ctx, &buf, int32_score);
2649 }
2650 buf.header.domain = GRN_DB_INT32;
2651 break;
2652 case GRN_ACCESSOR_GET_NSUBRECS :
2653 {
2654 grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
2655 GRN_INT32_PUT(ctx, &buf, ri->n_subrecs);
2656 }
2657 buf.header.domain = GRN_DB_INT32;
2658 break;
2659 case GRN_ACCESSOR_GET_COLUMN_VALUE :
2660 if ((a->obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) {
2661 if (a->next) {
2662 grn_id *idp;
2663 grn_obj_get_value(ctx, a->obj, id, &buf);
2664 idp = (grn_id *)GRN_BULK_HEAD(&buf);
2665 GRN_TEXT_PUTC(ctx, bulk, '[');
2666 for (vs = GRN_BULK_VSIZE(&buf) / sizeof(grn_id); vs--; idp++) {
2667 grn_text_atoj(ctx, bulk, (grn_obj *)a->next, *idp);
2668 if (vs) { GRN_TEXT_PUTC(ctx, bulk, ','); }
2669 }
2670 GRN_TEXT_PUTC(ctx, bulk, ']');
2671 } else {
2672 grn_text_atoj(ctx, bulk, a->obj, id);
2673 }
2674 goto exit;
2675 } else {
2676 grn_obj_get_value(ctx, a->obj, id, &buf);
2677 }
2678 break;
2679 case GRN_ACCESSOR_GET_DB_OBJ :
2680 /* todo */
2681 break;
2682 case GRN_ACCESSOR_LOOKUP :
2683 /* todo */
2684 break;
2685 case GRN_ACCESSOR_FUNCALL :
2686 /* todo */
2687 break;
2688 }
2689 if (a->next) {
2690 a = a->next;
2691 id = *((grn_id *)GRN_BULK_HEAD(&buf));
2692 } else {
2693 break;
2694 }
2695 }
2696 } else {
2697 switch (obj->header.type) {
2698 case GRN_COLUMN_FIX_SIZE :
2699 GRN_VALUE_FIX_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
2700 break;
2701 case GRN_COLUMN_VAR_SIZE :
2702 if ((obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) {
2703 grn_obj *range = grn_ctx_at(ctx, DB_OBJ(obj)->range);
2704 if (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) {
2705 GRN_VALUE_VAR_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
2706 } else {
2707 GRN_VALUE_FIX_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
2708 }
2709 } else {
2710 GRN_VALUE_VAR_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
2711 }
2712 break;
2713 case GRN_COLUMN_INDEX :
2714 GRN_UINT32_INIT(&buf, 0);
2715 break;
2716 default:
2717 GRN_TEXT_INIT(&buf, 0);
2718 break;
2719 }
2720 grn_obj_get_value(ctx, obj, id, &buf);
2721 }
2722 grn_text_otoj(ctx, bulk, &buf, NULL);
2723exit :
2724 grn_obj_close(ctx, &buf);
2725}
2726
2727grn_rc
2728grn_text_otoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_obj_format *format)
2729{
2730 grn_obj buf;
2731 GRN_TEXT_INIT(&buf, 0);
2732 switch (obj->header.type) {
2733 case GRN_BULK :
2734 switch (obj->header.domain) {
2735 case GRN_DB_VOID :
2736 case GRN_DB_SHORT_TEXT :
2737 case GRN_DB_TEXT :
2738 case GRN_DB_LONG_TEXT :
2739 grn_text_esc(ctx, bulk, GRN_BULK_HEAD(obj), GRN_BULK_VSIZE(obj));
2740 break;
2741 case GRN_DB_BOOL :
2742 if (*((unsigned char *)GRN_BULK_HEAD(obj))) {
2743 GRN_TEXT_PUTS(ctx, bulk, "true");
2744 } else {
2745 GRN_TEXT_PUTS(ctx, bulk, "false");
2746 }
2747 break;
2748 case GRN_DB_INT8 :
2749 grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT8_VALUE(obj) : 0);
2750 break;
2751 case GRN_DB_UINT8 :
2752 grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT8_VALUE(obj) : 0);
2753 break;
2754 case GRN_DB_INT16 :
2755 grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT16_VALUE(obj) : 0);
2756 break;
2757 case GRN_DB_UINT16 :
2758 grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT16_VALUE(obj) : 0);
2759 break;
2760 case GRN_DB_INT32 :
2761 grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT32_VALUE(obj) : 0);
2762 break;
2763 case GRN_DB_UINT32 :
2764 grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT32_VALUE(obj) : 0);
2765 break;
2766 case GRN_DB_INT64 :
2767 grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT64_VALUE(obj) : 0);
2768 break;
2769 case GRN_DB_UINT64 :
2770 grn_text_ulltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT64_VALUE(obj) : 0);
2771 break;
2772 case GRN_DB_FLOAT :
2773 grn_text_ftoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_FLOAT_VALUE(obj) : 0);
2774 break;
2775 case GRN_DB_TIME :
2776 {
2777 double dv = *((int64_t *)GRN_BULK_HEAD(obj));
2778 dv /= 1000000.0;
2779 grn_text_ftoa(ctx, bulk, dv);
2780 }
2781 break;
2782 case GRN_DB_TOKYO_GEO_POINT :
2783 case GRN_DB_WGS84_GEO_POINT :
2784 if (GRN_BULK_VSIZE(obj) == sizeof(grn_geo_point)) {
2785 grn_geo_point *gp = (grn_geo_point *)GRN_BULK_HEAD(obj);
2786 GRN_TEXT_PUTC(ctx, bulk, '"');
2787 grn_text_itoa(ctx, bulk, gp->latitude);
2788 GRN_TEXT_PUTC(ctx, bulk, 'x');
2789 grn_text_itoa(ctx, bulk, gp->longitude);
2790 GRN_TEXT_PUTC(ctx, bulk, '"');
2791 } else {
2792 GRN_TEXT_PUTS(ctx, bulk, "\"\"");
2793 }
2794 break;
2795 default :
2796 if (format) {
2797 int j;
2798 int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
2799 grn_id id = GRN_RECORD_VALUE(obj);
2800 grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
2801 if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
2802 GRN_TEXT_PUTS(ctx, bulk, "[");
2803 for (j = 0; j < ncolumns; j++) {
2804 grn_id range_id;
2805 if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
2806 GRN_TEXT_PUTS(ctx, bulk, "[");
2807 GRN_BULK_REWIND(&buf);
2808 grn_column_name_(ctx, columns[j], &buf);
2809 grn_text_otoj(ctx, bulk, &buf, NULL);
2810 GRN_TEXT_PUTC(ctx, bulk, ',');
2811 /* column range */
2812 range_id = grn_obj_get_range(ctx, columns[j]);
2813 if (range_id == GRN_ID_NIL) {
2814 GRN_TEXT_PUTS(ctx, bulk, "null");
2815 } else {
2816 int name_len;
2817 grn_obj *range_obj;
2818 char name_buf[GRN_TABLE_MAX_KEY_SIZE];
2819
2820 range_obj = grn_ctx_at(ctx, range_id);
2821 name_len = grn_obj_name(ctx, range_obj, name_buf,
2822 GRN_TABLE_MAX_KEY_SIZE);
2823 GRN_BULK_REWIND(&buf);
2824 GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
2825 grn_text_otoj(ctx, bulk, &buf, NULL);
2826 }
2827 GRN_TEXT_PUTS(ctx, bulk, "]");
2828 }
2829 GRN_TEXT_PUTS(ctx, bulk, "],");
2830 }
2831 GRN_TEXT_PUTC(ctx, bulk, '[');
2832 for (j = 0; j < ncolumns; j++) {
2833 if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
2834 grn_text_atoj(ctx, bulk, columns[j], id);
2835 }
2836 GRN_TEXT_PUTC(ctx, bulk, ']');
2837 } else {
2838 if (GRN_BULK_VSIZE(obj) == 0) {
2839 GRN_TEXT_PUTS(ctx, bulk, "null");
2840 } else {
2841 grn_obj *table = grn_ctx_at(ctx, obj->header.domain);
2842 grn_id id = GRN_RECORD_VALUE(obj);
2843 if (table && table->header.type != GRN_TABLE_NO_KEY) {
2844 /* todo : temporal patch. grn_table_at() is kinda costful... */
2845 if (grn_table_at(ctx, table, id)) {
2846 grn_obj *accessor = grn_obj_column(ctx, table,
2847 GRN_COLUMN_NAME_KEY,
2848 GRN_COLUMN_NAME_KEY_LEN);
2849 if (accessor) {
2850 grn_obj_get_value(ctx, accessor, id, &buf);
2851 grn_obj_unlink(ctx, accessor);
2852 }
2853 }
2854 grn_text_otoj(ctx, bulk, &buf, format);
2855 } else {
2856 grn_text_lltoa(ctx, bulk, id);
2857 }
2858 }
2859 }
2860 }
2861 break;
2862 case GRN_UVECTOR :
2863 if (format) {
2864 if (format->flags & GRN_OBJ_FORMAT_WITH_WEIGHT) {
2865 int i, n;
2866 grn_obj *domain;
2867
2868 n = grn_uvector_size(ctx, obj);
2869 domain = grn_ctx_at(ctx, obj->header.domain);
2870 GRN_TEXT_PUTS(ctx, bulk, "{");
2871 for (i = 0; i < n; i++) {
2872 grn_id id;
2873 unsigned int weight;
2874
2875 if (i > 0) {
2876 GRN_TEXT_PUTC(ctx, bulk, ',');
2877 }
2878 id = grn_uvector_get_element(ctx, obj, i, &weight);
2879 if (domain) {
2880 if (domain->header.type == GRN_TABLE_NO_KEY) {
2881 GRN_TEXT_PUTC(ctx, bulk, '"');
2882 grn_text_ulltoa(ctx, bulk, id);
2883 GRN_TEXT_PUTC(ctx, bulk, '"');
2884 } else {
2885 GRN_BULK_REWIND(&buf);
2886 grn_table_get_key2(ctx, domain, id, &buf);
2887 grn_text_otoj(ctx, bulk, &buf, NULL);
2888 }
2889 } else {
2890 GRN_TEXT_PUTC(ctx, bulk, '"');
2891 grn_text_ulltoa(ctx, bulk, id);
2892 GRN_TEXT_PUTC(ctx, bulk, '"');
2893 }
2894 GRN_TEXT_PUTC(ctx, bulk, ':');
2895 grn_text_ulltoa(ctx, bulk, weight);
2896 }
2897 GRN_TEXT_PUTS(ctx, bulk, "}");
2898 } else {
2899 /* TODO: Does we still need this code? If we don't need this, we should
2900 remove this. */
2901 int i, j;
2902 grn_id *v = (grn_id *)GRN_BULK_HEAD(obj), *ve = (grn_id *)GRN_BULK_CURR(obj);
2903 int ncolumns = GRN_BULK_VSIZE(&format->columns) / sizeof(grn_obj *);
2904 grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
2905 GRN_TEXT_PUTS(ctx, bulk, "[[");
2906 grn_text_itoa(ctx, bulk, ve - v);
2907 GRN_TEXT_PUTC(ctx, bulk, ']');
2908 if (v < ve) {
2909 if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
2910 GRN_TEXT_PUTS(ctx, bulk, ",[");
2911 for (j = 0; j < ncolumns; j++) {
2912 grn_id range_id;
2913 if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
2914 GRN_TEXT_PUTS(ctx, bulk, "[");
2915 GRN_BULK_REWIND(&buf);
2916 grn_column_name_(ctx, columns[j], &buf);
2917 grn_text_otoj(ctx, bulk, &buf, NULL);
2918 GRN_TEXT_PUTC(ctx, bulk, ',');
2919 /* column range */
2920 range_id = grn_obj_get_range(ctx, columns[j]);
2921 if (range_id == GRN_ID_NIL) {
2922 GRN_TEXT_PUTS(ctx, bulk, "null");
2923 } else {
2924 int name_len;
2925 grn_obj *range_obj;
2926 char name_buf[GRN_TABLE_MAX_KEY_SIZE];
2927
2928 range_obj = grn_ctx_at(ctx, range_id);
2929 name_len = grn_obj_name(ctx, range_obj, name_buf,
2930 GRN_TABLE_MAX_KEY_SIZE);
2931 GRN_BULK_REWIND(&buf);
2932 GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
2933 grn_text_otoj(ctx, bulk, &buf, NULL);
2934 }
2935 GRN_TEXT_PUTS(ctx, bulk, "]");
2936 }
2937 GRN_TEXT_PUTC(ctx, bulk, ']');
2938 }
2939 for (i = 0;; i++) {
2940 GRN_TEXT_PUTS(ctx, bulk, ",[");
2941 for (j = 0; j < ncolumns; j++) {
2942 if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
2943 GRN_BULK_REWIND(&buf);
2944 grn_obj_get_value(ctx, columns[j], *v, &buf);
2945 grn_text_otoj(ctx, bulk, &buf, NULL);
2946 }
2947 GRN_TEXT_PUTC(ctx, bulk, ']');
2948 v++;
2949 if (v < ve) {
2950 GRN_TEXT_PUTC(ctx, bulk, ',');
2951 } else {
2952 break;
2953 }
2954 }
2955 }
2956 GRN_TEXT_PUTC(ctx, bulk, ']');
2957 }
2958 } else {
2959 grn_obj *range = grn_ctx_at(ctx, obj->header.domain);
2960 if (range && range->header.type == GRN_TYPE) {
2961 grn_id value_size = ((struct _grn_type *)range)->obj.range;
2962 char *v = (char *)GRN_BULK_HEAD(obj),
2963 *ve = (char *)GRN_BULK_CURR(obj);
2964 GRN_TEXT_PUTC(ctx, bulk, '[');
2965 if (v < ve) {
2966 for (;;) {
2967 grn_obj value;
2968 GRN_OBJ_INIT(&value, GRN_BULK, 0, obj->header.domain);
2969 grn_bulk_write_from(ctx, &value, v, 0, value_size);
2970 grn_text_otoj(ctx, bulk, &value, NULL);
2971
2972 v += value_size;
2973 if (v < ve) {
2974 GRN_TEXT_PUTC(ctx, bulk, ',');
2975 } else {
2976 break;
2977 }
2978 }
2979 }
2980 GRN_TEXT_PUTC(ctx, bulk, ']');
2981 } else {
2982 grn_id *v = (grn_id *)GRN_BULK_HEAD(obj),
2983 *ve = (grn_id *)GRN_BULK_CURR(obj);
2984 GRN_TEXT_PUTC(ctx, bulk, '[');
2985 if (v < ve) {
2986 for (;;) {
2987 if (range->header.type != GRN_TABLE_NO_KEY) {
2988 grn_obj key;
2989 GRN_OBJ_INIT(&key, GRN_BULK, 0, range->header.domain);
2990 grn_table_get_key2(ctx, range, *v, &key);
2991 grn_text_otoj(ctx, bulk, &key, NULL);
2992 GRN_OBJ_FIN(ctx, &key);
2993 } else {
2994 grn_text_lltoa(ctx, bulk, *v);
2995 }
2996 v++;
2997 if (v < ve) {
2998 GRN_TEXT_PUTC(ctx, bulk, ',');
2999 } else {
3000 break;
3001 }
3002 }
3003 }
3004 GRN_TEXT_PUTC(ctx, bulk, ']');
3005 }
3006 }
3007 break;
3008 case GRN_VECTOR :
3009 if (obj->header.domain == GRN_DB_VOID) {
3010 ERR(GRN_INVALID_ARGUMENT, "invalid obj->header.domain");
3011 } else {
3012 unsigned int i, n;
3013 grn_obj value;
3014 grn_obj weight;
3015 grn_bool with_weight;
3016
3017 GRN_VOID_INIT(&value);
3018 GRN_UINT32_INIT(&weight, 0);
3019 with_weight = (format && format->flags & GRN_OBJ_FORMAT_WITH_WEIGHT);
3020 n = grn_vector_size(ctx, obj);
3021 if (with_weight) {
3022 GRN_TEXT_PUTC(ctx, bulk, '{');
3023 } else {
3024 GRN_TEXT_PUTC(ctx, bulk, '[');
3025 }
3026 for (i = 0; i < n; i++) {
3027 const char *_value;
3028 unsigned int _weight, length;
3029 grn_id domain;
3030 if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
3031
3032 length = grn_vector_get_element(ctx, obj, i,
3033 &_value, &_weight, &domain);
3034 if (domain != GRN_DB_VOID) {
3035 grn_obj_reinit(ctx, &value, domain, 0);
3036 } else {
3037 grn_obj_reinit(ctx, &value, obj->header.domain, 0);
3038 }
3039 grn_bulk_write(ctx, &value, _value, length);
3040 grn_text_otoj(ctx, bulk, &value, NULL);
3041 if (with_weight) {
3042 GRN_TEXT_PUTC(ctx, bulk, ':');
3043 GRN_UINT32_SET(ctx, &weight, _weight);
3044 grn_text_otoj(ctx, bulk, &weight, NULL);
3045 }
3046 }
3047 if (with_weight) {
3048 GRN_TEXT_PUTC(ctx, bulk, '}');
3049 } else {
3050 GRN_TEXT_PUTC(ctx, bulk, ']');
3051 }
3052 GRN_OBJ_FIN(ctx, &value);
3053 GRN_OBJ_FIN(ctx, &weight);
3054 }
3055 break;
3056 case GRN_PVECTOR :
3057 if (format) {
3058 ERR(GRN_FUNCTION_NOT_IMPLEMENTED,
3059 "cannot print GRN_PVECTOR using grn_obj_format");
3060 } else {
3061 unsigned int i, n;
3062 GRN_TEXT_PUTC(ctx, bulk, '[');
3063 n = GRN_BULK_VSIZE(obj) / sizeof(grn_obj *);
3064 for (i = 0; i < n; i++) {
3065 grn_obj *value;
3066
3067 if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
3068 value = GRN_PTR_VALUE_AT(obj, i);
3069 grn_text_otoj(ctx, bulk, value, NULL);
3070 }
3071 GRN_TEXT_PUTC(ctx, bulk, ']');
3072 }
3073 break;
3074 case GRN_TABLE_HASH_KEY :
3075 case GRN_TABLE_PAT_KEY :
3076 case GRN_TABLE_NO_KEY :
3077 if (format) {
3078 int i, j;
3079 int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
3080 grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
3081 grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
3082 format->offset, format->limit,
3083 GRN_CURSOR_ASCENDING);
3084 if (!tc) { ERRCLR(ctx); }
3085 GRN_TEXT_PUTS(ctx, bulk, "[[");
3086 grn_text_itoa(ctx, bulk, format->nhits);
3087 GRN_TEXT_PUTC(ctx, bulk, ']');
3088 if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
3089 GRN_TEXT_PUTS(ctx, bulk, ",[");
3090 for (j = 0; j < ncolumns; j++) {
3091 grn_id range_id;
3092 if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
3093 GRN_TEXT_PUTS(ctx, bulk, "[");
3094 GRN_BULK_REWIND(&buf);
3095 grn_column_name_(ctx, columns[j], &buf);
3096 grn_text_otoj(ctx, bulk, &buf, NULL);
3097 GRN_TEXT_PUTC(ctx, bulk, ',');
3098 /* column range */
3099 range_id = grn_obj_get_range(ctx, columns[j]);
3100 if (range_id == GRN_ID_NIL) {
3101 GRN_TEXT_PUTS(ctx, bulk, "null");
3102 } else {
3103 int name_len;
3104 grn_obj *range_obj;
3105 char name_buf[GRN_TABLE_MAX_KEY_SIZE];
3106
3107 range_obj = grn_ctx_at(ctx, range_id);
3108 name_len = grn_obj_name(ctx, range_obj, name_buf,
3109 GRN_TABLE_MAX_KEY_SIZE);
3110 GRN_BULK_REWIND(&buf);
3111 GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
3112 grn_text_otoj(ctx, bulk, &buf, NULL);
3113 }
3114 GRN_TEXT_PUTS(ctx, bulk, "]");
3115 }
3116 GRN_TEXT_PUTC(ctx, bulk, ']');
3117 }
3118 if (tc) {
3119 grn_id id;
3120 for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) {
3121 GRN_TEXT_PUTS(ctx, bulk, ",[");
3122 for (j = 0; j < ncolumns; j++) {
3123 if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
3124 grn_text_atoj(ctx, bulk, columns[j], id);
3125 }
3126 GRN_TEXT_PUTC(ctx, bulk, ']');
3127 }
3128 grn_table_cursor_close(ctx, tc);
3129 }
3130 GRN_TEXT_PUTC(ctx, bulk, ']');
3131 } else {
3132 int i;
3133 grn_id id;
3134 grn_obj *column = grn_obj_column(ctx, obj,
3135 GRN_COLUMN_NAME_KEY,
3136 GRN_COLUMN_NAME_KEY_LEN);
3137 grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
3138 0, -1, GRN_CURSOR_ASCENDING);
3139 GRN_TEXT_PUTC(ctx, bulk, '[');
3140 if (tc) {
3141 for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) {
3142 if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
3143 GRN_BULK_REWIND(&buf);
3144 grn_obj_get_value(ctx, column, id, &buf);
3145 grn_text_esc(ctx, bulk, GRN_BULK_HEAD(&buf), GRN_BULK_VSIZE(&buf));
3146 }
3147 grn_table_cursor_close(ctx, tc);
3148 }
3149 GRN_TEXT_PUTC(ctx, bulk, ']');
3150 grn_obj_unlink(ctx, column);
3151 }
3152 break;
3153 }
3154 grn_obj_close(ctx, &buf);
3155 return GRN_SUCCESS;
3156}
3157
3158const char *
3159grn_text_urldec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e, char d)
3160{
3161 while (p < e) {
3162 if (*p == d) {
3163 p++; break;
3164 } else if (*p == '%' && p + 3 <= e) {
3165 const char *r;
3166 unsigned int c = grn_htoui(p + 1, p + 3, &r);
3167 if (p + 3 == r) {
3168 GRN_TEXT_PUTC(ctx, buf, c);
3169 p += 3;
3170 } else {
3171 GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)", p[1], p[2]);
3172 GRN_TEXT_PUTC(ctx, buf, '%');
3173 p += 1;
3174 }
3175 } else {
3176 GRN_TEXT_PUTC(ctx, buf, *p);
3177 p++;
3178 }
3179 }
3180 return p;
3181}
3182
3183const char *
3184grn_text_cgidec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e,
3185 const char *delimiters)
3186{
3187 while (p < e) {
3188 grn_bool found_delimiter = GRN_FALSE;
3189 const char *delimiter;
3190 for (delimiter = delimiters; *delimiter; delimiter++) {
3191 if (*p == *delimiter) {
3192 found_delimiter = GRN_TRUE;
3193 break;
3194 }
3195 }
3196 if (found_delimiter) {
3197 p++;
3198 break;
3199 }
3200
3201 if (*p == '+') {
3202 GRN_TEXT_PUTC(ctx, buf, ' ');
3203 p++;
3204 } else if (*p == '%' && p + 3 <= e) {
3205 const char *r;
3206 unsigned int c = grn_htoui(p + 1, p + 3, &r);
3207 if (p + 3 == r) {
3208 GRN_TEXT_PUTC(ctx, buf, c);
3209 p += 3;
3210 } else {
3211 GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)", p[1], p[2]);
3212 GRN_TEXT_PUTC(ctx, buf, '%');
3213 p += 1;
3214 }
3215 } else {
3216 GRN_TEXT_PUTC(ctx, buf, *p);
3217 p++;
3218 }
3219 }
3220 return p;
3221}
3222
3223void
3224grn_str_url_path_normalize(grn_ctx *ctx, const char *path, size_t path_len,
3225 char *buf, size_t buf_len)
3226{
3227 char *b = buf, *be = buf + buf_len - 1;
3228 const char *p = path, *pe = path + path_len, *pc;
3229
3230 if (buf_len < 2) { return; }
3231
3232 while (p < pe) {
3233 for (pc = p; pc < pe && *pc != '/'; pc++) {}
3234 if (*p == '.') {
3235 if (pc == p + 2 && *(p + 1) == '.') {
3236 /* '..' */
3237 if (b - buf >= 2) {
3238 for (b -= 2; *b != '/' && b >= buf; b--) {}
3239 }
3240 if (*b == '/') {
3241 b++;
3242 ERR(GRN_INVALID_ARGUMENT, "parent path doesn't exist.");
3243 }
3244 p = pc + 1;
3245 continue;
3246 } else if (pc == p + 1) {
3247 /* '.' */
3248 p = pc + 1;
3249 continue;
3250 }
3251 }
3252 if (be - b >= pc - p) {
3253 grn_memcpy(b, p, (pc - p));
3254 b += pc - p;
3255 p = pc;
3256 if (p < pe && *pc == '/' && be > b) {
3257 *b++ = '/';
3258 p++;
3259 }
3260 }
3261 }
3262 *b = '\0';
3263}
3264
3265grn_bool
3266grn_bulk_is_zero(grn_ctx *ctx, grn_obj *obj)
3267{
3268 const char *v = GRN_BULK_HEAD(obj);
3269 unsigned int s = GRN_BULK_VSIZE(obj);
3270 for (; s; s--, v++) {
3271 if (*v) { return GRN_FALSE; }
3272 }
3273 return GRN_TRUE;
3274}
3275
3276