1/* -*- c-basic-offset: 2 -*- */
2/* Copyright(C) 2009-2014 Brazil
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License version 2.1 as published by the Free Software Foundation.
7
8 This library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Lesser General Public License for more details.
12
13 You should have received a copy of the GNU Lesser General Public
14 License along with this library; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16*/
17#include "grn.h"
18#include <string.h>
19#include <stddef.h>
20#include "grn_snip.h"
21#include "grn_ctx.h"
22
/* Fallback MAX: larger of two values. Each argument may be evaluated twice. */
#ifndef MAX
# define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/* Fallback MIN: smaller of two values. Each argument may be evaluated twice. */
#ifndef MIN
# define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
30
/*
 * Reports the parity of the run of high-bit bytes that ends right before
 * offset y of x.
 *
 * The bytes x[y - 1], x[y - 2], ... are examined while they are >= 0x80.
 * The result is 1 when the number of such bytes is even (zero included) and
 * 0 when it is odd.  grn_snip_find_firstbyte() in this file keeps moving the
 * offset until this returns non-zero for EUC-JP text.
 *
 * x: start of the buffer; only x[0] .. x[y - 1] are read.
 * y: offset to test; 0 is accepted and yields 1.
 */
static int
grn_bm_check_euc(const unsigned char *x, const size_t y)
{
  size_t high_bytes = 0;

  /* Scan backwards by index so that no pointer before x is ever formed. */
  while (high_bytes < y && x[y - 1 - high_bytes] >= 0x80U) {
    high_bytes++;
  }
  return (int) ((high_bytes + 1) & 1);
}
38
/*
 * Reports the parity of the run of bytes in the ranges 0x81..0x9f or
 * 0xe0..0xfc that ends right before offset y of x.
 *
 * The bytes x[y - 1], x[y - 2], ... are examined while they fall in one of
 * those two ranges.  The result is 1 when the number of such bytes is even
 * (zero included) and 0 when it is odd.  grn_snip_find_firstbyte() in this
 * file uses a zero result to shift the offset by one step for Shift_JIS text.
 *
 * x: start of the buffer; only x[0] .. x[y - 1] are read.
 * y: offset to test; 0 is accepted and yields 1.
 */
static int
grn_bm_check_sjis(const unsigned char *x, const size_t y)
{
  size_t lead_bytes = 0;

  /* Scan backwards by index so that no pointer before x is ever formed. */
  while (lead_bytes < y) {
    const unsigned char c = x[y - 1 - lead_bytes];
    const int in_low_range = (c >= 0x81U && c <= 0x9fU);
    const int in_high_range = (c >= 0xe0U && c <= 0xfcU);

    if (!in_low_range && !in_high_range) {
      break;
    }
    lead_bytes++;
  }
  return (int) ((lead_bytes + 1) & 1);
}
48
49/*
50static void
51grn_bm_suffixes(const unsigned char *x, size_t m, size_t *suff)
52{
53 size_t f, g;
54 intptr_t i;
55 f = 0;
56 suff[m - 1] = m;
57 g = m - 1;
58 for (i = m - 2; i >= 0; --i) {
59 if (i > (intptr_t) g && suff[i + m - 1 - f] < i - g)
60 suff[i] = suff[i + m - 1 - f];
61 else {
62 if (i < (intptr_t) g)
63 g = i;
64 f = i;
65 while (g > 0 && x[g] == x[g + m - 1 - f])
66 --g;
67 suff[i] = f - g;
68 }
69 }
70}
71*/
72
73static void
74grn_bm_preBmBc(const unsigned char *x, size_t m, size_t *bmBc)
75{
76 size_t i;
77 for (i = 0; i < ASIZE; ++i) {
78 bmBc[i] = m;
79 }
80 for (i = 0; i < m - 1; ++i) {
81 bmBc[(unsigned int) x[i]] = m - (i + 1);
82 }
83}
84
/*
 * GRN_BM_COMPARE -- accept a candidate hit at normalized offset `found`.
 *
 * Statement macro meant to be expanded inside grn_bm_tunedbm(): it uses that
 * function's locals by name (found, i, m, shift, cond, flags, string_checks,
 * string_original, string_original_length_in_bytes, string_encoding).
 *
 * If string_checks[found] is zero the macro does nothing.  Otherwise it
 * converts the normalized position into offsets in the original string by
 * summing the positive string_checks entries, stores the result in
 * cond->start_offset / cond->end_offset, saves the resume state in
 * cond->last_found, cond->last_offset, cond->found and
 * cond->found_alpha_head, and RETURNS from the enclosing function.
 *
 * NOTE(review): presumably a positive string_checks entry is the original
 * byte length of the character starting there and a negative entry marks a
 * position inside a character -- confirm against grn_string_get_checks().
 */
#define GRN_BM_COMPARE do { \
  if (string_checks[found]) { \
    size_t offset = cond->last_offset; \
    size_t found_alpha_head = cond->found_alpha_head; \
    /* advance the original-string offset from the previous hit up to found */ \
    for (i = cond->last_found; i < found; i++) { \
      if (string_checks[i] > 0) { \
        found_alpha_head = i; \
        offset += string_checks[i]; \
      } \
    } \
    /* a hit inside a character is moved back to the head of that character */ \
    if (string_checks[found] < 0) { \
      offset -= string_checks[found_alpha_head]; \
      cond->last_found = found_alpha_head; \
    } else { \
      cond->last_found = found; \
    } \
    cond->start_offset = cond->last_offset = offset; \
    if (flags & GRN_SNIP_SKIP_LEADING_SPACES) { \
      while (cond->start_offset < string_original_length_in_bytes && \
             (i = grn_isspace(string_original + cond->start_offset, \
                              string_encoding))) { \
        cond->start_offset += i; \
      } \
    } \
    /* add the original length of everything covered by the keyword */ \
    for (i = cond->last_found; i < found + m; i++) { \
      if (string_checks[i] > 0) { \
        offset += string_checks[i]; \
      } \
    } \
    cond->end_offset = offset; \
    cond->found = found + shift; \
    cond->found_alpha_head = found_alpha_head; \
    return; \
  } \
} while (0)
120
/*
 * GRN_BM_BM_COMPARE -- verify a candidate whose last byte already matched.
 *
 * Statement macro for grn_bm_tunedbm(); uses its locals p (one past the
 * candidate's last byte), cp (one past the keyword's last byte), ck (the
 * keyword's second-to-last byte), i, m, y and found.  The second-to-last
 * byte is checked first, then the remaining bytes from right to left.  On a
 * full match `found` is set to the candidate's start offset and
 * GRN_BM_COMPARE is expanded, which may return from the enclosing function.
 */
#define GRN_BM_BM_COMPARE do { \
  if (p[-2] == ck) { \
    i = 3; \
    while (i <= m && p[-(intptr_t)i] == cp[-(intptr_t)i]) { \
      ++i; \
    } \
    if (i > m) { \
      found = p - y - m; \
      GRN_BM_COMPARE; \
    } \
  } \
} while (0)
131
132void
133grn_bm_tunedbm(grn_ctx *ctx, snip_cond *cond, grn_obj *string, int flags)
134{
135 register unsigned char *limit, ck;
136 register const unsigned char *p, *cp;
137 register size_t *bmBc, delta1, i;
138
139 const unsigned char *x;
140 unsigned char *y;
141 size_t shift, found;
142
143 const char *string_original;
144 unsigned int string_original_length_in_bytes;
145 const short *string_checks;
146 grn_encoding string_encoding;
147 const char *string_norm, *keyword_norm;
148 unsigned int n, m;
149
150 grn_string_get_original(ctx, string,
151 &string_original, &string_original_length_in_bytes);
152 string_checks = grn_string_get_checks(ctx, string);
153 string_encoding = grn_string_get_encoding(ctx, string);
154 grn_string_get_normalized(ctx, string, &string_norm, &n, NULL);
155 grn_string_get_normalized(ctx, cond->keyword, &keyword_norm, &m, NULL);
156
157 y = (unsigned char *)string_norm;
158 if (m == 1) {
159 if (n > cond->found) {
160 shift = 1;
161 p = memchr(y + cond->found, keyword_norm[0], n - cond->found);
162 if (p != NULL) {
163 found = p - y;
164 GRN_BM_COMPARE;
165 }
166 }
167 cond->stopflag = SNIPCOND_STOP;
168 return;
169 }
170
171 x = (unsigned char *)keyword_norm;
172 bmBc = cond->bmBc;
173 shift = cond->shift;
174
175 /* Restart */
176 p = y + m + cond->found;
177 cp = x + m;
178 ck = cp[-2];
179
180 /* 12 means 1(initial offset) + 10 (in loop) + 1 (shift) */
181 if (n - cond->found > 12 * m) {
182 limit = y + n - 11 * m;
183 while (p <= limit) {
184 p += bmBc[p[-1]];
185 if(!(delta1 = bmBc[p[-1]])) {
186 goto check;
187 }
188 p += delta1;
189 p += bmBc[p[-1]];
190 p += bmBc[p[-1]];
191 if(!(delta1 = bmBc[p[-1]])) {
192 goto check;
193 }
194 p += delta1;
195 p += bmBc[p[-1]];
196 p += bmBc[p[-1]];
197 if(!(delta1 = bmBc[p[-1]])) {
198 goto check;
199 }
200 p += delta1;
201 p += bmBc[p[-1]];
202 p += bmBc[p[-1]];
203 continue;
204 check:
205 GRN_BM_BM_COMPARE;
206 p += shift;
207 }
208 }
209 /* limit check + search */
210 limit = y + n;
211 while(p <= limit) {
212 if (!(delta1 = bmBc[p[-1]])) {
213 GRN_BM_BM_COMPARE;
214 p += shift;
215 }
216 p += delta1;
217 }
218 cond->stopflag = SNIPCOND_STOP;
219}
220
/*
 * Returns the number of bytes the range [str, end) occupies once '<', '>',
 * '&' and '"' are replaced by "&lt;", "&gt;", "&amp;" and "&quot;".
 * Every other byte counts as one.  An empty range (str == end) yields 0.
 */
static size_t
count_mapped_chars(const char *str, const char *end)
{
  size_t total = 0;
  const char *cur = str;

  while (cur != end) {
    const char c = *cur++;

    if (c == '<' || c == '>') {
      total += 4; /* &lt; or &gt; */
    } else if (c == '&') {
      total += 5; /* &amp; */
    } else if (c == '"') {
      total += 6; /* &quot; */
    } else {
      total += 1;
    }
  }
  return total;
}
247
248grn_rc
249grn_snip_cond_close(grn_ctx *ctx, snip_cond *cond)
250{
251 if (!cond) {
252 return GRN_INVALID_ARGUMENT;
253 }
254 if (cond->keyword) {
255 grn_obj_close(ctx, cond->keyword);
256 }
257 return GRN_SUCCESS;
258}
259
260grn_rc
261grn_snip_cond_init(grn_ctx *ctx, snip_cond *sc, const char *keyword, unsigned int keyword_len,
262 grn_encoding enc, grn_obj *normalizer, int flags)
263{
264 const char *norm;
265 unsigned int norm_blen;
266 int f = GRN_STR_REMOVEBLANK;
267 memset(sc, 0, sizeof(snip_cond));
268 if (!(sc->keyword = grn_string_open(ctx, keyword, keyword_len,
269 normalizer, f))) {
270 GRN_LOG(ctx, GRN_LOG_ALERT,
271 "grn_string_open on snip_cond_init failed!");
272 return GRN_NO_MEMORY_AVAILABLE;
273 }
274 grn_string_get_normalized(ctx, sc->keyword, &norm, &norm_blen, NULL);
275 if (!norm_blen) {
276 grn_snip_cond_close(ctx, sc);
277 return GRN_INVALID_ARGUMENT;
278 }
279 if (norm_blen != 1) {
280 grn_bm_preBmBc((unsigned char *)norm, norm_blen, sc->bmBc);
281 sc->shift = sc->bmBc[(unsigned char)norm[norm_blen - 1]];
282 sc->bmBc[(unsigned char)norm[norm_blen - 1]] = 0;
283 }
284 return GRN_SUCCESS;
285}
286
287void
288grn_snip_cond_reinit(snip_cond *cond)
289{
290 cond->found = 0;
291 cond->last_found = 0;
292 cond->last_offset = 0;
293 cond->start_offset = 0;
294 cond->end_offset = 0;
295
296 cond->count = 0;
297 cond->stopflag = SNIPCOND_NONSTOP;
298}
299
300inline static char *
301grn_snip_strndup(grn_ctx *ctx, const char *string, unsigned int string_len)
302{
303 char *copied_string;
304
305 copied_string = GRN_MALLOC(string_len + 1);
306 if (!copied_string) {
307 return NULL;
308 }
309 grn_memcpy(copied_string, string, string_len);
310 copied_string[string_len]= '\0'; /* not required, but for ql use */
311 return copied_string;
312}
313
314inline static grn_rc
315grn_snip_cond_set_tag(grn_ctx *ctx,
316 const char **dest_tag, size_t *dest_tag_len,
317 const char *tag, unsigned int tag_len,
318 const char *default_tag, unsigned int default_tag_len,
319 int copy_tag)
320{
321 if (tag) {
322 if (copy_tag) {
323 char *copied_tag;
324 copied_tag = grn_snip_strndup(ctx, tag, tag_len);
325 if (!copied_tag) {
326 return GRN_NO_MEMORY_AVAILABLE;
327 }
328 *dest_tag = copied_tag;
329 } else {
330 *dest_tag = tag;
331 }
332 *dest_tag_len = tag_len;
333 } else {
334 *dest_tag = default_tag;
335 *dest_tag_len = default_tag_len;
336 }
337 return GRN_SUCCESS;
338}
339
340grn_rc
341grn_snip_set_normalizer(grn_ctx *ctx, grn_obj *snip,
342 grn_obj *normalizer)
343{
344 grn_snip *snip_;
345 if (!snip) {
346 return GRN_INVALID_ARGUMENT;
347 }
348
349 snip_ = (grn_snip *)snip;
350 snip_->normalizer = normalizer;
351 return GRN_SUCCESS;
352}
353
354grn_obj *
355grn_snip_get_normalizer(grn_ctx *ctx, grn_obj *snip)
356{
357 grn_snip *snip_;
358
359 if (!snip) {
360 return NULL;
361 }
362
363 snip_ = (grn_snip *)snip;
364 return snip_->normalizer;
365}
366
367grn_rc
368grn_snip_add_cond(grn_ctx *ctx, grn_obj *snip,
369 const char *keyword, unsigned int keyword_len,
370 const char *opentag, unsigned int opentag_len,
371 const char *closetag, unsigned int closetag_len)
372{
373 grn_rc rc;
374 int copy_tag;
375 snip_cond *cond;
376 unsigned int norm_blen;
377 grn_snip *snip_;
378
379 snip_ = (grn_snip *)snip;
380 if (!snip_ || !keyword || !keyword_len || snip_->cond_len >= MAX_SNIP_COND_COUNT) {
381 return GRN_INVALID_ARGUMENT;
382 }
383
384 cond = snip_->cond + snip_->cond_len;
385 if ((rc = grn_snip_cond_init(ctx, cond, keyword, keyword_len,
386 snip_->encoding, snip_->normalizer, snip_->flags))) {
387 return rc;
388 }
389 grn_string_get_normalized(ctx, cond->keyword, NULL, &norm_blen, NULL);
390 if (norm_blen > snip_->width) {
391 grn_snip_cond_close(ctx, cond);
392 return GRN_INVALID_ARGUMENT;
393 }
394
395 copy_tag = snip_->flags & GRN_SNIP_COPY_TAG;
396 rc = grn_snip_cond_set_tag(ctx,
397 &(cond->opentag), &(cond->opentag_len),
398 opentag, opentag_len,
399 snip_->defaultopentag, snip_->defaultopentag_len,
400 copy_tag);
401 if (rc) {
402 grn_snip_cond_close(ctx, cond);
403 return rc;
404 }
405
406 rc = grn_snip_cond_set_tag(ctx,
407 &(cond->closetag), &(cond->closetag_len),
408 closetag, closetag_len,
409 snip_->defaultclosetag, snip_->defaultclosetag_len,
410 copy_tag);
411 if (rc) {
412 if (opentag && copy_tag) {
413 GRN_FREE((void *)cond->opentag);
414 }
415 grn_snip_cond_close(ctx, cond);
416 return rc;
417 }
418
419 snip_->cond_len++;
420 return GRN_SUCCESS;
421}
422
423static size_t
424grn_snip_find_firstbyte(const char *string, grn_encoding encoding, size_t offset,
425 size_t doffset)
426{
427 switch (encoding) {
428 case GRN_ENC_EUC_JP:
429 while (!(grn_bm_check_euc((unsigned char *) string, offset)))
430 offset += doffset;
431 break;
432 case GRN_ENC_SJIS:
433 if (!(grn_bm_check_sjis((unsigned char *) string, offset)))
434 offset += doffset;
435 break;
436 case GRN_ENC_UTF8:
437 while ((signed char)string[offset] <= (signed char)0xc0)
438 offset += doffset;
439 break;
440 default:
441 break;
442 }
443 return offset;
444}
445
446inline static grn_rc
447grn_snip_set_default_tag(grn_ctx *ctx,
448 const char **dest_tag, size_t *dest_tag_len,
449 const char *tag, unsigned int tag_len,
450 int copy_tag)
451{
452 if (copy_tag && tag) {
453 char *copied_tag;
454 copied_tag = grn_snip_strndup(ctx, tag, tag_len);
455 if (!copied_tag) {
456 return GRN_NO_MEMORY_AVAILABLE;
457 }
458 *dest_tag = copied_tag;
459 } else {
460 *dest_tag = tag;
461 }
462 *dest_tag_len = tag_len;
463 return GRN_SUCCESS;
464}
465
466grn_obj *
467grn_snip_open(grn_ctx *ctx, int flags, unsigned int width,
468 unsigned int max_results,
469 const char *defaultopentag, unsigned int defaultopentag_len,
470 const char *defaultclosetag, unsigned int defaultclosetag_len,
471 grn_snip_mapping *mapping)
472{
473 int copy_tag;
474 grn_snip *ret = NULL;
475 if (!(ret = GRN_MALLOC(sizeof(grn_snip)))) {
476 GRN_LOG(ctx, GRN_LOG_ALERT, "grn_snip allocation failed on grn_snip_open");
477 return NULL;
478 }
479 if (max_results > MAX_SNIP_RESULT_COUNT || max_results == 0) {
480 GRN_LOG(ctx, GRN_LOG_WARNING, "max_results is invalid on grn_snip_open");
481 GRN_FREE(ret);
482 return NULL;
483 }
484 GRN_API_ENTER;
485 ret->encoding = ctx->encoding;
486 ret->flags = flags;
487 ret->width = width;
488 ret->max_results = max_results;
489 ret->defaultopentag = NULL;
490 ret->defaultclosetag = NULL;
491
492 copy_tag = flags & GRN_SNIP_COPY_TAG;
493 if (grn_snip_set_default_tag(ctx,
494 &(ret->defaultopentag),
495 &(ret->defaultopentag_len),
496 defaultopentag, defaultopentag_len,
497 copy_tag)) {
498 GRN_FREE(ret);
499 GRN_API_RETURN(NULL);
500 }
501
502 if (grn_snip_set_default_tag(ctx,
503 &(ret->defaultclosetag),
504 &(ret->defaultclosetag_len),
505 defaultclosetag, defaultclosetag_len,
506 copy_tag)) {
507 if (copy_tag && ret->defaultopentag) {
508 GRN_FREE((void *)ret->defaultopentag);
509 }
510 GRN_FREE(ret);
511 GRN_API_RETURN(NULL);
512 }
513
514 ret->cond_len = 0;
515 ret->mapping = mapping;
516 ret->nstr = NULL;
517 ret->tag_count = 0;
518 ret->snip_count = 0;
519 if (ret->flags & GRN_SNIP_NORMALIZE) {
520 ret->normalizer = GRN_NORMALIZER_AUTO;
521 } else {
522 ret->normalizer = NULL;
523 }
524
525 GRN_DB_OBJ_SET_TYPE(ret, GRN_SNIP);
526 {
527 grn_obj *db;
528 grn_id id;
529 db = grn_ctx_db(ctx);
530 id = grn_obj_register(ctx, db, NULL, 0);
531 DB_OBJ(ret)->header.domain = GRN_ID_NIL;
532 DB_OBJ(ret)->range = GRN_ID_NIL;
533 grn_db_obj_init(ctx, db, id, DB_OBJ(ret));
534 }
535
536 GRN_API_RETURN((grn_obj *)ret);
537}
538
539static grn_rc
540exec_clean(grn_ctx *ctx, grn_snip *snip)
541{
542 snip_cond *cond, *cond_end;
543 if (snip->nstr) {
544 grn_obj_close(ctx, snip->nstr);
545 snip->nstr = NULL;
546 }
547 snip->tag_count = 0;
548 snip->snip_count = 0;
549 for (cond = snip->cond, cond_end = cond + snip->cond_len;
550 cond < cond_end; cond++) {
551 grn_snip_cond_reinit(cond);
552 }
553 return GRN_SUCCESS;
554}
555
556grn_rc
557grn_snip_close(grn_ctx *ctx, grn_snip *snip)
558{
559 snip_cond *cond, *cond_end;
560 if (!snip) { return GRN_INVALID_ARGUMENT; }
561 GRN_API_ENTER;
562 if (snip->flags & GRN_SNIP_COPY_TAG) {
563 int i;
564 snip_cond *sc;
565 const char *dot = snip->defaultopentag, *dct = snip->defaultclosetag;
566 for (i = snip->cond_len, sc = snip->cond; i; i--, sc++) {
567 if (sc->opentag != dot) { GRN_FREE((void *)sc->opentag); }
568 if (sc->closetag != dct) { GRN_FREE((void *)sc->closetag); }
569 }
570 if (dot) { GRN_FREE((void *)dot); }
571 if (dct) { GRN_FREE((void *)dct); }
572 }
573 if (snip->nstr) {
574 grn_obj_close(ctx, snip->nstr);
575 }
576 for (cond = snip->cond, cond_end = cond + snip->cond_len;
577 cond < cond_end; cond++) {
578 grn_snip_cond_close(ctx, cond);
579 }
580 GRN_FREE(snip);
581 GRN_API_RETURN(GRN_SUCCESS);
582}
583
584grn_rc
585grn_snip_exec(grn_ctx *ctx, grn_obj *snip, const char *string, unsigned int string_len,
586 unsigned int *nresults, unsigned int *max_tagged_len)
587{
588 size_t i;
589 grn_snip *snip_;
590 int f = GRN_STR_WITH_CHECKS|GRN_STR_REMOVEBLANK;
591 if (!snip || !string || !nresults || !max_tagged_len) {
592 return GRN_INVALID_ARGUMENT;
593 }
594 GRN_API_ENTER;
595 snip_ = (grn_snip *)snip;
596 exec_clean(ctx, snip_);
597 *nresults = 0;
598 snip_->nstr = grn_string_open(ctx, string, string_len, snip_->normalizer, f);
599 if (!snip_->nstr) {
600 exec_clean(ctx, snip_);
601 GRN_LOG(ctx, GRN_LOG_ALERT, "grn_string_open on grn_snip_exec failed !");
602 GRN_API_RETURN(ctx->rc);
603 }
604 for (i = 0; i < snip_->cond_len; i++) {
605 grn_bm_tunedbm(ctx, snip_->cond + i, snip_->nstr, snip_->flags);
606 }
607
608 {
609 _snip_tag_result *tag_result = snip_->tag_result;
610 _snip_result *snip_result = snip_->snip_result;
611 size_t last_end_offset = 0, last_last_end_offset = 0;
612 unsigned int unfound_cond_count = snip_->cond_len;
613
614 *max_tagged_len = 0;
615 while (1) {
616 size_t tagged_len = 0, last_tag_end = 0;
617 int_least8_t all_stop = 1, found_cond = 0;
618 snip_result->tag_count = 0;
619
620 while (1) {
621 size_t min_start_offset = (size_t) -1;
622 size_t max_end_offset = 0;
623 snip_cond *cond = NULL;
624
625 /* get condition which have minimum offset and is not stopped */
626 for (i = 0; i < snip_->cond_len; i++) {
627 if (snip_->cond[i].stopflag == SNIPCOND_NONSTOP &&
628 (min_start_offset > snip_->cond[i].start_offset ||
629 (min_start_offset == snip_->cond[i].start_offset &&
630 max_end_offset < snip_->cond[i].end_offset))) {
631 min_start_offset = snip_->cond[i].start_offset;
632 max_end_offset = snip_->cond[i].end_offset;
633 cond = &snip_->cond[i];
634 }
635 }
636 if (!cond) {
637 break;
638 }
639 /* check whether condtion is the first condition in snippet */
640 if (snip_result->tag_count == 0) {
641 /* skip condition if the number of rest snippet field is smaller than */
642 /* the number of unfound keywords. */
643 if (snip_->max_results - *nresults <= unfound_cond_count && cond->count > 0) {
644 int_least8_t exclude_other_cond = 1;
645 for (i = 0; i < snip_->cond_len; i++) {
646 if ((snip_->cond + i) != cond
647 && snip_->cond[i].end_offset <= cond->start_offset + snip_->width
648 && snip_->cond[i].count == 0) {
649 exclude_other_cond = 0;
650 }
651 }
652 if (exclude_other_cond) {
653 grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags);
654 continue;
655 }
656 }
657 snip_result->start_offset = cond->start_offset;
658 snip_result->first_tag_result_idx = snip_->tag_count;
659 } else {
660 if (cond->start_offset >= snip_result->start_offset + snip_->width) {
661 break;
662 }
663 /* check nesting to make valid HTML */
664 /* ToDo: allow <test><te>te</te><st>st</st></test> */
665 if (cond->start_offset < last_tag_end) {
666 grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags);
667 continue;
668 }
669 }
670 if (cond->end_offset > snip_result->start_offset + snip_->width) {
671 /* If a keyword gets across a snippet, */
672 /* it was skipped and never to be tagged. */
673 cond->stopflag = SNIPCOND_ACROSS;
674 grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags);
675 } else {
676 found_cond = 1;
677 if (cond->count == 0) {
678 unfound_cond_count--;
679 }
680 cond->count++;
681 last_end_offset = cond->end_offset;
682
683 tag_result->cond = cond;
684 tag_result->start_offset = cond->start_offset;
685 tag_result->end_offset = last_tag_end = cond->end_offset;
686
687 snip_result->tag_count++;
688 tag_result++;
689 tagged_len += cond->opentag_len + cond->closetag_len;
690 if (++snip_->tag_count >= MAX_SNIP_TAG_COUNT) {
691 break;
692 }
693 grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags);
694 }
695 }
696 if (!found_cond) {
697 break;
698 }
699 if (snip_result->start_offset + last_end_offset < snip_->width) {
700 snip_result->start_offset = 0;
701 } else {
702 snip_result->start_offset =
703 MAX(MIN
704 ((snip_result->start_offset + last_end_offset - snip_->width) / 2,
705 string_len - snip_->width), last_last_end_offset);
706 }
707 snip_result->start_offset =
708 grn_snip_find_firstbyte(string, snip_->encoding, snip_result->start_offset, 1);
709
710 snip_result->end_offset = snip_result->start_offset + snip_->width;
711 if (snip_result->end_offset < string_len) {
712 snip_result->end_offset =
713 grn_snip_find_firstbyte(string, snip_->encoding, snip_result->end_offset, -1);
714 } else {
715 snip_result->end_offset = string_len;
716 }
717 last_last_end_offset = snip_result->end_offset;
718
719 if (snip_->mapping == (grn_snip_mapping *) -1) {
720 tagged_len +=
721 count_mapped_chars(&string[snip_result->start_offset],
722 &string[snip_result->end_offset]) + 1;
723 } else {
724 tagged_len += snip_result->end_offset - snip_result->start_offset + 1;
725 }
726
727 *max_tagged_len = MAX(*max_tagged_len, tagged_len);
728
729 snip_result->last_tag_result_idx = snip_->tag_count - 1;
730 (*nresults)++;
731 snip_result++;
732
733 if (*nresults == snip_->max_results || snip_->tag_count == MAX_SNIP_TAG_COUNT) {
734 break;
735 }
736 for (i = 0; i < snip_->cond_len; i++) {
737 if (snip_->cond[i].stopflag != SNIPCOND_STOP) {
738 all_stop = 0;
739 snip_->cond[i].stopflag = SNIPCOND_NONSTOP;
740 }
741 }
742 if (all_stop) {
743 break;
744 }
745 }
746 }
747 snip_->snip_count = *nresults;
748 snip_->string = string;
749
750 snip_->max_tagged_len = *max_tagged_len;
751
752 GRN_API_RETURN(ctx->rc);
753}
754
755grn_rc
756grn_snip_get_result(grn_ctx *ctx, grn_obj *snip, const unsigned int index, char *result, unsigned int *result_len)
757{
758 char *p;
759 size_t i, j, k;
760 _snip_result *sres;
761 grn_snip *snip_;
762
763 snip_ = (grn_snip *)snip;
764 if (snip_->snip_count <= index || !snip_->nstr) {
765 return GRN_INVALID_ARGUMENT;
766 }
767
768 GRN_ASSERT(snip_->snip_count != 0 && snip_->tag_count != 0);
769
770 GRN_API_ENTER;
771 sres = &snip_->snip_result[index];
772 j = sres->first_tag_result_idx;
773 for (p = result, i = sres->start_offset; i < sres->end_offset; i++) {
774 for (; j <= sres->last_tag_result_idx && snip_->tag_result[j].start_offset == i; j++) {
775 if (snip_->tag_result[j].end_offset > sres->end_offset) {
776 continue;
777 }
778 grn_memcpy(p,
779 snip_->tag_result[j].cond->opentag,
780 snip_->tag_result[j].cond->opentag_len);
781 p += snip_->tag_result[j].cond->opentag_len;
782 }
783
784 if (snip_->mapping == GRN_SNIP_MAPPING_HTML_ESCAPE) {
785 switch (snip_->string[i]) {
786 case '<':
787 *p++ = '&';
788 *p++ = 'l';
789 *p++ = 't';
790 *p++ = ';';
791 break;
792 case '>':
793 *p++ = '&';
794 *p++ = 'g';
795 *p++ = 't';
796 *p++ = ';';
797 break;
798 case '&':
799 *p++ = '&';
800 *p++ = 'a';
801 *p++ = 'm';
802 *p++ = 'p';
803 *p++ = ';';
804 break;
805 case '"':
806 *p++ = '&';
807 *p++ = 'q';
808 *p++ = 'u';
809 *p++ = 'o';
810 *p++ = 't';
811 *p++ = ';';
812 break;
813 default:
814 *p++ = snip_->string[i];
815 break;
816 }
817 } else {
818 *p++ = snip_->string[i];
819 }
820
821 for (k = sres->last_tag_result_idx;
822 snip_->tag_result[k].end_offset <= sres->end_offset; k--) {
823 /* TODO: avoid all loop */
824 if (snip_->tag_result[k].end_offset == i + 1) {
825 grn_memcpy(p,
826 snip_->tag_result[k].cond->closetag,
827 snip_->tag_result[k].cond->closetag_len);
828 p += snip_->tag_result[k].cond->closetag_len;
829 }
830 if (k <= sres->first_tag_result_idx) {
831 break;
832 }
833 };
834 }
835 *p = '\0';
836
837 if(result_len) { *result_len = (unsigned int)(p - result); }
838 GRN_ASSERT((unsigned int)(p - result) <= snip_->max_tagged_len);
839
840 GRN_API_RETURN(ctx->rc);
841}
842