1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2009-2017 Brazil
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License version 2.1 as published by the Free Software Foundation.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18#include "grn.h"
19#include <stdio.h>
20#include <fcntl.h>
21#include <string.h>
22#include <sys/stat.h>
23
24#ifdef WIN32
25# include <io.h>
26# include <share.h>
27#endif /* WIN32 */
28
29#include "grn_ii.h"
30#include "grn_ctx_impl.h"
31#include "grn_token_cursor.h"
32#include "grn_pat.h"
33#include "grn_db.h"
34#include "grn_output.h"
35#include "grn_scorer.h"
36#include "grn_util.h"
37
38#ifdef GRN_WITH_ONIGMO
39# define GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
40#endif
41
42#ifdef GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
43# include "grn_string.h"
44# include <onigmo.h>
45#endif
46
/* Limits on the number of physical segments.  MAX_PSEG is also the upper
 * bound enforced on the GRN_II_MAX_N_SEGMENTS_SMALL override in
 * grn_ii_init_from_env(), and MAX_PSEG_SMALL is that setting's default. */
#define MAX_PSEG 0x20000
#define MAX_PSEG_SMALL 0x00200
/* MAX_PSEG_MEDIUM has enough space for the following source:
 * * Single source.
 * * Source is a fixed size column or _key of a table.
 * * Source column is a scalar column.
 * * Lexicon doesn't have tokenizer.
 */
#define MAX_PSEG_MEDIUM 0x10000
/* Byte size of one chunk; chunk_new()/chunk_free() treat requests larger
 * than this as spanning several whole chunks. */
#define S_CHUNK (1 << GRN_II_W_CHUNK)
/* log2 and byte size of one segment (segment_get_clear() zeroes S_SEGMENT
 * bytes of a freshly obtained segment). */
#define W_SEGMENT 18
#define S_SEGMENT (1 << W_SEGMENT)
/* Presumably log2/byte size of one array element and the derived number of
 * elements per segment -- the users are outside this part of the file. */
#define W_ARRAY_ELEMENT 3
#define S_ARRAY_ELEMENT (1 << W_ARRAY_ELEMENT)
#define W_ARRAY (W_SEGMENT - W_ARRAY_ELEMENT)
#define ARRAY_MASK_IN_A_SEGMENT ((1 << W_ARRAY) - 1)

/* Number of bytes mapped for one grn_ii_ginfo garbage record. */
#define S_GARBAGE (1<<12)

/* NOTE(review): not referenced in this part of the file; presumably a flag
 * bit and a size threshold for splitting chunks -- confirm at the users. */
#define CHUNK_SPLIT 0x80000000
#define CHUNK_SPLIT_THRESHOLD 0x60000

/* NOTE(review): not referenced in this part of the file. */
#define MAX_N_ELEMENTS 5
70
/*
 * Declares `name`, `name_buffer` and `name_size` in the enclosing scope and
 * fills them with a printable name for `ii`, for use in error messages.
 *
 * - Requires a `ctx` variable in scope (passed to grn_obj_name()).
 * - Objects whose id is GRN_ID_NIL are reported as "(temporary)".
 * - Callers print the result with "%.*s" and name_size, so `name` is not
 *   relied upon to be NUL-terminated.
 * - Because it expands to declarations, use it at the start of a block.
 */
#define DEFINE_NAME(ii) \
  const char *name; \
  char name_buffer[GRN_TABLE_MAX_KEY_SIZE]; \
  int name_size; \
  do { \
    if (DB_OBJ(ii)->id == GRN_ID_NIL) { \
      name = "(temporary)"; \
      name_size = strlen(name); \
    } else { \
      name_size = grn_obj_name(ctx, (grn_obj *)ii, \
                               name_buffer, GRN_TABLE_MAX_KEY_SIZE); \
      name = name_buffer; \
    } \
  } while (GRN_FALSE)
85
/*
 * A 32-bit "pos" packs a segment number in its upper 16 bits and an offset
 * divided by 4 in its lower 16 bits.
 *   LSEG(pos)         -> segment number (upper 16 bits)
 *   LPOS(pos)         -> offset (lower 16 bits multiplied by 4)
 *   SEG2POS(seg, pos) -> packs the two back together; the low 2 bits of the
 *                        offset are dropped by the >> 2.
 */
#define LSEG(pos) ((pos) >> 16)
#define LPOS(pos) (((pos) & 0xffff) << 2)
#define SEG2POS(seg,pos) ((((uint32_t)(seg)) << 16) + (((uint32_t)(pos)) >> 2))
89
/* Fallback owner read/write permission bits for environments where the
 * included headers do not define them. */
#ifndef S_IRUSR
# define S_IRUSR 0400
#endif /* S_IRUSR */
#ifndef S_IWUSR
# define S_IWUSR 0200
#endif /* S_IWUSR */
96
/*
 * Process-wide tunables.  The values below are the built-in defaults;
 * grn_ii_init_from_env() may replace them from GRN_II_* environment
 * variables.
 */
/* GRN_II_CURSOR_SET_MIN_ENABLE: cleared only when the variable is "no". */
static grn_bool grn_ii_cursor_set_min_enable = GRN_TRUE;
/* GRN_II_SELECT_TOO_MANY_INDEX_MATCH_RATIO (parsed with atof). */
static double grn_ii_select_too_many_index_match_ratio = -1;
/* GRN_II_ESTIMATE_SIZE_FOR_QUERY_REDUCE_RATIO (parsed with atof). */
static double grn_ii_estimate_size_for_query_reduce_ratio = 0.9;
/* GRN_II_OVERLAP_TOKEN_SKIP_ENABLE: set when the variable is non-empty. */
static grn_bool grn_ii_overlap_token_skip_enable = GRN_FALSE;
/* GRN_II_BUILDER_BLOCK_THRESHOLD: 0 when the variable is unset or empty. */
static uint32_t grn_ii_builder_block_threshold_force = 0;
/* GRN_II_MAX_N_SEGMENTS_SMALL: clamped to MAX_PSEG. */
static uint32_t grn_ii_max_n_segments_small = MAX_PSEG_SMALL;
/* GRN_II_MAX_N_CHUNKS_SMALL: clamped to GRN_II_MAX_CHUNK. */
static uint32_t grn_ii_max_n_chunks_small = GRN_II_MAX_CHUNK_SMALL;
104
105void
106grn_ii_init_from_env(void)
107{
108 {
109 char grn_ii_cursor_set_min_enable_env[GRN_ENV_BUFFER_SIZE];
110 grn_getenv("GRN_II_CURSOR_SET_MIN_ENABLE",
111 grn_ii_cursor_set_min_enable_env,
112 GRN_ENV_BUFFER_SIZE);
113 if (strcmp(grn_ii_cursor_set_min_enable_env, "no") == 0) {
114 grn_ii_cursor_set_min_enable = GRN_FALSE;
115 } else {
116 grn_ii_cursor_set_min_enable = GRN_TRUE;
117 }
118 }
119
120 {
121 char grn_ii_select_too_many_index_match_ratio_env[GRN_ENV_BUFFER_SIZE];
122 grn_getenv("GRN_II_SELECT_TOO_MANY_INDEX_MATCH_RATIO",
123 grn_ii_select_too_many_index_match_ratio_env,
124 GRN_ENV_BUFFER_SIZE);
125 if (grn_ii_select_too_many_index_match_ratio_env[0]) {
126 grn_ii_select_too_many_index_match_ratio =
127 atof(grn_ii_select_too_many_index_match_ratio_env);
128 }
129 }
130
131 {
132 char grn_ii_estimate_size_for_query_reduce_ratio_env[GRN_ENV_BUFFER_SIZE];
133 grn_getenv("GRN_II_ESTIMATE_SIZE_FOR_QUERY_REDUCE_RATIO",
134 grn_ii_estimate_size_for_query_reduce_ratio_env,
135 GRN_ENV_BUFFER_SIZE);
136 if (grn_ii_estimate_size_for_query_reduce_ratio_env[0]) {
137 grn_ii_estimate_size_for_query_reduce_ratio =
138 atof(grn_ii_estimate_size_for_query_reduce_ratio_env);
139 }
140 }
141
142 {
143 char grn_ii_overlap_token_skip_enable_env[GRN_ENV_BUFFER_SIZE];
144 grn_getenv("GRN_II_OVERLAP_TOKEN_SKIP_ENABLE",
145 grn_ii_overlap_token_skip_enable_env,
146 GRN_ENV_BUFFER_SIZE);
147 if (grn_ii_overlap_token_skip_enable_env[0]) {
148 grn_ii_overlap_token_skip_enable = GRN_TRUE;
149 } else {
150 grn_ii_overlap_token_skip_enable = GRN_FALSE;
151 }
152 }
153
154 {
155 char grn_ii_builder_block_threshold_env[GRN_ENV_BUFFER_SIZE];
156 grn_getenv("GRN_II_BUILDER_BLOCK_THRESHOLD",
157 grn_ii_builder_block_threshold_env,
158 GRN_ENV_BUFFER_SIZE);
159 if (grn_ii_builder_block_threshold_env[0]) {
160 grn_ii_builder_block_threshold_force =
161 grn_atoui(grn_ii_builder_block_threshold_env,
162 grn_ii_builder_block_threshold_env +
163 strlen(grn_ii_builder_block_threshold_env),
164 NULL);
165 } else {
166 grn_ii_builder_block_threshold_force = 0;
167 }
168 }
169
170 {
171 char grn_ii_max_n_segments_small_env[GRN_ENV_BUFFER_SIZE];
172 grn_getenv("GRN_II_MAX_N_SEGMENTS_SMALL",
173 grn_ii_max_n_segments_small_env,
174 GRN_ENV_BUFFER_SIZE);
175 if (grn_ii_max_n_segments_small_env[0]) {
176 grn_ii_max_n_segments_small =
177 grn_atoui(grn_ii_max_n_segments_small_env,
178 grn_ii_max_n_segments_small_env +
179 strlen(grn_ii_max_n_segments_small_env),
180 NULL);
181 if (grn_ii_max_n_segments_small > MAX_PSEG) {
182 grn_ii_max_n_segments_small = MAX_PSEG;
183 }
184 }
185 }
186
187 {
188 char grn_ii_max_n_chunks_small_env[GRN_ENV_BUFFER_SIZE];
189 grn_getenv("GRN_II_MAX_N_CHUNKS_SMALL",
190 grn_ii_max_n_chunks_small_env,
191 GRN_ENV_BUFFER_SIZE);
192 if (grn_ii_max_n_chunks_small_env[0]) {
193 grn_ii_max_n_chunks_small =
194 grn_atoui(grn_ii_max_n_chunks_small_env,
195 grn_ii_max_n_chunks_small_env +
196 strlen(grn_ii_max_n_chunks_small_env),
197 NULL);
198 if (grn_ii_max_n_chunks_small > GRN_II_MAX_CHUNK) {
199 grn_ii_max_n_chunks_small = GRN_II_MAX_CHUNK;
200 }
201 }
202 }
203}
204
205void
206grn_ii_cursor_set_min_enable_set(grn_bool enable)
207{
208 grn_ii_cursor_set_min_enable = enable;
209}
210
211grn_bool
212grn_ii_cursor_set_min_enable_get(void)
213{
214 return grn_ii_cursor_set_min_enable;
215}
216
217/* segment */
218
219inline static uint32_t
220segment_get(grn_ctx *ctx, grn_ii *ii)
221{
222 uint32_t pseg;
223 if (ii->header->bgqtail == ((ii->header->bgqhead + 1) & (GRN_II_BGQSIZE - 1))) {
224 pseg = ii->header->bgqbody[ii->header->bgqtail];
225 ii->header->bgqtail = (ii->header->bgqtail + 1) & (GRN_II_BGQSIZE - 1);
226 } else {
227 pseg = ii->header->pnext;
228#ifndef CUT_OFF_COMPATIBILITY
229 if (!pseg) {
230 int i;
231 uint32_t pmax = 0;
232 char *used;
233 uint32_t max_segment = ii->seg->header->max_segment;
234 used = GRN_CALLOC(max_segment);
235 if (!used) { return max_segment; }
236 for (i = 0; i < GRN_II_MAX_LSEG && i < max_segment; i++) {
237 if ((pseg = ii->header->ainfo[i]) != GRN_II_PSEG_NOT_ASSIGNED) {
238 if (pseg > pmax) { pmax = pseg; }
239 used[pseg] = 1;
240 }
241 if ((pseg = ii->header->binfo[i]) != GRN_II_PSEG_NOT_ASSIGNED) {
242 if (pseg > pmax) { pmax = pseg; }
243 used[pseg] = 1;
244 }
245 }
246 for (pseg = 0; pseg < max_segment && used[pseg]; pseg++) ;
247 GRN_FREE(used);
248 ii->header->pnext = pmax + 1;
249 } else
250#endif /* CUT_OFF_COMPATIBILITY */
251 if (ii->header->pnext < ii->seg->header->max_segment) {
252 ii->header->pnext++;
253 }
254 }
255 return pseg;
256}
257
258inline static grn_rc
259segment_get_clear(grn_ctx *ctx, grn_ii *ii, uint32_t *pseg)
260{
261 uint32_t seg = segment_get(ctx, ii);
262 if (seg < ii->seg->header->max_segment) {
263 void *p = NULL;
264 GRN_IO_SEG_REF(ii->seg, seg, p);
265 if (!p) { return GRN_NO_MEMORY_AVAILABLE; }
266 memset(p, 0, S_SEGMENT);
267 GRN_IO_SEG_UNREF(ii->seg, seg);
268 *pseg = seg;
269 return GRN_SUCCESS;
270 } else {
271 return GRN_NO_MEMORY_AVAILABLE;
272 }
273}
274
275inline static grn_rc
276buffer_segment_new(grn_ctx *ctx, grn_ii *ii, uint32_t *segno)
277{
278 uint32_t lseg, pseg;
279 if (*segno < GRN_II_MAX_LSEG) {
280 if (ii->header->binfo[*segno] != GRN_II_PSEG_NOT_ASSIGNED) {
281 return GRN_INVALID_ARGUMENT;
282 }
283 lseg = *segno;
284 } else {
285 for (lseg = 0; lseg < GRN_II_MAX_LSEG; lseg++) {
286 if (ii->header->binfo[lseg] == GRN_II_PSEG_NOT_ASSIGNED) { break; }
287 }
288 if (lseg == GRN_II_MAX_LSEG) { return GRN_NO_MEMORY_AVAILABLE; }
289 *segno = lseg;
290 }
291 pseg = segment_get(ctx, ii);
292 if (pseg < ii->seg->header->max_segment) {
293 ii->header->binfo[lseg] = pseg;
294 if (lseg >= ii->header->bmax) { ii->header->bmax = lseg + 1; }
295 return GRN_SUCCESS;
296 } else {
297 return GRN_NO_MEMORY_AVAILABLE;
298 }
299}
300
301static grn_rc
302buffer_segment_reserve(grn_ctx *ctx, grn_ii *ii,
303 uint32_t *lseg0, uint32_t *pseg0,
304 uint32_t *lseg1, uint32_t *pseg1)
305{
306 uint32_t i = 0;
307 for (;; i++) {
308 if (i == GRN_II_MAX_LSEG) {
309 DEFINE_NAME(ii);
310 MERR("[ii][buffer][segment][reserve] "
311 "couldn't find a free buffer: <%.*s>: max:<%u>",
312 name_size, name,
313 GRN_II_MAX_LSEG);
314 return ctx->rc;
315 }
316 if (ii->header->binfo[i] == GRN_II_PSEG_NOT_ASSIGNED) { break; }
317 }
318 *lseg0 = i++;
319 for (;; i++) {
320 if (i == GRN_II_MAX_LSEG) {
321 DEFINE_NAME(ii);
322 MERR("[ii][buffer][segment][reserve] "
323 "couldn't find two free buffers: "
324 "<%.*s>: "
325 "found:<%u>, max:<%u>",
326 name_size, name,
327 *lseg0, GRN_II_MAX_LSEG);
328 return ctx->rc;
329 }
330 if (ii->header->binfo[i] == GRN_II_PSEG_NOT_ASSIGNED) { break; }
331 }
332 *lseg1 = i;
333 if ((*pseg0 = segment_get(ctx, ii)) == ii->seg->header->max_segment) {
334 DEFINE_NAME(ii);
335 MERR("[ii][buffer][segment][reserve] "
336 "couldn't allocate a free segment: <%.*s>: "
337 "buffer:<%u>, max:<%u>",
338 name_size, name,
339 *lseg0, ii->seg->header->max_segment);
340 return ctx->rc;
341 }
342 if ((*pseg1 = segment_get(ctx, ii)) == ii->seg->header->max_segment) {
343 DEFINE_NAME(ii);
344 MERR("[ii][buffer][segment][reserve] "
345 "couldn't allocate two free segments: "
346 "<%.*s>: "
347 "found:<%u>, not-found:<%u>, max:<%u>",
348 name_size, name,
349 *lseg0, *lseg1, ii->seg->header->max_segment);
350 return ctx->rc;
351 }
352 /*
353 {
354 uint32_t pseg;
355 char *used = GRN_CALLOC(ii->seg->header->max_segment);
356 if (!used) { return GRN_NO_MEMORY_AVAILABLE; }
357 for (i = 0; i < GRN_II_MAX_LSEG; i++) {
358 if ((pseg = ii->header->ainfo[i]) != GRN_II_PSEG_NOT_ASSIGNED) {
359 used[pseg] = 1;
360 }
361 if ((pseg = ii->header->binfo[i]) != GRN_II_PSEG_NOT_ASSIGNED) {
362 used[pseg] = 1;
363 }
364 }
365 for (pseg = 0;; pseg++) {
366 if (pseg == ii->seg->header->max_segment) {
367 GRN_FREE(used);
368 return GRN_NO_MEMORY_AVAILABLE;
369 }
370 if (!used[pseg]) { break; }
371 }
372 *pseg0 = pseg++;
373 for (;; pseg++) {
374 if (pseg == ii->seg->header->max_segment) {
375 GRN_FREE(used);
376 return GRN_NO_MEMORY_AVAILABLE;
377 }
378 if (!used[pseg]) { break; }
379 }
380 *pseg1 = pseg;
381 GRN_FREE(used);
382 }
383 */
384 return ctx->rc;
385}
386
/*
 * Pushes the physical segment currently bound to logical buffer `lseg`
 * onto the header's bgq ring (bgqbody, written at bgqhead), so that
 * segment_get() can hand it out again later.  Does nothing when the slot
 * is unassigned.  Uses the variable `ii` from the expansion site.
 */
#define BGQENQUE(lseg) do {\
  if (ii->header->binfo[lseg] != GRN_II_PSEG_NOT_ASSIGNED) {\
    ii->header->bgqbody[ii->header->bgqhead] = ii->header->binfo[lseg];\
    ii->header->bgqhead = (ii->header->bgqhead + 1) & (GRN_II_BGQSIZE - 1);\
    GRN_ASSERT(ii->header->bgqhead != ii->header->bgqtail);\
  }\
} while (0)
394
395inline static void
396buffer_segment_update(grn_ii *ii, uint32_t lseg, uint32_t pseg)
397{
398 BGQENQUE(lseg);
399 // smb_wmb();
400 ii->header->binfo[lseg] = pseg;
401 if (lseg >= ii->header->bmax) { ii->header->bmax = lseg + 1; }
402}
403
404inline static void
405buffer_segment_clear(grn_ii *ii, uint32_t lseg)
406{
407 BGQENQUE(lseg);
408 // smb_wmb();
409 ii->header->binfo[lseg] = GRN_II_PSEG_NOT_ASSIGNED;
410}
411
412/* chunk */
413
/*
 * header->chunks is a bitmap with one bit per chunk: byte (offset >> 3),
 * bit (offset & 7).  A set bit marks the chunk as in use.
 * `offset` is evaluated twice by each macro.
 */
/* Evaluates to 1 when the chunk's bit is set, 0 otherwise. */
#define HEADER_CHUNK_AT(ii,offset) \
  ((((ii)->header->chunks[((offset) >> 3)]) >> ((offset) & 7)) & 1)

/* Sets the chunk's bit. */
#define HEADER_CHUNK_ON(ii,offset) \
  (((ii)->header->chunks[((offset) >> 3)]) |= (1 << ((offset) & 7)))

/* Clears the chunk's bit. */
#define HEADER_CHUNK_OFF(ii,offset) \
  (((ii)->header->chunks[((offset) >> 3)]) &= ~(1 << ((offset) & 7)))

/* chunk_new() only recycles freed regions of a size class while more than
 * this many garbage records are available. */
#define N_GARBAGES_TH 1

/* Capacity of grn_ii_ginfo.recs: what remains of S_GARBAGE bytes after the
 * four uint32_t bookkeeping fields. */
#define N_GARBAGES ((S_GARBAGE - (sizeof(uint32_t) * 4))/(sizeof(uint32_t)))
426
/*
 * One node of a per-size-class list of freed chunk regions.  It is mapped
 * directly from the chunk file (S_GARBAGE bytes), so the field layout must
 * not change.  `recs` is used as a ring buffer.
 */
typedef struct {
  uint32_t head;  /* index in recs where chunk_free() stores the next entry */
  uint32_t tail;  /* index in recs from which chunk_new() takes an entry */
  uint32_t nrecs; /* number of entries currently held in recs */
  uint32_t next;  /* next node of the list, or GRN_II_PSEG_NOT_ASSIGNED */
  uint32_t recs[N_GARBAGES]; /* positions of freed regions */
} grn_ii_ginfo;
434
/*
 * Maps `size` bytes of the chunk file through grn_io_win_map().
 * `seg` is a chunk position as produced by chunk_new(): the bits above
 * GRN_II_N_CHUNK_VARIATION select the io segment, and the low
 * GRN_II_N_CHUNK_VARIATION bits, shifted left by GRN_II_W_LEAST_CHUNK, give
 * the byte offset inside it, to which `pos` is added.
 * `seg` is evaluated twice.
 */
#define WIN_MAP(chunk,ctx,iw,seg,pos,size,mode)\
  grn_io_win_map(chunk, ctx, iw,\
                 ((seg) >> GRN_II_N_CHUNK_VARIATION),\
                 (((seg) & ((1 << GRN_II_N_CHUNK_VARIATION) - 1)) << GRN_II_W_LEAST_CHUNK) + (pos),\
                 size, mode)
440/*
441static int new_histogram[32];
442static int free_histogram[32];
443*/
/*
 * Allocates room for `size` bytes in the chunk file and stores the position
 * of the allocation in *res.
 *
 * - size > S_CHUNK: first-fit search of the header bitmap for a run of
 *   consecutive free chunks large enough for `size`; the run is marked used
 *   and *res is the first chunk index << GRN_II_N_CHUNK_VARIATION.
 * - otherwise: `size` is rounded up to a power-of-two class m (at least
 *   GRN_II_W_LEAST_CHUNK).  A previously freed region of that class is
 *   reused when enough garbage records exist; failing that, the region is
 *   carved from the partially used chunk tracked in header->free_chunks,
 *   taking a fresh chunk from the bitmap when none is open.
 *
 * Returns GRN_SUCCESS, or ctx->rc after reporting the failure with MERR().
 */
static grn_rc
chunk_new(grn_ctx *ctx, grn_ii *ii, uint32_t *res, uint32_t size)
{
  uint32_t n_chunks;

  n_chunks = ii->chunk->header->max_segment;

  /*
  if (size) {
    int m, es = size - 1;
    GRN_BIT_SCAN_REV(es, m);
    m++;
    new_histogram[m]++;
  }
  */
  if (size > S_CHUNK) {
    int i, j;
    /* Number of whole chunks needed. */
    uint32_t n = (size + S_CHUNK - 1) >> GRN_II_W_CHUNK;
    /* j tracks the last chunk seen in use (-1 before any); once free chunk
     * i satisfies i == j + n, chunks j+1 .. i form a free run of n. */
    for (i = 0, j = -1; i < n_chunks; i++) {
      if (HEADER_CHUNK_AT(ii, i)) {
        j = i;
      } else {
        if (i == j + n) {
          j++;
          *res = j << GRN_II_N_CHUNK_VARIATION;
          for (; j <= i; j++) { HEADER_CHUNK_ON(ii, j); }
          return GRN_SUCCESS;
        }
      }
    }
    {
      DEFINE_NAME(ii);
      MERR("[ii][chunk][new] index is full: "
           "<%.*s>: "
           "size:<%u>, n-chunks:<%u>",
           name_size, name,
           size, n_chunks);
    }
    return ctx->rc;
  } else {
    uint32_t *vp;
    /* NOTE(review): aligned_size is assigned below but never read. */
    int m, aligned_size;
    /* m = log2 of the size class: smallest power of two >= size, but not
     * below GRN_II_W_LEAST_CHUNK. */
    if (size > (1 << GRN_II_W_LEAST_CHUNK)) {
      int es = size - 1;
      GRN_BIT_SCAN_REV(es, m);
      m++;
    } else {
      m = GRN_II_W_LEAST_CHUNK;
    }
    aligned_size = 1 << (m - GRN_II_W_LEAST_CHUNK);
    if (ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK] > N_GARBAGES_TH) {
      grn_ii_ginfo *ginfo;
      uint32_t *gseg;
      /* iw maps the node being examined; iw_ keeps the previous node mapped
       * because gseg may point at that node's `next` field. */
      grn_io_win iw, iw_;
      iw_.addr = NULL;
      gseg = &ii->header->garbages[m - GRN_II_W_LEAST_CHUNK];
      while (*gseg != GRN_II_PSEG_NOT_ASSIGNED) {
        ginfo = WIN_MAP(ii->chunk, ctx, &iw, *gseg, 0, S_GARBAGE, grn_io_rdwr);
        //GRN_IO_SEG_MAP2(ii->chunk, *gseg, ginfo);
        if (!ginfo) {
          if (iw_.addr) { grn_io_win_unmap(&iw_); }
          {
            DEFINE_NAME(ii);
            MERR("[ii][chunk][new] failed to allocate garbage segment: "
                 "<%.*s>: "
                 "n-garbages:<%u>, size:<%u>, n-chunks:<%u>",
                 name_size, name,
                 ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK],
                 size,
                 n_chunks);
          }
          return ctx->rc;
        }
        /* Take from this node unless it is the last node of the list and
         * holds no more than N_GARBAGES_TH records. */
        if (ginfo->next != GRN_II_PSEG_NOT_ASSIGNED ||
            ginfo->nrecs > N_GARBAGES_TH) {
          *res = ginfo->recs[ginfo->tail];
          if (++ginfo->tail == N_GARBAGES) { ginfo->tail = 0; }
          ginfo->nrecs--;
          ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK]--;
          if (!ginfo->nrecs) {
            /* Node is empty: unlink it.  The write through gseg must happen
             * before iw_ is unmapped below. */
            HEADER_CHUNK_OFF(ii, *gseg);
            *gseg = ginfo->next;
          }
          if (iw_.addr) { grn_io_win_unmap(&iw_); }
          grn_io_win_unmap(&iw);
          return GRN_SUCCESS;
        }
        if (iw_.addr) { grn_io_win_unmap(&iw_); }
        iw_ = iw;
        gseg = &ginfo->next;
      }
      if (iw_.addr) { grn_io_win_unmap(&iw_); }
    }
    /* No reusable region: carve from the open chunk of this size class. */
    vp = &ii->header->free_chunks[m - GRN_II_W_LEAST_CHUNK];
    if (*vp == GRN_II_PSEG_NOT_ASSIGNED) {
      /* No open chunk: claim the first free one in the bitmap. */
      int i = 0;
      while (HEADER_CHUNK_AT(ii, i)) {
        if (++i >= n_chunks) {
          DEFINE_NAME(ii);
          MERR("[ii][chunk][new] failed to find a free chunk: "
               "<%.*s>: "
               "index:<%u>, size:<%u>, n-chunks:<%u>",
               name_size, name,
               m - GRN_II_W_LEAST_CHUNK,
               size,
               n_chunks);
          return ctx->rc;
        }
      }
      HEADER_CHUNK_ON(ii, i);
      *vp = i << GRN_II_N_CHUNK_VARIATION;
    }
    *res = *vp;
    *vp += 1 << (m - GRN_II_W_LEAST_CHUNK);
    /* The low bits wrapping to zero means the open chunk is used up. */
    if (!(*vp & ((1 << GRN_II_N_CHUNK_VARIATION) - 1))) {
      *vp = GRN_II_PSEG_NOT_ASSIGNED;
    }
    return GRN_SUCCESS;
  }
}
564
/*
 * Releases a region previously returned by chunk_new().
 *
 * - size > S_CHUNK: clears the header bitmap bits of the whole chunks the
 *   region covers.
 * - otherwise: records `offset` in the garbage list of the region's size
 *   class so that chunk_new() can reuse it.  The first list node with spare
 *   capacity is used; when there is none, a new node is allocated with
 *   chunk_new(S_GARBAGE) and linked at the end of the list.
 *
 * `dummy` is not used.
 *
 * Returns GRN_SUCCESS, GRN_NO_MEMORY_AVAILABLE when a list node cannot be
 * mapped, or the error returned by chunk_new().
 */
static grn_rc
chunk_free(grn_ctx *ctx, grn_ii *ii,
           uint32_t offset, uint32_t dummy, uint32_t size)
{
  /*
  if (size) {
    int m, es = size - 1;
    GRN_BIT_SCAN_REV(es, m);
    m++;
    free_histogram[m]++;
  }
  */
  /* iw maps the node being examined; iw_ keeps the previous node mapped
   * because gseg may point at that node's `next` field. */
  grn_io_win iw, iw_;
  grn_ii_ginfo *ginfo= 0;
  uint32_t seg, m, *gseg;
  seg = offset >> GRN_II_N_CHUNK_VARIATION;
  if (size > S_CHUNK) {
    int n = (size + S_CHUNK - 1) >> GRN_II_W_CHUNK;
    for (; n--; seg++) { HEADER_CHUNK_OFF(ii, seg); }
    return GRN_SUCCESS;
  }
  /* Same size-class computation as chunk_new(). */
  if (size > (1 << GRN_II_W_LEAST_CHUNK)) {
    int es = size - 1;
    GRN_BIT_SCAN_REV(es, m);
    m++;
  } else {
    m = GRN_II_W_LEAST_CHUNK;
  }
  gseg = &ii->header->garbages[m - GRN_II_W_LEAST_CHUNK];
  iw_.addr = NULL;
  /* Walk the list until a node with room is found (break leaves it mapped
   * in iw) or the end of the list is reached. */
  while (*gseg != GRN_II_PSEG_NOT_ASSIGNED) {
    ginfo = WIN_MAP(ii->chunk, ctx, &iw, *gseg, 0, S_GARBAGE, grn_io_rdwr);
    // GRN_IO_SEG_MAP2(ii->chunk, *gseg, ginfo);
    if (!ginfo) {
      if (iw_.addr) { grn_io_win_unmap(&iw_); }
      return GRN_NO_MEMORY_AVAILABLE;
    }
    if (ginfo->nrecs < N_GARBAGES) { break; }
    if (iw_.addr) { grn_io_win_unmap(&iw_); }
    iw_ = iw;
    gseg = &ginfo->next;
  }
  if (*gseg == GRN_II_PSEG_NOT_ASSIGNED) {
    /* Every node is full (or the list is empty): append a new node.
     * chunk_new() stores its position through gseg, which links it in. */
    grn_rc rc;
    if ((rc = chunk_new(ctx, ii, gseg, S_GARBAGE))) {
      if (iw_.addr) { grn_io_win_unmap(&iw_); }
      return rc;
    }
    ginfo = WIN_MAP(ii->chunk, ctx, &iw, *gseg, 0, S_GARBAGE, grn_io_rdwr);
    /*
    uint32_t i = 0;
    while (HEADER_CHUNK_AT(ii, i)) {
      if (++i >= ii->chunk->header->max_segment) {
        return GRN_NO_MEMORY_AVAILABLE;
      }
    }
    HEADER_CHUNK_ON(ii, i);
    *gseg = i;
    GRN_IO_SEG_MAP2(ii->chunk, *gseg, ginfo);
    */
    if (!ginfo) {
      if (iw_.addr) { grn_io_win_unmap(&iw_); }
      return GRN_NO_MEMORY_AVAILABLE;
    }
    ginfo->head = 0;
    ginfo->tail = 0;
    ginfo->nrecs = 0;
    ginfo->next = GRN_II_PSEG_NOT_ASSIGNED;
  }
  if (iw_.addr) { grn_io_win_unmap(&iw_); }
  /* Append the freed position at the head of the node's ring buffer. */
  ginfo->recs[ginfo->head] = offset;
  if (++ginfo->head == N_GARBAGES) { ginfo->head = 0; }
  ginfo->nrecs++;
  grn_io_win_unmap(&iw);
  ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK]++;
  return GRN_SUCCESS;
}
642
643#define UNIT_SIZE 0x80
644#define UNIT_MASK (UNIT_SIZE - 1)
645
646/* <generated> */
/*
 * Packs eight 1-bit values from p into one byte at rp, first value in the
 * most significant bit.  Values are expected to fit in 1 bit.
 * Returns rp advanced past the byte written.
 */
static uint8_t *
pack_1(uint32_t *p, uint8_t *rp)
{
  uint8_t acc = 0;
  int i;
  for (i = 0; i < 8; i++) {
    acc = (uint8_t)(acc + (p[i] << (7 - i)));
  }
  rp[0] = acc;
  return rp + 1;
}
/*
 * Expands one byte at dp into eight 1-bit values in p, most significant
 * bit first.  Returns dp advanced past the byte read.
 */
static uint8_t *
unpack_1(uint32_t *p, uint8_t *dp)
{
  int i;
  for (i = 0; i < 8; i++) {
    p[i] = (dp[0] >> (7 - i)) & 0x1;
  }
  return dp + 1;
}
/*
 * Packs eight 2-bit values from p into 2 bytes at rp, most significant
 * bits first (four values per byte).  Values are expected to fit in 2 bits.
 * Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_2(uint32_t *p, uint8_t *rp)
{
  int i;
  for (i = 0; i < 2; i++) {
    const uint32_t *src = p + 4 * i;
    rp[i] = (uint8_t)((src[0] << 6) + (src[1] << 4) + (src[2] << 2) + src[3]);
  }
  return rp + 2;
}
/*
 * Expands 2 bytes at dp into eight 2-bit values in p, most significant
 * bits first.  Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_2(uint32_t *p, uint8_t *dp)
{
  int i;
  for (i = 0; i < 2; i++) {
    uint32_t *dst = p + 4 * i;
    dst[0] = dp[i] >> 6;
    dst[1] = (dp[i] >> 4) & 0x3;
    dst[2] = (dp[i] >> 2) & 0x3;
    dst[3] = dp[i] & 0x3;
  }
  return dp + 2;
}
/*
 * Packs eight 3-bit values from p into 3 bytes at rp, most significant
 * bits first.  Values are expected to fit in 3 bits.
 * Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_3(uint32_t *p, uint8_t *rp)
{
  rp[0] = (uint8_t)((p[0] << 5) + (p[1] << 2) + (p[2] >> 1));
  rp[1] = (uint8_t)((p[2] << 7) + (p[3] << 4) + (p[4] << 1) + (p[5] >> 2));
  rp[2] = (uint8_t)((p[5] << 6) + (p[6] << 3) + p[7]);
  return rp + 3;
}
/*
 * Expands 3 bytes at dp into eight 3-bit values in p, most significant
 * bits first.  Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_3(uint32_t *p, uint8_t *dp)
{
  p[0] = dp[0] >> 5;
  p[1] = (dp[0] >> 2) & 0x7;
  p[2] = ((dp[0] << 1) & 0x7) + (dp[1] >> 7);
  p[3] = (dp[1] >> 4) & 0x7;
  p[4] = (dp[1] >> 1) & 0x7;
  p[5] = ((dp[1] << 2) & 0x7) + (dp[2] >> 6);
  p[6] = (dp[2] >> 3) & 0x7;
  p[7] = dp[2] & 0x7;
  return dp + 3;
}
/*
 * Packs eight 4-bit values from p into 4 bytes at rp, two values per byte
 * with the earlier value in the high nibble.  Values are expected to fit in
 * 4 bits.  Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_4(uint32_t *p, uint8_t *rp)
{
  int i;
  for (i = 0; i < 4; i++) {
    rp[i] = (uint8_t)((p[2 * i] << 4) + p[2 * i + 1]);
  }
  return rp + 4;
}
/*
 * Expands 4 bytes at dp into eight 4-bit values in p, high nibble first.
 * Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_4(uint32_t *p, uint8_t *dp)
{
  int i;
  for (i = 0; i < 4; i++) {
    p[2 * i] = dp[i] >> 4;
    p[2 * i + 1] = dp[i] & 0xf;
  }
  return dp + 4;
}
/*
 * Packs eight 5-bit values from p into 5 bytes at rp, most significant
 * bits first.  Values are expected to fit in 5 bits.
 * Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_5(uint32_t *p, uint8_t *rp)
{
  rp[0] = (uint8_t)((p[0] << 3) + (p[1] >> 2));
  rp[1] = (uint8_t)((p[1] << 6) + (p[2] << 1) + (p[3] >> 4));
  rp[2] = (uint8_t)((p[3] << 4) + (p[4] >> 1));
  rp[3] = (uint8_t)((p[4] << 7) + (p[5] << 2) + (p[6] >> 3));
  rp[4] = (uint8_t)((p[6] << 5) + p[7]);
  return rp + 5;
}
/*
 * Expands 5 bytes at dp into eight 5-bit values in p, most significant
 * bits first.  Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_5(uint32_t *p, uint8_t *dp)
{
  p[0] = dp[0] >> 3;
  p[1] = ((dp[0] << 2) & 0x1f) + (dp[1] >> 6);
  p[2] = (dp[1] >> 1) & 0x1f;
  p[3] = ((dp[1] << 4) & 0x1f) + (dp[2] >> 4);
  p[4] = ((dp[2] << 1) & 0x1f) + (dp[3] >> 7);
  p[5] = (dp[3] >> 2) & 0x1f;
  p[6] = ((dp[3] << 3) & 0x1f) + (dp[4] >> 5);
  p[7] = dp[4] & 0x1f;
  return dp + 5;
}
/*
 * Packs eight 6-bit values from p into 6 bytes at rp, most significant
 * bits first.  The layout repeats every four values (3 bytes).  Values are
 * expected to fit in 6 bits.  Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_6(uint32_t *p, uint8_t *rp)
{
  int half;
  for (half = 0; half < 2; half++) {
    const uint32_t *src = p + 4 * half;
    uint8_t *dst = rp + 3 * half;
    dst[0] = (uint8_t)((src[0] << 2) + (src[1] >> 4));
    dst[1] = (uint8_t)((src[1] << 4) + (src[2] >> 2));
    dst[2] = (uint8_t)((src[2] << 6) + src[3]);
  }
  return rp + 6;
}
/*
 * Expands 6 bytes at dp into eight 6-bit values in p, most significant
 * bits first.  The layout repeats every 3 bytes (four values).
 * Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_6(uint32_t *p, uint8_t *dp)
{
  int half;
  for (half = 0; half < 2; half++) {
    const uint8_t *src = dp + 3 * half;
    uint32_t *dst = p + 4 * half;
    dst[0] = src[0] >> 2;
    dst[1] = ((src[0] << 4) & 0x3f) + (src[1] >> 4);
    dst[2] = ((src[1] << 2) & 0x3f) + (src[2] >> 6);
    dst[3] = src[2] & 0x3f;
  }
  return dp + 6;
}
/*
 * Packs eight 7-bit values from p into 7 bytes at rp, most significant
 * bits first.  Values are expected to fit in 7 bits.
 * Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_7(uint32_t *p, uint8_t *rp)
{
  rp[0] = (uint8_t)((p[0] << 1) + (p[1] >> 6));
  rp[1] = (uint8_t)((p[1] << 2) + (p[2] >> 5));
  rp[2] = (uint8_t)((p[2] << 3) + (p[3] >> 4));
  rp[3] = (uint8_t)((p[3] << 4) + (p[4] >> 3));
  rp[4] = (uint8_t)((p[4] << 5) + (p[5] >> 2));
  rp[5] = (uint8_t)((p[5] << 6) + (p[6] >> 1));
  rp[6] = (uint8_t)((p[6] << 7) + p[7]);
  return rp + 7;
}
/*
 * Expands 7 bytes at dp into eight 7-bit values in p, most significant
 * bits first.  Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_7(uint32_t *p, uint8_t *dp)
{
  p[0] = dp[0] >> 1;
  p[1] = ((dp[0] << 6) & 0x7f) + (dp[1] >> 2);
  p[2] = ((dp[1] << 5) & 0x7f) + (dp[2] >> 3);
  p[3] = ((dp[2] << 4) & 0x7f) + (dp[3] >> 4);
  p[4] = ((dp[3] << 3) & 0x7f) + (dp[4] >> 5);
  p[5] = ((dp[4] << 2) & 0x7f) + (dp[5] >> 6);
  p[6] = ((dp[5] << 1) & 0x7f) + (dp[6] >> 7);
  p[7] = dp[6] & 0x7f;
  return dp + 7;
}
/*
 * Stores the low 8 bits of each of the eight values in p as one byte at
 * rp.  Returns rp advanced past the 8 bytes written.
 */
static uint8_t *
pack_8(uint32_t *p, uint8_t *rp)
{
  int i;
  for (i = 0; i < 8; i++) {
    rp[i] = (uint8_t)p[i];
  }
  return rp + 8;
}
/*
 * Widens each of the 8 bytes at dp into one value in p.
 * Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_8(uint32_t *p, uint8_t *dp)
{
  int i;
  for (i = 0; i < 8; i++) {
    p[i] = dp[i];
  }
  return dp + 8;
}
/*
 * Packs eight 9-bit values from p into 9 bytes at rp, most significant
 * bits first.  Values are expected to fit in 9 bits.
 * Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_9(uint32_t *p, uint8_t *rp)
{
  rp[0] = (uint8_t)(p[0] >> 1);
  rp[1] = (uint8_t)((p[0] << 7) + (p[1] >> 2));
  rp[2] = (uint8_t)((p[1] << 6) + (p[2] >> 3));
  rp[3] = (uint8_t)((p[2] << 5) + (p[3] >> 4));
  rp[4] = (uint8_t)((p[3] << 4) + (p[4] >> 5));
  rp[5] = (uint8_t)((p[4] << 3) + (p[5] >> 6));
  rp[6] = (uint8_t)((p[5] << 2) + (p[6] >> 7));
  rp[7] = (uint8_t)((p[6] << 1) + (p[7] >> 8));
  rp[8] = (uint8_t)p[7];
  return rp + 9;
}
/*
 * Expands 9 bytes at dp into eight 9-bit values in p, most significant
 * bits first.  Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_9(uint32_t *p, uint8_t *dp)
{
  p[0] = (dp[0] << 1) + (dp[1] >> 7);
  p[1] = ((dp[1] << 2) & 0x1ff) + (dp[2] >> 6);
  p[2] = ((dp[2] << 3) & 0x1ff) + (dp[3] >> 5);
  p[3] = ((dp[3] << 4) & 0x1ff) + (dp[4] >> 4);
  p[4] = ((dp[4] << 5) & 0x1ff) + (dp[5] >> 3);
  p[5] = ((dp[5] << 6) & 0x1ff) + (dp[6] >> 2);
  p[6] = ((dp[6] << 7) & 0x1ff) + (dp[7] >> 1);
  p[7] = ((dp[7] << 8) & 0x1ff) + dp[8];
  return dp + 9;
}
/*
 * Packs eight 10-bit values from p into 10 bytes at rp, most significant
 * bits first.  The layout repeats every four values (5 bytes).  Values are
 * expected to fit in 10 bits.  Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_10(uint32_t *p, uint8_t *rp)
{
  int half;
  for (half = 0; half < 2; half++) {
    const uint32_t *src = p + 4 * half;
    uint8_t *dst = rp + 5 * half;
    dst[0] = (uint8_t)(src[0] >> 2);
    dst[1] = (uint8_t)((src[0] << 6) + (src[1] >> 4));
    dst[2] = (uint8_t)((src[1] << 4) + (src[2] >> 6));
    dst[3] = (uint8_t)((src[2] << 2) + (src[3] >> 8));
    dst[4] = (uint8_t)src[3];
  }
  return rp + 10;
}
/*
 * Expands 10 bytes at dp into eight 10-bit values in p, most significant
 * bits first.  The layout repeats every 5 bytes (four values).
 * Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_10(uint32_t *p, uint8_t *dp)
{
  int half;
  for (half = 0; half < 2; half++) {
    const uint8_t *src = dp + 5 * half;
    uint32_t *dst = p + 4 * half;
    dst[0] = (src[0] << 2) + (src[1] >> 6);
    dst[1] = ((src[1] << 4) & 0x3ff) + (src[2] >> 4);
    dst[2] = ((src[2] << 6) & 0x3ff) + (src[3] >> 2);
    dst[3] = ((src[3] << 8) & 0x3ff) + src[4];
  }
  return dp + 10;
}
/*
 * Packs eight 11-bit values from p into 11 bytes at rp, most significant
 * bits first.  Values are expected to fit in 11 bits.
 * Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_11(uint32_t *p, uint8_t *rp)
{
  rp[0] = (uint8_t)(p[0] >> 3);
  rp[1] = (uint8_t)((p[0] << 5) + (p[1] >> 6));
  rp[2] = (uint8_t)((p[1] << 2) + (p[2] >> 9));
  rp[3] = (uint8_t)(p[2] >> 1);
  rp[4] = (uint8_t)((p[2] << 7) + (p[3] >> 4));
  rp[5] = (uint8_t)((p[3] << 4) + (p[4] >> 7));
  rp[6] = (uint8_t)((p[4] << 1) + (p[5] >> 10));
  rp[7] = (uint8_t)(p[5] >> 2);
  rp[8] = (uint8_t)((p[5] << 6) + (p[6] >> 5));
  rp[9] = (uint8_t)((p[6] << 3) + (p[7] >> 8));
  rp[10] = (uint8_t)p[7];
  return rp + 11;
}
/*
 * Expands 11 bytes at dp into eight 11-bit values in p, most significant
 * bits first.  Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_11(uint32_t *p, uint8_t *dp)
{
  p[0] = (dp[0] << 3) + (dp[1] >> 5);
  p[1] = ((dp[1] << 6) & 0x7ff) + (dp[2] >> 2);
  p[2] = ((dp[2] << 9) & 0x7ff) + (dp[3] << 1) + (dp[4] >> 7);
  p[3] = ((dp[4] << 4) & 0x7ff) + (dp[5] >> 4);
  p[4] = ((dp[5] << 7) & 0x7ff) + (dp[6] >> 1);
  p[5] = ((dp[6] << 10) & 0x7ff) + (dp[7] << 2) + (dp[8] >> 6);
  p[6] = ((dp[8] << 5) & 0x7ff) + (dp[9] >> 3);
  p[7] = ((dp[9] << 8) & 0x7ff) + dp[10];
  return dp + 11;
}
/*
 * Packs eight 12-bit values from p into 12 bytes at rp, most significant
 * bits first.  The layout repeats every two values (3 bytes).  Values are
 * expected to fit in 12 bits.  Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_12(uint32_t *p, uint8_t *rp)
{
  int pair;
  for (pair = 0; pair < 4; pair++) {
    const uint32_t *src = p + 2 * pair;
    uint8_t *dst = rp + 3 * pair;
    dst[0] = (uint8_t)(src[0] >> 4);
    dst[1] = (uint8_t)((src[0] << 4) + (src[1] >> 8));
    dst[2] = (uint8_t)src[1];
  }
  return rp + 12;
}
/*
 * Expands 12 bytes at dp into eight 12-bit values in p, most significant
 * bits first.  The layout repeats every 3 bytes (two values).
 * Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_12(uint32_t *p, uint8_t *dp)
{
  int pair;
  for (pair = 0; pair < 4; pair++) {
    const uint8_t *src = dp + 3 * pair;
    uint32_t *dst = p + 2 * pair;
    dst[0] = (src[0] << 4) + (src[1] >> 4);
    dst[1] = ((src[1] << 8) & 0xfff) + src[2];
  }
  return dp + 12;
}
/*
 * Packs eight 13-bit values from p into 13 bytes at rp, most significant
 * bits first.  Values are expected to fit in 13 bits.
 * Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_13(uint32_t *p, uint8_t *rp)
{
  rp[0] = (uint8_t)(p[0] >> 5);
  rp[1] = (uint8_t)((p[0] << 3) + (p[1] >> 10));
  rp[2] = (uint8_t)(p[1] >> 2);
  rp[3] = (uint8_t)((p[1] << 6) + (p[2] >> 7));
  rp[4] = (uint8_t)((p[2] << 1) + (p[3] >> 12));
  rp[5] = (uint8_t)(p[3] >> 4);
  rp[6] = (uint8_t)((p[3] << 4) + (p[4] >> 9));
  rp[7] = (uint8_t)(p[4] >> 1);
  rp[8] = (uint8_t)((p[4] << 7) + (p[5] >> 6));
  rp[9] = (uint8_t)((p[5] << 2) + (p[6] >> 11));
  rp[10] = (uint8_t)(p[6] >> 3);
  rp[11] = (uint8_t)((p[6] << 5) + (p[7] >> 8));
  rp[12] = (uint8_t)p[7];
  return rp + 13;
}
/*
 * Expands 13 bytes at dp into eight 13-bit values in p, most significant
 * bits first.  Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_13(uint32_t *p, uint8_t *dp)
{
  p[0] = (dp[0] << 5) + (dp[1] >> 3);
  p[1] = ((dp[1] << 10) & 0x1fff) + (dp[2] << 2) + (dp[3] >> 6);
  p[2] = ((dp[3] << 7) & 0x1fff) + (dp[4] >> 1);
  p[3] = ((dp[4] << 12) & 0x1fff) + (dp[5] << 4) + (dp[6] >> 4);
  p[4] = ((dp[6] << 9) & 0x1fff) + (dp[7] << 1) + (dp[8] >> 7);
  p[5] = ((dp[8] << 6) & 0x1fff) + (dp[9] >> 2);
  p[6] = ((dp[9] << 11) & 0x1fff) + (dp[10] << 3) + (dp[11] >> 5);
  p[7] = ((dp[11] << 8) & 0x1fff) + dp[12];
  return dp + 13;
}
/*
 * Packs eight 14-bit values from p into 14 bytes at rp, most significant
 * bits first.  The layout repeats every four values (7 bytes).  Values are
 * expected to fit in 14 bits.  Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_14(uint32_t *p, uint8_t *rp)
{
  int half;
  for (half = 0; half < 2; half++) {
    const uint32_t *src = p + 4 * half;
    uint8_t *dst = rp + 7 * half;
    dst[0] = (uint8_t)(src[0] >> 6);
    dst[1] = (uint8_t)((src[0] << 2) + (src[1] >> 12));
    dst[2] = (uint8_t)(src[1] >> 4);
    dst[3] = (uint8_t)((src[1] << 4) + (src[2] >> 10));
    dst[4] = (uint8_t)(src[2] >> 2);
    dst[5] = (uint8_t)((src[2] << 6) + (src[3] >> 8));
    dst[6] = (uint8_t)src[3];
  }
  return rp + 14;
}
/*
 * Expands 14 bytes at dp into eight 14-bit values in p, most significant
 * bits first.  The layout repeats every 7 bytes (four values).
 * Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_14(uint32_t *p, uint8_t *dp)
{
  int half;
  for (half = 0; half < 2; half++) {
    const uint8_t *src = dp + 7 * half;
    uint32_t *dst = p + 4 * half;
    dst[0] = (src[0] << 6) + (src[1] >> 2);
    dst[1] = ((src[1] << 12) & 0x3fff) + (src[2] << 4) + (src[3] >> 4);
    dst[2] = ((src[3] << 10) & 0x3fff) + (src[4] << 2) + (src[5] >> 6);
    dst[3] = ((src[5] << 8) & 0x3fff) + src[6];
  }
  return dp + 14;
}
/*
 * Packs eight 15-bit values from p into 15 bytes at rp, most significant
 * bits first.  Values are expected to fit in 15 bits.
 * Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_15(uint32_t *p, uint8_t *rp)
{
  rp[0] = (uint8_t)(p[0] >> 7);
  rp[1] = (uint8_t)((p[0] << 1) + (p[1] >> 14));
  rp[2] = (uint8_t)(p[1] >> 6);
  rp[3] = (uint8_t)((p[1] << 2) + (p[2] >> 13));
  rp[4] = (uint8_t)(p[2] >> 5);
  rp[5] = (uint8_t)((p[2] << 3) + (p[3] >> 12));
  rp[6] = (uint8_t)(p[3] >> 4);
  rp[7] = (uint8_t)((p[3] << 4) + (p[4] >> 11));
  rp[8] = (uint8_t)(p[4] >> 3);
  rp[9] = (uint8_t)((p[4] << 5) + (p[5] >> 10));
  rp[10] = (uint8_t)(p[5] >> 2);
  rp[11] = (uint8_t)((p[5] << 6) + (p[6] >> 9));
  rp[12] = (uint8_t)(p[6] >> 1);
  rp[13] = (uint8_t)((p[6] << 7) + (p[7] >> 8));
  rp[14] = (uint8_t)p[7];
  return rp + 15;
}
/*
 * Expands 15 bytes at dp into eight 15-bit values in p, most significant
 * bits first.  Returns dp advanced past the bytes read.
 */
static uint8_t *
unpack_15(uint32_t *p, uint8_t *dp)
{
  p[0] = (dp[0] << 7) + (dp[1] >> 1);
  p[1] = ((dp[1] << 14) & 0x7fff) + (dp[2] << 6) + (dp[3] >> 2);
  p[2] = ((dp[3] << 13) & 0x7fff) + (dp[4] << 5) + (dp[5] >> 3);
  p[3] = ((dp[5] << 12) & 0x7fff) + (dp[6] << 4) + (dp[7] >> 4);
  p[4] = ((dp[7] << 11) & 0x7fff) + (dp[8] << 3) + (dp[9] >> 5);
  p[5] = ((dp[9] << 10) & 0x7fff) + (dp[10] << 2) + (dp[11] >> 6);
  p[6] = ((dp[11] << 9) & 0x7fff) + (dp[12] << 1) + (dp[13] >> 7);
  p[7] = ((dp[13] << 8) & 0x7fff) + dp[14];
  return dp + 15;
}
/*
 * Stores the low 16 bits of each of the eight values in p as two bytes at
 * rp, high byte first.  Returns rp advanced past the 16 bytes written.
 */
static uint8_t *
pack_16(uint32_t *p, uint8_t *rp)
{
  int i;
  for (i = 0; i < 8; i++) {
    rp[2 * i] = (uint8_t)(p[i] >> 8);
    rp[2 * i + 1] = (uint8_t)p[i];
  }
  return rp + 16;
}
/*
 * Combines each pair of bytes at dp (high byte first) into one 16-bit
 * value in p, eight values in total.
 * Returns dp advanced past the 16 bytes read.
 */
static uint8_t *
unpack_16(uint32_t *p, uint8_t *dp)
{
  int i;
  for (i = 0; i < 8; i++) {
    p[i] = (dp[2 * i] << 8) + dp[2 * i + 1];
  }
  return dp + 16;
}
/*
 * Packs eight 17-bit values from p into 17 bytes at rp, most significant
 * bits first.  Values are expected to fit in 17 bits.
 * Returns rp advanced past the bytes written.
 */
static uint8_t *
pack_17(uint32_t *p, uint8_t *rp)
{
  rp[0] = (uint8_t)(p[0] >> 9);
  rp[1] = (uint8_t)(p[0] >> 1);
  rp[2] = (uint8_t)((p[0] << 7) + (p[1] >> 10));
  rp[3] = (uint8_t)(p[1] >> 2);
  rp[4] = (uint8_t)((p[1] << 6) + (p[2] >> 11));
  rp[5] = (uint8_t)(p[2] >> 3);
  rp[6] = (uint8_t)((p[2] << 5) + (p[3] >> 12));
  rp[7] = (uint8_t)(p[3] >> 4);
  rp[8] = (uint8_t)((p[3] << 4) + (p[4] >> 13));
  rp[9] = (uint8_t)(p[4] >> 5);
  rp[10] = (uint8_t)((p[4] << 3) + (p[5] >> 14));
  rp[11] = (uint8_t)(p[5] >> 6);
  rp[12] = (uint8_t)((p[5] << 2) + (p[6] >> 15));
  rp[13] = (uint8_t)(p[6] >> 7);
  rp[14] = (uint8_t)((p[6] << 1) + (p[7] >> 16));
  rp[15] = (uint8_t)(p[7] >> 8);
  rp[16] = (uint8_t)p[7];
  return rp + 17;
}
/*
 * Reads 17 bytes from dp, splits them into 8 consecutive 17-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 */
static uint8_t *
unpack_17(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 17, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 18-bit fields, most
 * significant bit first (18 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 18 bits.
 */
static uint8_t *
pack_18(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 18, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 18 bytes from dp, splits them into 8 consecutive 18-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 */
static uint8_t *
unpack_18(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 18, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 19-bit fields, most
 * significant bit first (19 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 19 bits.
 */
static uint8_t *
pack_19(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 19, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 19 bytes from dp, splits them into 8 consecutive 19-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 */
static uint8_t *
unpack_19(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 19, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 20-bit fields, most
 * significant bit first (20 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 20 bits.
 */
static uint8_t *
pack_20(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 20, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 20 bytes from dp, splits them into 8 consecutive 20-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 */
static uint8_t *
unpack_20(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 20, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 21-bit fields, most
 * significant bit first (21 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 21 bits.
 */
static uint8_t *
pack_21(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 21, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 21 bytes from dp, splits them into 8 consecutive 21-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 */
static uint8_t *
unpack_21(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 21, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 22-bit fields, most
 * significant bit first (22 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 22 bits.
 */
static uint8_t *
pack_22(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 22, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 22 bytes from dp, splits them into 8 consecutive 22-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 */
static uint8_t *
unpack_22(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 22, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 23-bit fields, most
 * significant bit first (23 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 23 bits.
 */
static uint8_t *
pack_23(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 23, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 23 bytes from dp, splits them into 8 consecutive 23-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 */
static uint8_t *
unpack_23(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 23, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Stores the 8 values at p into rp as 24-bit big-endian integers (24 bytes in
 * total) and returns the position just after the last byte written.  Only the
 * low 24 bits of each value are stored.
 */
static uint8_t *
pack_24(uint32_t *p, uint8_t *rp)
{
  int n;
  for (n = 0; n < 8; n++) {
    uint32_t x = p[n];
    rp[0] = x >> 16;
    rp[1] = x >> 8;
    rp[2] = x;
    rp += 3;
  }
  return rp;
}
/*
 * Reads 24 bytes from dp as 8 big-endian 24-bit integers and stores them in
 * p[0..7].  Returns the position just after the last byte read.
 */
static uint8_t *
unpack_24(uint32_t *p, uint8_t *dp)
{
  int n;
  for (n = 0; n < 8; n++, dp += 3) {
    p[n] = ((uint32_t)dp[0] << 16) + ((uint32_t)dp[1] << 8) + dp[2];
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 25-bit fields, most
 * significant bit first (25 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 25 bits.
 */
static uint8_t *
pack_25(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 25, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 25 bytes from dp, splits them into 8 consecutive 25-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 *
 * The bytes are gathered in an unsigned 64-bit window.  Shifting an
 * int-promoted byte left by 24 or more, as `*dp << 24` does, overflows int
 * for bytes >= 0x80 and is undefined behaviour; the unsigned window avoids it.
 */
static uint8_t *
unpack_25(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 25, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    /* Bits above WIDTH belong to earlier fields and are masked away. */
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 26-bit fields, most
 * significant bit first (26 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 26 bits.
 */
static uint8_t *
pack_26(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 26, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 26 bytes from dp, splits them into 8 consecutive 26-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 *
 * The bytes are gathered in an unsigned 64-bit window.  Shifting an
 * int-promoted byte left by 24 or more, as `*dp << 24` does, overflows int
 * for bytes >= 0x80 and is undefined behaviour; the unsigned window avoids it.
 */
static uint8_t *
unpack_26(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 26, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    /* Bits above WIDTH belong to earlier fields and are masked away. */
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 27-bit fields, most
 * significant bit first (27 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 27 bits.
 */
static uint8_t *
pack_27(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 27, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 27 bytes from dp, splits them into 8 consecutive 27-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 *
 * The bytes are gathered in an unsigned 64-bit window.  Shifting an
 * int-promoted byte left by 24 or more, as `*dp << 25` does, can overflow
 * int and is undefined behaviour; the unsigned window avoids it.
 */
static uint8_t *
unpack_27(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 27, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    /* Bits above WIDTH belong to earlier fields and are masked away. */
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 28-bit fields, most
 * significant bit first (28 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 28 bits.
 */
static uint8_t *
pack_28(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 28, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 28 bytes from dp, splits them into 8 consecutive 28-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 *
 * The bytes are gathered in an unsigned 64-bit window.  Shifting an
 * int-promoted byte left by 24 or more, as `*dp << 24` does, overflows int
 * for bytes >= 0x80 and is undefined behaviour; the unsigned window avoids it.
 */
static uint8_t *
unpack_28(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 28, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    /* Bits above WIDTH belong to earlier fields and are masked away. */
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 29-bit fields, most
 * significant bit first (29 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 29 bits.
 */
static uint8_t *
pack_29(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 29, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 29 bytes from dp, splits them into 8 consecutive 29-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 *
 * The bytes are gathered in an unsigned 64-bit window.  Shifting an
 * int-promoted byte left by 24 or more, as `*dp << 28` does, can overflow
 * int and is undefined behaviour; the unsigned window avoids it.
 */
static uint8_t *
unpack_29(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 29, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    /* Bits above WIDTH belong to earlier fields and are masked away. */
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 30-bit fields, most
 * significant bit first (30 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 30 bits.
 */
static uint8_t *
pack_30(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 30, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 30 bytes from dp, splits them into 8 consecutive 30-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 *
 * The bytes are gathered in an unsigned 64-bit window.  Shifting an
 * int-promoted byte left by 24 or more, as `*dp << 28` does, can overflow
 * int and is undefined behaviour; the unsigned window avoids it.
 */
static uint8_t *
unpack_30(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 30, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    /* Bits above WIDTH belong to earlier fields and are masked away. */
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Packs the 8 values at p into rp as consecutive 31-bit fields, most
 * significant bit first (31 bytes in total), and returns the position just
 * after the last byte written.  Each value is expected to fit in 31 bits.
 */
static uint8_t *
pack_31(uint32_t *p, uint8_t *rp)
{
  enum { WIDTH = 31, N_VALUES = 8 };
  uint8_t carry = 0; /* high bits of the output byte still being filled */
  int room = 8;      /* unused low bits in that byte */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    uint32_t x = p[n];
    int rest = WIDTH - room; /* bits of x left once the current byte is full */
    *rp++ = carry + (x >> rest);
    while (rest >= 8) {
      rest -= 8;
      *rp++ = x >> rest;
    }
    carry = x << (8 - rest);
    room = 8 - rest;
  }
  return rp;
}
/*
 * Reads 31 bytes from dp, splits them into 8 consecutive 31-bit fields (most
 * significant bit first) and stores the fields in p[0..7].  Returns the
 * position just after the last byte read.
 *
 * The bytes are gathered in an unsigned 64-bit window.  Shifting an
 * int-promoted byte left by 24 or more, as `*dp << 30` does, can overflow
 * int and is undefined behaviour; the unsigned window avoids it.
 */
static uint8_t *
unpack_31(uint32_t *p, uint8_t *dp)
{
  enum { WIDTH = 31, N_VALUES = 8 };
  const uint64_t mask = ((uint64_t)1 << WIDTH) - 1;
  uint64_t window = 0; /* bytes read so far, newest byte in the low bits */
  int pending = 0;     /* low bits of window that are not consumed yet */
  int n;
  for (n = 0; n < N_VALUES; n++) {
    while (pending < WIDTH) {
      window = (window << 8) | *dp++;
      pending += 8;
    }
    pending -= WIDTH;
    /* Bits above WIDTH belong to earlier fields and are masked away. */
    p[n] = (uint32_t)((window >> pending) & mask);
  }
  return dp;
}
/*
 * Stores the 8 values at p into rp as 32-bit big-endian integers (32 bytes in
 * total) and returns the position just after the last byte written.
 */
static uint8_t *
pack_32(uint32_t *p, uint8_t *rp)
{
  int n;
  for (n = 0; n < 8; n++) {
    uint32_t x = p[n];
    rp[0] = x >> 24;
    rp[1] = x >> 16;
    rp[2] = x >> 8;
    rp[3] = x;
    rp += 4;
  }
  return rp;
}
/*
 * Reads 32 bytes from dp as 8 big-endian 32-bit integers and stores them in
 * p[0..7].  Returns the position just after the last byte read.
 *
 * Every byte is widened to uint32_t before it is shifted: a plain
 * `*dp << 24` shifts an int, which overflows (undefined behaviour) whenever
 * the byte is 0x80 or larger.
 */
static uint8_t *
unpack_32(uint32_t *p, uint8_t *dp)
{
  int n;
  for (n = 0; n < 8; n++, dp += 4) {
    p[n] = ((uint32_t)dp[0] << 24) + ((uint32_t)dp[1] << 16) +
           ((uint32_t)dp[2] << 8) + dp[3];
  }
  return dp;
}
1613/* </generated> */
1614
/*
 * Bit-packs i values from p into rp using w bits per value and returns the
 * position just after the last byte written.
 *
 * Complete groups of 8 values are handed to the width-specific pack_<w>()
 * routine (nothing is written for w == 0).  The remaining, fewer than 8,
 * values are packed one piece at a time, and a trailing partially filled
 * byte is flushed at the end.
 */
static uint8_t *
pack_(uint32_t *p, uint32_t i, int w, uint8_t *rp)
{
  uint32_t *tail_end;
  uint8_t acc = 0; /* output byte under construction */
  int shift;       /* > 0: left shift placing the next value inside acc;
                      < 0: the value still has -shift bits below this byte;
                      == 0: the value ends exactly at the byte boundary */

  for (; i >= 8; i -= 8, p += 8) {
    switch (w) {
    case 0 : break;
    case 1 : rp = pack_1(p, rp); break;
    case 2 : rp = pack_2(p, rp); break;
    case 3 : rp = pack_3(p, rp); break;
    case 4 : rp = pack_4(p, rp); break;
    case 5 : rp = pack_5(p, rp); break;
    case 6 : rp = pack_6(p, rp); break;
    case 7 : rp = pack_7(p, rp); break;
    case 8 : rp = pack_8(p, rp); break;
    case 9 : rp = pack_9(p, rp); break;
    case 10 : rp = pack_10(p, rp); break;
    case 11 : rp = pack_11(p, rp); break;
    case 12 : rp = pack_12(p, rp); break;
    case 13 : rp = pack_13(p, rp); break;
    case 14 : rp = pack_14(p, rp); break;
    case 15 : rp = pack_15(p, rp); break;
    case 16 : rp = pack_16(p, rp); break;
    case 17 : rp = pack_17(p, rp); break;
    case 18 : rp = pack_18(p, rp); break;
    case 19 : rp = pack_19(p, rp); break;
    case 20 : rp = pack_20(p, rp); break;
    case 21 : rp = pack_21(p, rp); break;
    case 22 : rp = pack_22(p, rp); break;
    case 23 : rp = pack_23(p, rp); break;
    case 24 : rp = pack_24(p, rp); break;
    case 25 : rp = pack_25(p, rp); break;
    case 26 : rp = pack_26(p, rp); break;
    case 27 : rp = pack_27(p, rp); break;
    case 28 : rp = pack_28(p, rp); break;
    case 29 : rp = pack_29(p, rp); break;
    case 30 : rp = pack_30(p, rp); break;
    case 31 : rp = pack_31(p, rp); break;
    case 32 : rp = pack_32(p, rp); break;
    }
  }

  tail_end = p + i;
  shift = 8 - w;
  while (p < tail_end) {
    if (shift == 0) {
      /* The value completes the current byte exactly. */
      *rp++ = acc + *p++;
      shift = 8 - w;
      acc = 0;
    } else if (shift > 0) {
      /* The rest of the value fits in the current byte with room to spare. */
      acc += *p++ << shift;
      shift -= w;
    } else {
      /* Emit the next 8 bits; the value is not consumed yet. */
      *rp++ = acc + (*p >> -shift);
      shift += 8;
      acc = 0;
    }
  }
  if (shift + w != 8) { *rp++ = acc; }
  return rp;
}
1679
1680static uint8_t *
1681pack(uint32_t *p, uint32_t i, uint8_t *freq, uint8_t *rp)
1682{
1683 int32_t k, w;
1684 uint8_t ebuf[UNIT_SIZE], *ep = ebuf;
1685 uint32_t s, *pe = p + i, r, th = i - (i >> 3);
1686 for (w = 0, s = 0; w <= 32; w++) {
1687 if ((s += freq[w]) >= th) { break; }
1688 }
1689 if (i == s) {
1690 *rp++ = w;
1691 return pack_(p, i, w, rp);
1692 }
1693 r = 1 << w;
1694 *rp++ = w + 0x80;
1695 *rp++ = i - s;
1696 if (r >= UNIT_SIZE) {
1697 uint32_t first, *last = &first;
1698 for (k = 0; p < pe; p++, k++) {
1699 if (*p >= r) {
1700 GRN_B_ENC(*p - r, ep);
1701 *last = k;
1702 last = p;
1703 }
1704 }
1705 *last = 0;
1706 *rp++ = (uint8_t) first;
1707 } else {
1708 for (k = 0; p < pe; p++, k++) {
1709 if (*p >= r) {
1710 *ep++ = k;
1711 GRN_B_ENC(*p - r, ep);
1712 *p = 0;
1713 }
1714 }
1715 }
1716 rp = pack_(p - i, i, w, rp);
1717 grn_memcpy(rp, ebuf, ep - ebuf);
1718 return rp + (ep - ebuf);
1719}
1720
1721int
1722grn_p_enc(grn_ctx *ctx, uint32_t *data, uint32_t data_size, uint8_t **res)
1723{
1724 uint8_t *rp, freq[33];
1725 uint32_t j, *dp, *dpe, d, w, buf[UNIT_SIZE];
1726 *res = rp = GRN_MALLOC(data_size * sizeof(uint32_t) * 2);
1727 GRN_B_ENC(data_size, rp);
1728 memset(freq, 0, 33);
1729 for (j = 0, dp = data, dpe = dp + data_size; dp < dpe; j++, dp++) {
1730 if (j == UNIT_SIZE) {
1731 rp = pack(buf, j, freq, rp);
1732 memset(freq, 0, 33);
1733 j = 0;
1734 }
1735 if ((d = buf[j] = *dp)) {
1736 GRN_BIT_SCAN_REV(d, w);
1737 freq[w + 1]++;
1738 } else {
1739 freq[0]++;
1740 }
1741 }
1742 if (j) { rp = pack(buf, j, freq, rp); }
1743 return rp - *res;
1744}
1745
/* Bits of datavec.flags. */
#define USE_P_ENC (1<<0) /* Use PForDelta */
#define CUT_OFF (1<<1) /* Deprecated */
#define ODD (1<<2) /* Variable size data */
1749
/*
 * One section of values handled by the encoder.  The datavec_* helpers work
 * on an array of these with one extra trailing entry whose data member marks
 * the end of the shared allocation.
 */
typedef struct {
  uint32_t *data;     /* first value of this section */
  uint32_t data_size; /* number of values in this section */
  uint32_t flags;     /* combination of USE_P_ENC, CUT_OFF and ODD */
} datavec;
1755
1756static grn_rc
1757datavec_reset(grn_ctx *ctx, datavec *dv, uint32_t dvlen,
1758 size_t unitsize, size_t totalsize)
1759{
1760 int i;
1761 if (!dv[0].data || dv[dvlen].data < dv[0].data + totalsize) {
1762 if (dv[0].data) { GRN_FREE(dv[0].data); }
1763 if (!(dv[0].data = GRN_MALLOC(totalsize * sizeof(uint32_t)))) {
1764 MERR("[ii][data-vector][reset] failed to allocate data: "
1765 "length:<%u>, "
1766 "unit-size:<%" GRN_FMT_SIZE ">, "
1767 "total-size:<%" GRN_FMT_SIZE ">",
1768 dvlen,
1769 unitsize,
1770 totalsize);
1771 return ctx->rc;
1772 }
1773 dv[dvlen].data = dv[0].data + totalsize;
1774 }
1775 for (i = 1; i < dvlen; i++) {
1776 dv[i].data = dv[i - 1].data + unitsize;
1777 }
1778 return GRN_SUCCESS;
1779}
1780
1781static grn_rc
1782datavec_init(grn_ctx *ctx, datavec *dv, uint32_t dvlen,
1783 size_t unitsize, size_t totalsize)
1784{
1785 int i;
1786 if (!totalsize) {
1787 memset(dv, 0, sizeof(datavec) * (dvlen + 1));
1788 return GRN_SUCCESS;
1789 }
1790 if (!(dv[0].data = GRN_MALLOC(totalsize * sizeof(uint32_t)))) {
1791 MERR("[ii][data-vector][init] failed to allocate data: "
1792 "length:<%u>, "
1793 "unit-size:<%" GRN_FMT_SIZE ">, "
1794 "total-size:<%" GRN_FMT_SIZE ">",
1795 dvlen,
1796 unitsize,
1797 totalsize);
1798 return ctx->rc;
1799 }
1800 dv[dvlen].data = dv[0].data + totalsize;
1801 for (i = 1; i < dvlen; i++) {
1802 dv[i].data = dv[i - 1].data + unitsize;
1803 }
1804 return GRN_SUCCESS;
1805}
1806
1807static void
1808datavec_fin(grn_ctx *ctx, datavec *dv)
1809{
1810 if (dv[0].data) { GRN_FREE(dv[0].data); }
1811}
1812
1813size_t
1814grn_p_encv(grn_ctx *ctx, datavec *dv, uint32_t dvlen, uint8_t *res)
1815{
1816 uint8_t *rp = res, freq[33];
1817 uint32_t pgap, usep, l, df, data_size, *dp, *dpe;
1818 if (!dvlen || !(df = dv[0].data_size)) { return 0; }
1819 for (usep = 0, data_size = 0, l = 0; l < dvlen; l++) {
1820 uint32_t dl = dv[l].data_size;
1821 if (dl < df || ((dl > df) && (l != dvlen - 1))) {
1822 /* invalid argument */
1823 return 0;
1824 }
1825 usep += (dv[l].flags & USE_P_ENC) << l;
1826 data_size += dl;
1827 }
1828 pgap = data_size - df * dvlen;
1829 if (!usep) {
1830 GRN_B_ENC((df << 1) + 1, rp);
1831 for (l = 0; l < dvlen; l++) {
1832 for (dp = dv[l].data, dpe = dp + dv[l].data_size; dp < dpe; dp++) {
1833 GRN_B_ENC(*dp, rp);
1834 }
1835 }
1836 } else {
1837 uint32_t buf[UNIT_SIZE];
1838 GRN_B_ENC((usep << 1), rp);
1839 GRN_B_ENC(df, rp);
1840 if (dv[dvlen - 1].flags & ODD) {
1841 GRN_B_ENC(pgap, rp);
1842 } else {
1843 GRN_ASSERT(!pgap);
1844 }
1845 for (l = 0; l < dvlen; l++) {
1846 dp = dv[l].data;
1847 dpe = dp + dv[l].data_size;
1848 if ((dv[l].flags & USE_P_ENC)) {
1849 uint32_t j = 0, d;
1850 memset(freq, 0, 33);
1851 while (dp < dpe) {
1852 if (j == UNIT_SIZE) {
1853 rp = pack(buf, j, freq, rp);
1854 memset(freq, 0, 33);
1855 j = 0;
1856 }
1857 if ((d = buf[j++] = *dp++)) {
1858 uint32_t w;
1859 GRN_BIT_SCAN_REV(d, w);
1860 freq[w + 1]++;
1861 } else {
1862 freq[0]++;
1863 }
1864 }
1865 if (j) { rp = pack(buf, j, freq, rp); }
1866 } else {
1867 while (dp < dpe) { GRN_B_ENC(*dp++, rp); }
1868 }
1869 }
1870 }
1871 return rp - res;
1872}
1873
/*
 * Bounds-checked decoder for one variable-length integer.
 *
 * Reads an integer starting at p, stores it in v and advances p past it.
 * pe is the end of the readable area; if the encoded integer would extend
 * beyond pe, the macro makes the ENCLOSING FUNCTION return 0.  v and p must
 * be lvalues.
 *
 * Encoding, selected by the first byte b0:
 *   0x00..0x8e : the value is b0 itself
 *   0x8f       : the next 4 bytes are copied verbatim into the value
 *   0x90..0x9f : 3 more bytes; ((b0 - 0x90), b1, b2, b3) read as a base-256
 *                number, plus 0x20408f
 *   0xa0..0xbf : 2 more bytes; ((b0 - 0xa0), b1, b2) read as a base-256
 *                number, plus 0x408f
 *   0xc0..0xff : 1 more byte;  ((b0 - 0xc0), b1) read as a base-256 number,
 *                plus 0x8f
 *
 * All macro arguments are parenthesized so that expression arguments expand
 * with the intended precedence.
 */
#define GRN_B_DEC_CHECK(v,p,pe) do { \
  uint8_t *_p = (uint8_t *)(p); \
  uint32_t _v; \
  if (_p >= (pe)) { return 0; } \
  _v = *_p++; \
  switch (_v >> 4) { \
  case 0x08 : \
    if (_v == 0x8f) { \
      if (_p + sizeof(uint32_t) > (pe)) { return 0; } \
      grn_memcpy(&_v, _p, sizeof(uint32_t)); \
      _p += sizeof(uint32_t); \
    } \
    break; \
  case 0x09 : \
    if (_p + 3 > (pe)) { return 0; } \
    _v = (_v - 0x90) * 0x100 + *_p++; \
    _v = _v * 0x100 + *_p++; \
    _v = _v * 0x100 + *_p++ + 0x20408f; \
    break; \
  case 0x0a : \
  case 0x0b : \
    if (_p + 2 > (pe)) { return 0; } \
    _v = (_v - 0xa0) * 0x100 + *_p++; \
    _v = _v * 0x100 + *_p++ + 0x408f; \
    break; \
  case 0x0c : \
  case 0x0d : \
  case 0x0e : \
  case 0x0f : \
    if (_p + 1 > (pe)) { return 0; } \
    _v = (_v - 0xc0) * 0x100 + *_p++ + 0x8f; \
    break; \
  } \
  (v) = _v; \
  (p) = _p; \
} while (0)
1910
1911static uint8_t *
1912unpack(uint8_t *dp, uint8_t *dpe, int i, uint32_t *rp)
1913{
1914 uint8_t ne = 0, k = 0, w = *dp++;
1915 uint32_t m, *p = rp;
1916 if (w & 0x80) {
1917 ne = *dp++;
1918 w -= 0x80;
1919 m = (1 << w) - 1;
1920 if (m >= UNIT_MASK) { k = *dp++; }
1921 } else {
1922 m = (1 << w) - 1;
1923 }
1924 if (w) {
1925 while (i >= 8) {
1926 if (dp + w > dpe) { return NULL; }
1927 switch (w) {
1928 case 1 : dp = unpack_1(p, dp); break;
1929 case 2 : dp = unpack_2(p, dp); break;
1930 case 3 : dp = unpack_3(p, dp); break;
1931 case 4 : dp = unpack_4(p, dp); break;
1932 case 5 : dp = unpack_5(p, dp); break;
1933 case 6 : dp = unpack_6(p, dp); break;
1934 case 7 : dp = unpack_7(p, dp); break;
1935 case 8 : dp = unpack_8(p, dp); break;
1936 case 9 : dp = unpack_9(p, dp); break;
1937 case 10 : dp = unpack_10(p, dp); break;
1938 case 11 : dp = unpack_11(p, dp); break;
1939 case 12 : dp = unpack_12(p, dp); break;
1940 case 13 : dp = unpack_13(p, dp); break;
1941 case 14 : dp = unpack_14(p, dp); break;
1942 case 15 : dp = unpack_15(p, dp); break;
1943 case 16 : dp = unpack_16(p, dp); break;
1944 case 17 : dp = unpack_17(p, dp); break;
1945 case 18 : dp = unpack_18(p, dp); break;
1946 case 19 : dp = unpack_19(p, dp); break;
1947 case 20 : dp = unpack_20(p, dp); break;
1948 case 21 : dp = unpack_21(p, dp); break;
1949 case 22 : dp = unpack_22(p, dp); break;
1950 case 23 : dp = unpack_23(p, dp); break;
1951 case 24 : dp = unpack_24(p, dp); break;
1952 case 25 : dp = unpack_25(p, dp); break;
1953 case 26 : dp = unpack_26(p, dp); break;
1954 case 27 : dp = unpack_27(p, dp); break;
1955 case 28 : dp = unpack_28(p, dp); break;
1956 case 29 : dp = unpack_29(p, dp); break;
1957 case 30 : dp = unpack_30(p, dp); break;
1958 case 31 : dp = unpack_31(p, dp); break;
1959 case 32 : dp = unpack_32(p, dp); break;
1960 }
1961 i -= 8;
1962 p += 8;
1963 }
1964 {
1965 int b;
1966 uint32_t v, *pe;
1967 for (b = 8 - w, v = 0, pe = p + i; p < pe && dp < dpe;) {
1968 if (b > 0) {
1969 *p++ = v + ((*dp >> b) & m);
1970 b -= w;
1971 v = 0;
1972 } else if (b < 0) {
1973 v += (*dp++ << -b) & m;
1974 b += 8;
1975 } else {
1976 *p++ = v + (*dp++ & m);
1977 b = 8 - w;
1978 v = 0;
1979 }
1980 }
1981 if (b + w != 8) { dp++; }
1982 }
1983 } else {
1984 memset(p, 0, sizeof(uint32_t) * i);
1985 }
1986 if (ne) {
1987 if (m >= UNIT_MASK) {
1988 uint32_t *pp;
1989 while (ne--) {
1990 pp = &rp[k];
1991 k = *pp;
1992 GRN_B_DEC_CHECK(*pp, dp, dpe);
1993 *pp += (m + 1);
1994 }
1995 } else {
1996 while (ne--) {
1997 k = *dp++;
1998 GRN_B_DEC_CHECK(rp[k], dp, dpe);
1999 rp[k] += (m + 1);
2000 }
2001 }
2002 }
2003 return dp;
2004}
2005
2006int
2007grn_p_dec(grn_ctx *ctx, uint8_t *data, uint32_t data_size, uint32_t nreq, uint32_t **res)
2008{
2009 uint8_t *dp = data, *dpe = data + data_size;
2010 uint32_t rest, orig_size, *rp, *rpe;
2011 GRN_B_DEC(orig_size, dp);
2012 if (!orig_size) {
2013 if (!nreq || nreq > data_size) { nreq = data_size; }
2014 if ((*res = rp = GRN_MALLOC(nreq * 4))) {
2015 for (rpe = rp + nreq; dp < data + data_size && rp < rpe; rp++) {
2016 GRN_B_DEC(*rp, dp);
2017 }
2018 }
2019 return rp - *res;
2020 } else {
2021 if (!(*res = rp = GRN_MALLOC(orig_size * sizeof(uint32_t)))) {
2022 return 0;
2023 }
2024 if (!nreq || nreq > orig_size) { nreq = orig_size; }
2025 for (rest = nreq; rest >= UNIT_SIZE; rest -= UNIT_SIZE) {
2026 if (!(dp = unpack(dp, dpe, UNIT_SIZE, rp))) { return 0; }
2027 rp += UNIT_SIZE;
2028 }
2029 if (rest) { if (!(dp = unpack(dp, dpe, rest, rp))) { return 0; } }
2030 GRN_ASSERT(data + data_size == dp);
2031 return nreq;
2032 }
2033}
2034
2035int
2036grn_p_decv(grn_ctx *ctx, uint8_t *data, uint32_t data_size, datavec *dv, uint32_t dvlen)
2037{
2038 size_t size;
2039 uint32_t df, l, i, *rp, nreq;
2040 uint8_t *dp = data, *dpe = data + data_size;
2041 if (!data_size) {
2042 dv[0].data_size = 0;
2043 return 0;
2044 }
2045 for (nreq = 0; nreq < dvlen; nreq++) {
2046 if (dv[nreq].flags & CUT_OFF) { break; }
2047 }
2048 if (!nreq) { return 0; }
2049 GRN_B_DEC_CHECK(df, dp, dpe);
2050 if ((df & 1)) {
2051 df >>= 1;
2052 size = nreq == dvlen ? data_size : df * nreq;
2053 if (dv[dvlen].data < dv[0].data + size) {
2054 if (dv[0].data) { GRN_FREE(dv[0].data); }
2055 if (!(rp = GRN_MALLOC(size * sizeof(uint32_t)))) { return 0; }
2056 dv[dvlen].data = rp + size;
2057 } else {
2058 rp = dv[0].data;
2059 }
2060 for (l = 0; l < dvlen; l++) {
2061 if (dv[l].flags & CUT_OFF) { break; }
2062 dv[l].data = rp;
2063 if (l < dvlen - 1) {
2064 for (i = 0; i < df; i++, rp++) { GRN_B_DEC_CHECK(*rp, dp, dpe); }
2065 } else {
2066 for (i = 0; dp < dpe; i++, rp++) { GRN_B_DEC_CHECK(*rp, dp, dpe); }
2067 }
2068 dv[l].data_size = i;
2069 }
2070 } else {
2071 uint32_t n, rest, usep = df >> 1;
2072 GRN_B_DEC_CHECK(df, dp, dpe);
2073 if (dv[dvlen -1].flags & ODD) {
2074 GRN_B_DEC_CHECK(rest, dp, dpe);
2075 } else {
2076 rest = 0;
2077 }
2078 size = df * nreq + (nreq == dvlen ? rest : 0);
2079 if (dv[dvlen].data < dv[0].data + size) {
2080 if (dv[0].data) { GRN_FREE(dv[0].data); }
2081 if (!(rp = GRN_MALLOC(size * sizeof(uint32_t)))) { return 0; }
2082 dv[dvlen].data = rp + size;
2083 } else {
2084 rp = dv[0].data;
2085 }
2086 for (l = 0; l < dvlen; l++) {
2087 if (dv[l].flags & CUT_OFF) { break; }
2088 dv[l].data = rp;
2089 dv[l].data_size = n = (l < dvlen - 1) ? df : df + rest;
2090 if (usep & (1 << l)) {
2091 for (; n >= UNIT_SIZE; n -= UNIT_SIZE) {
2092 if (!(dp = unpack(dp, dpe, UNIT_SIZE, rp))) { return 0; }
2093 rp += UNIT_SIZE;
2094 }
2095 if (n) {
2096 if (!(dp = unpack(dp, dpe, n, rp))) { return 0; }
2097 rp += n;
2098 }
2099 dv[l].flags |= USE_P_ENC;
2100 } else {
2101 for (; n; n--, rp++) {
2102 GRN_B_DEC_CHECK(*rp, dp, dpe);
2103 }
2104 }
2105 }
2106 GRN_ASSERT(dp == dpe);
2107 if (dp != dpe) {
2108 GRN_LOG(ctx, GRN_LOG_DEBUG, "data_size=%d, %" GRN_FMT_LLD,
2109 data_size, (long long int)(dpe - dp));
2110 }
2111 }
2112 return rp - dv[0].data;
2113}
2114
2115int
2116grn_b_enc(grn_ctx *ctx, uint32_t *data, uint32_t data_size, uint8_t **res)
2117{
2118 uint8_t *rp;
2119 uint32_t *dp, i;
2120 *res = rp = GRN_MALLOC(data_size * sizeof(uint32_t) * 2);
2121 GRN_B_ENC(data_size, rp);
2122 for (i = data_size, dp = data; i; i--, dp++) {
2123 GRN_B_ENC(*dp, rp);
2124 }
2125 return rp - *res;
2126}
2127
2128int
2129grn_b_dec(grn_ctx *ctx, uint8_t *data, uint32_t data_size, uint32_t **res)
2130{
2131 uint32_t i, *rp, orig_size;
2132 uint8_t *dp = data;
2133 GRN_B_DEC(orig_size, dp);
2134 *res = rp = GRN_MALLOC(orig_size * sizeof(uint32_t));
2135 for (i = orig_size; i; i--, rp++) {
2136 GRN_B_DEC(*rp, dp);
2137 }
2138 return orig_size;
2139}
2140
2141/* buffer */
2142
/*
 * Per-term entry of a buffer segment.
 *   tid            : term id (0 marks an unused entry; the CHUNK_SPLIT bit
 *                    is tested on it by buffer_merge())
 *   size_in_chunk  : byte size of the term's data in the chunk
 *   pos_in_chunk   : byte offset of the term's data in the chunk
 *   size_in_buffer : number of records linked for this term in the buffer
 *   pos_in_buffer  : position of the first record (0 when there is none)
 * The field order and widths are part of the stored layout.
 */
typedef struct {
  uint32_t tid, size_in_chunk, pos_in_chunk;
  uint16_t size_in_buffer, pos_in_buffer;
} buffer_term;
2150
/*
 * Header of one record stored in a buffer segment; the encoded posting
 * follows it immediately.
 *   step : position of the next record of the same term (0 ends the list)
 *   jump : 0 = no skip link, 1 = record is deleted,
 *          otherwise the position of a later record (skip link)
 */
typedef struct {
  uint16_t step, jump;
} buffer_rec;
2155
/*
 * Header placed at the beginning of a buffer segment.
 *   chunk       : chunk associated with the buffer (presumably its
 *                 segment number -- confirm against chunk_new())
 *   chunk_size  : size of that chunk
 *   buffer_free : number of free bytes left in the buffer
 *   nterms      : number of entries used in terms[]
 *   nterms_void : number of those entries that are void
 */
typedef struct {
  uint32_t chunk, chunk_size, buffer_free;
  uint16_t nterms, nterms_void;
} buffer_header;
2163
2164struct grn_ii_buffer {
2165 buffer_header header;
2166 buffer_term terms[(S_SEGMENT - sizeof(buffer_header))/sizeof(buffer_term)];
2167};
2168
2169typedef struct grn_ii_buffer buffer;
2170
2171inline static uint32_t
2172buffer_open(grn_ctx *ctx, grn_ii *ii, uint32_t pos, buffer_term **bt, buffer **b)
2173{
2174 byte *p = NULL;
2175 uint16_t lseg = (uint16_t) (LSEG(pos));
2176 uint32_t pseg = ii->header->binfo[lseg];
2177 if (pseg != GRN_II_PSEG_NOT_ASSIGNED) {
2178 GRN_IO_SEG_REF(ii->seg, pseg, p);
2179 if (!p) { return GRN_II_PSEG_NOT_ASSIGNED; }
2180 if (b) { *b = (buffer *)p; }
2181 if (bt) { *bt = (buffer_term *)(p + LPOS(pos)); }
2182 }
2183 return pseg;
2184}
2185
2186inline static grn_rc
2187buffer_close(grn_ctx *ctx, grn_ii *ii, uint32_t pseg)
2188{
2189 if (pseg >= ii->seg->header->max_segment) {
2190 GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid pseg buffer_close(%d)", pseg);
2191 return GRN_INVALID_ARGUMENT;
2192 }
2193 GRN_IO_SEG_UNREF(ii->seg, pseg);
2194 return GRN_SUCCESS;
2195}
2196
2197inline static uint32_t
2198buffer_open_if_capable(grn_ctx *ctx, grn_ii *ii, int32_t seg, int size, buffer **b)
2199{
2200 uint32_t pseg, pos = SEG2POS(seg, 0);
2201 if ((pseg = buffer_open(ctx, ii, pos, NULL, b)) != GRN_II_PSEG_NOT_ASSIGNED) {
2202 uint16_t nterms = (*b)->header.nterms - (*b)->header.nterms_void;
2203 if (!((nterms < 4096 ||
2204 (ii->header->total_chunk_size >> ((nterms >> 8) - 6))
2205 > (*b)->header.chunk_size) &&
2206 ((*b)->header.buffer_free >= size + sizeof(buffer_term)))) {
2207 buffer_close(ctx, ii, pseg);
2208 return GRN_II_PSEG_NOT_ASSIGNED;
2209 }
2210 }
2211 return pseg;
2212}
2213
/* Identifier of a posting: record id and section id. */
typedef struct {
  uint32_t rid, sid;
} docid;
2218
/* A record is marked as deleted by storing 1 in its jump field, so 1 is
 * never usable as a real jump target. */
#define BUFFER_REC_DEL(r) ((r)->jump = 1)
#define BUFFER_REC_DELETED(r) ((r)->jump == 1)

/* Conversions between a record pointer and its position. A position is
 * an index counted in sizeof(buffer_rec) units from the start of buffer
 * `b`; BUFFER_REC_POS() truncates it to uint16_t. */
#define BUFFER_REC_AT(b,pos) ((buffer_rec *)(b) + (pos))
#define BUFFER_REC_POS(b,rec) ((uint16_t)((rec) - (buffer_rec *)(b)))
2224
2225inline static void
2226buffer_term_dump(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_term *bt)
2227{
2228 int pos, rid, sid;
2229 uint8_t *p;
2230 buffer_rec *r;
2231
2232 if (!grn_logger_pass(ctx, GRN_LOG_DEBUG)) {
2233 return;
2234 }
2235
2236 GRN_LOG(ctx, GRN_LOG_DEBUG,
2237 "b=(%x %u %u %u)", b->header.chunk, b->header.chunk_size,
2238 b->header.buffer_free, b->header.nterms);
2239 GRN_LOG(ctx, GRN_LOG_DEBUG,
2240 "bt=(%u %u %u %u %u)", bt->tid, bt->size_in_chunk, bt->pos_in_chunk,
2241 bt->size_in_buffer, bt->pos_in_buffer);
2242 for (pos = bt->pos_in_buffer; pos; pos = r->step) {
2243 r = BUFFER_REC_AT(b, pos);
2244 p = GRN_NEXT_ADDR(r);
2245 GRN_B_DEC(rid, p);
2246 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
2247 GRN_B_DEC(sid, p);
2248 } else {
2249 sid = 1;
2250 }
2251 GRN_LOG(ctx, GRN_LOG_DEBUG,
2252 "%d=(%d:%d),(%d:%d)", pos, r->jump, r->step, rid, sid);
2253 }
2254}
2255
2256inline static grn_rc
2257check_jump(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_rec *r, int j)
2258{
2259 uint16_t i = BUFFER_REC_POS(b, r);
2260 uint8_t *p;
2261 buffer_rec *r2;
2262 docid id, id2;
2263 if (!j) { return GRN_SUCCESS; }
2264 p = GRN_NEXT_ADDR(r);
2265 GRN_B_DEC(id.rid, p);
2266 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
2267 GRN_B_DEC(id.sid, p);
2268 } else {
2269 id.sid = 1;
2270 }
2271 if (j == 1) {
2272 GRN_LOG(ctx, GRN_LOG_DEBUG, "deleting! %d(%d:%d)", i, id.rid, id.sid);
2273 return GRN_SUCCESS;
2274 }
2275 r2 = BUFFER_REC_AT(b, j);
2276 p = GRN_NEXT_ADDR(r2);
2277 GRN_B_DEC(id2.rid, p);
2278 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
2279 GRN_B_DEC(id2.sid, p);
2280 } else {
2281 id2.sid = 1;
2282 }
2283 if (r2->step == i) {
2284 GRN_LOG(ctx, GRN_LOG_EMERG, "cycle! %d(%d:%d)<->%d(%d:%d)",
2285 i, id.rid, id.sid, j, id2.rid, id2.sid);
2286 return GRN_FILE_CORRUPT;
2287 }
2288 if (id2.rid < id.rid || (id2.rid == id.rid && id2.sid <= id.sid)) {
2289 GRN_LOG(ctx, GRN_LOG_CRIT,
2290 "invalid jump! %d(%d:%d)(%d:%d)->%d(%d:%d)(%d:%d)",
2291 i, r->jump, r->step, id.rid, id.sid, j, r2->jump, r2->step,
2292 id2.rid, id2.sid);
2293 return GRN_FILE_CORRUPT;
2294 }
2295 return GRN_SUCCESS;
2296}
2297
2298inline static grn_rc
2299set_jump_r(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_rec *from, int to)
2300{
2301 int i, j, max_jump = 100;
2302 buffer_rec *r, *r2;
2303 for (r = from, j = to; j > 1 && max_jump--; r = BUFFER_REC_AT(b, r->step)) {
2304 r2 = BUFFER_REC_AT(b, j);
2305 if (r == r2) { break; }
2306 if (BUFFER_REC_DELETED(r2)) { break; }
2307 if (j == (i = r->jump)) { break; }
2308 if (j == r->step) { break; }
2309 if (check_jump(ctx, ii, b, r, j)) {
2310 ERR(GRN_FILE_CORRUPT, "check_jump failed");
2311 return ctx->rc;
2312 }
2313 r->jump = j;
2314 j = i;
2315 if (!r->step) { return GRN_FILE_CORRUPT; }
2316 }
2317 return GRN_SUCCESS;
2318}
2319
/*
 * Stores in `n` the number of 1 bits of the 32 bit value `x`.
 * The count is built by summing neighbouring bit fields of doubling
 * width (1, 2, 4, 8 and 16 bits). `n` must be an lvalue; it is used as
 * the working variable.
 */
#define GET_NUM_BITS(x,n) do {\
  (n) = (x);\
  (n) = ((n) & 0x55555555) + (((n) >> 1) & 0x55555555);\
  (n) = ((n) & 0x33333333) + (((n) >> 2) & 0x33333333);\
  (n) = ((n) & 0x0f0f0f0f) + (((n) >> 4) & 0x0f0f0f0f);\
  (n) = ((n) & 0x00ff00ff) + (((n) >> 8) & 0x00ff00ff);\
  (n) = ((n) & 0x0000ffff) + (((n) >> 16) & 0x0000ffff);\
} while (0)
2328
/*
 * Links the new record `rnew` into the record list of term `bt`.
 *
 *   b     : buffer that holds both the list and rnew
 *   bt    : term entry whose list (bt->pos_in_buffer) is updated
 *   rnew  : record header already placed in the buffer
 *   bs    : encoded posting; size - sizeof(buffer_rec) bytes are copied
 *           right after rnew
 *   u     : update spec giving the (rid, sid) of the new posting
 *   size  : total size of the record, header included
 *
 * The list is kept sorted by (rid, sid). `step` links are followed one by
 * one, `jump` links are used as shortcuts and are (re)installed with
 * set_jump_r() while walking. Existing records with the same (rid, sid)
 * are marked as deleted; when u->sid is 0 every record with the same rid
 * is marked as deleted.
 *
 * Returns ctx->rc.
 */
inline static grn_rc
buffer_put(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_term *bt,
           buffer_rec *rnew, uint8_t *bs, grn_ii_updspec *u, int size)
{
  uint8_t *p;
  docid id_curr = {0, 0}, id_start = {0, 0}, id_post = {0, 0};
  buffer_rec *r_curr, *r_start = NULL;
  /* lastp: the link (list head or a step field) that will point at rnew.
   * last : position of the record that owns that link. */
  uint16_t last = 0, *lastp = &bt->pos_in_buffer, pos = BUFFER_REC_POS(b, rnew);
  int vdelta = 0, delta, delta0 = 0, vhops = 0, nhops = 0, reset = 1;
  grn_memcpy(GRN_NEXT_ADDR(rnew), bs, size - sizeof(buffer_rec));
  for (;;) {
    if (!*lastp) {
      /* End of the list: append rnew. */
      rnew->step = 0;
      rnew->jump = 0;
      // smb_wmb();
      *lastp = pos;
      if (bt->size_in_buffer++ > 1) {
        /* The head record jumps to the tail. */
        buffer_rec *rhead = BUFFER_REC_AT(b, bt->pos_in_buffer);
        rhead->jump = pos;
        if (!(bt->size_in_buffer & 1)) {
          /* On even list sizes, follow up to popcount(size) jump links
           * starting at the second record and refresh the jumps from
           * there towards the previous tail. */
          int n;
          buffer_rec *r = BUFFER_REC_AT(b, rhead->step), *r2;
          GET_NUM_BITS(bt->size_in_buffer, n);
          while (n-- && (r->jump > 1)) {
            r2 = BUFFER_REC_AT(b, r->jump);
            if (BUFFER_REC_DELETED(r2)) { break; }
            r = r2;
          }
          if (r != rnew) { set_jump_r(ctx, ii, b, r, last); }
        }
      }
      break;
    }
    /* Decode the id of the record the current link points at. */
    r_curr = BUFFER_REC_AT(b, *lastp);
    p = GRN_NEXT_ADDR(r_curr);
    GRN_B_DEC(id_curr.rid, p);
    if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
      GRN_B_DEC(id_curr.sid, p);
    } else {
      id_curr.sid = 1;
    }
    if (id_curr.rid < id_post.rid ||
        (id_curr.rid == id_post.rid && id_curr.sid < id_post.sid)) {
      /* The ids went backwards: the list is broken. Report it, drop the
       * whole list of this term and restart, so that rnew becomes the
       * only record of the term. */
      {
        DEFINE_NAME(ii);
        CRIT(GRN_FILE_CORRUPT,
             "[ii][buffer][put] loop is found: "
             "<%.*s>: "
             "(%d:%d)->(%d:%d)",
             name_size, name,
             id_post.rid, id_post.sid, id_curr.rid, id_curr.sid);
      }
      buffer_term_dump(ctx, ii, b, bt);
      bt->pos_in_buffer = 0;
      bt->size_in_buffer = 0;
      lastp = &bt->pos_in_buffer;
      continue;
    }
    id_post.rid = id_curr.rid;
    id_post.sid = id_curr.sid;
    if (u->rid < id_curr.rid || (u->rid == id_curr.rid && u->sid <= id_curr.sid)) {
      /* rnew goes in front of r_curr. */
      uint16_t step = *lastp, jump = r_curr->jump;
      if (u->rid == id_curr.rid) {
        if (u->sid == 0) {
          /* sid 0: mark every record of this rid as deleted and link
           * rnew to the first record with another rid (or to the end). */
          while (id_curr.rid == u->rid) {
            BUFFER_REC_DEL(r_curr);
            if (!(step = r_curr->step)) { break; }
            r_curr = BUFFER_REC_AT(b, step);
            p = GRN_NEXT_ADDR(r_curr);
            GRN_B_DEC(id_curr.rid, p);
            if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
              GRN_B_DEC(id_curr.sid, p);
            } else {
              id_curr.sid = 1;
            }
          }
        } else if (u->sid == id_curr.sid) {
          /* Same posting: the old record is replaced by rnew. */
          BUFFER_REC_DEL(r_curr);
          step = r_curr->step;
        }
      }
      rnew->step = step;
      /* Inherit r_curr's jump only when it is valid for rnew. */
      rnew->jump = check_jump(ctx, ii, b, rnew, jump) ? 0 : jump;
      // smb_wmb();
      *lastp = pos;
      break;
    }

    /* rnew goes after r_curr: keep walking and maintain the jump links.
     * delta0 is the id distance still to travel from the start of the
     * current run, vdelta the next threshold (it grows by delta0 / 2,
     * delta0 / 4, ...) at which r_start is moved forward. */
    if (reset) {
      r_start = r_curr;
      id_start.rid = id_curr.rid;
      id_start.sid = id_curr.sid;
      if (!(delta0 = u->rid - id_start.rid)) { delta0 = u->sid - id_start.sid; }
      nhops = 0;
      vhops = 1;
      vdelta = delta0 >> 1;
    } else {
      if (!(delta = id_curr.rid - id_start.rid)) {
        delta = id_curr.sid - id_start.sid;
      }
      if (vdelta < delta) {
        vdelta += (delta0 >> ++vhops);
        r_start = r_curr;
      }
      if (nhops > vhops) {
        set_jump_r(ctx, ii, b, r_start, *lastp);
      } else {
        nhops++;
      }
    }

    last = *lastp;
    lastp = &r_curr->step;
    reset = 0;
    {
      /* Take r_curr's jump link when its target is alive and still in
       * front of the new posting; otherwise start a new run. */
      uint16_t posj = r_curr->jump;
      if (posj > 1) {
        buffer_rec *rj = BUFFER_REC_AT(b, posj);
        if (!BUFFER_REC_DELETED(rj)) {
          docid idj;
          p = GRN_NEXT_ADDR(rj);
          GRN_B_DEC(idj.rid, p);
          if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
            GRN_B_DEC(idj.sid, p);
          } else {
            idj.sid = 1;
          }
          if (idj.rid < u->rid || (idj.rid == u->rid && idj.sid < u->sid)) {
            last = posj;
            lastp = &rj->step;
          } else {
            reset = 1;
          }
        }
      }
    }
  }
  return ctx->rc;
}
2468
2469/* array */
2470
2471inline static uint32_t *
2472array_at(grn_ctx *ctx, grn_ii *ii, uint32_t id)
2473{
2474 byte *p = NULL;
2475 uint32_t seg, pseg;
2476 if (id > GRN_ID_MAX) { return NULL; }
2477 seg = id >> W_ARRAY;
2478 if ((pseg = ii->header->ainfo[seg]) == GRN_II_PSEG_NOT_ASSIGNED) {
2479 return NULL;
2480 }
2481 GRN_IO_SEG_REF(ii->seg, pseg, p);
2482 if (!p) { return NULL; }
2483 return (uint32_t *)(p + (id & ARRAY_MASK_IN_A_SEGMENT) * S_ARRAY_ELEMENT);
2484}
2485
2486inline static uint32_t *
2487array_get(grn_ctx *ctx, grn_ii *ii, uint32_t id)
2488{
2489 byte *p = NULL;
2490 uint16_t seg;
2491 uint32_t pseg;
2492 if (id > GRN_ID_MAX) { return NULL; }
2493 seg = id >> W_ARRAY;
2494 if ((pseg = ii->header->ainfo[seg]) == GRN_II_PSEG_NOT_ASSIGNED) {
2495 if (segment_get_clear(ctx, ii, &pseg)) { return NULL; }
2496 ii->header->ainfo[seg] = pseg;
2497 if (seg >= ii->header->amax) { ii->header->amax = seg + 1; }
2498 }
2499 GRN_IO_SEG_REF(ii->seg, pseg, p);
2500 if (!p) { return NULL; }
2501 return (uint32_t *)(p + (id & ARRAY_MASK_IN_A_SEGMENT) * S_ARRAY_ELEMENT);
2502}
2503
2504inline static void
2505array_unref(grn_ii *ii, uint32_t id)
2506{
2507 GRN_IO_SEG_UNREF(ii->seg, ii->header->ainfo[id >> W_ARRAY]);
2508}
2509
2510/* updspec */
2511
2512grn_ii_updspec *
2513grn_ii_updspec_open(grn_ctx *ctx, uint32_t rid, uint32_t sid)
2514{
2515 grn_ii_updspec *u;
2516 if (!(u = GRN_MALLOC(sizeof(grn_ii_updspec)))) { return NULL; }
2517 u->rid = rid;
2518 u->sid = sid;
2519 u->weight = 0;
2520 u->tf = 0;
2521 u->atf = 0;
2522 u->pos = NULL;
2523 u->tail = NULL;
2524 // u->vnodes = NULL;
2525 return u;
2526}
2527
2528#define GRN_II_MAX_TF 0x1ffff
2529
2530grn_rc
2531grn_ii_updspec_add(grn_ctx *ctx, grn_ii_updspec *u, int pos, int32_t weight)
2532{
2533 struct _grn_ii_pos *p;
2534 u->atf++;
2535 if (u->tf >= GRN_II_MAX_TF) { return GRN_SUCCESS; }
2536 if (!(p = GRN_MALLOC(sizeof(struct _grn_ii_pos)))) {
2537 return GRN_NO_MEMORY_AVAILABLE;
2538 }
2539 u->weight += weight;
2540 p->pos = pos;
2541 p->next = NULL;
2542 if (u->tail) {
2543 u->tail->next = p;
2544 } else {
2545 u->pos = p;
2546 }
2547 u->tail = p;
2548 u->tf++;
2549 return GRN_SUCCESS;
2550}
2551
2552int
2553grn_ii_updspec_cmp(grn_ii_updspec *a, grn_ii_updspec *b)
2554{
2555 struct _grn_ii_pos *pa, *pb;
2556 if (a->rid != b->rid) { return a->rid - b->rid; }
2557 if (a->sid != b->sid) { return a->sid - b->sid; }
2558 if (a->weight != b->weight) { return a->weight - b->weight; }
2559 if (a->tf != b->tf) { return a->tf - b->tf; }
2560 for (pa = a->pos, pb = b->pos; pa && pb; pa = pa->next, pb = pb->next) {
2561 if (pa->pos != pb->pos) { return pa->pos - pb->pos; }
2562 }
2563 if (pa) { return 1; }
2564 if (pb) { return -1; }
2565 return 0;
2566}
2567
2568grn_rc
2569grn_ii_updspec_close(grn_ctx *ctx, grn_ii_updspec *u)
2570{
2571 struct _grn_ii_pos *p = u->pos, *q;
2572 while (p) {
2573 q = p->next;
2574 GRN_FREE(p);
2575 p = q;
2576 }
2577 GRN_FREE(u);
2578 return GRN_SUCCESS;
2579}
2580
2581inline static uint8_t *
2582encode_rec(grn_ctx *ctx, grn_ii *ii, grn_ii_updspec *u, unsigned int *size, int deletep)
2583{
2584 uint8_t *br, *p;
2585 struct _grn_ii_pos *pp;
2586 uint32_t lpos, tf, weight;
2587 if (deletep) {
2588 tf = 0;
2589 weight = 0;
2590 } else {
2591 tf = u->tf;
2592 weight = u->weight;
2593 }
2594 if (!(br = GRN_MALLOC((tf + 4) * 5))) {
2595 return NULL;
2596 }
2597 p = br;
2598 GRN_B_ENC(u->rid, p);
2599 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
2600 GRN_B_ENC(u->sid, p);
2601 } else {
2602 u->sid = 1;
2603 }
2604 GRN_B_ENC(tf, p);
2605 if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { GRN_B_ENC(weight, p); }
2606 if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
2607 for (lpos = 0, pp = u->pos; pp && tf--; lpos = pp->pos, pp = pp->next) {
2608 GRN_B_ENC(pp->pos - lpos, p);
2609 }
2610 }
2611 while (((intptr_t)p & 0x03)) { *p++ = 0; }
2612 *size = (unsigned int) ((p - br) + sizeof(buffer_rec));
2613 return br;
2614}
2615
2616typedef struct {
2617 grn_ii *ii;
2618 grn_hash *h;
2619} lexicon_deletable_arg;
2620
#ifdef CASCADE_DELETE_LEXICON
/*
 * Delete callback: tells whether term `tid` may be removed from the
 * lexicon. Returns 1 when it may be deleted and 0 otherwise.
 *
 * The term is kept when no hash is supplied or when the first word of
 * its array element is not 0. Otherwise it is deletable when it has no
 * entry in the hash (and no error is set on ctx), or when its entry has
 * tf == 0 or sid == 0.
 */
static int
lexicon_deletable(grn_ctx *ctx, grn_obj *lexicon, grn_id tid, void *arg)
{
  lexicon_deletable_arg *darg = (lexicon_deletable_arg *)arg;
  grn_hash *h = darg->h;
  grn_ii *ii = darg->ii;
  grn_ii_updspec **u;
  uint32_t *a;
  if (!h) {
    return 0;
  }
  a = array_at(ctx, ii, tid);
  if (a) {
    /* Read the element before the segment reference is dropped. */
    int in_use = a[0] != 0;
    array_unref(ii, tid);
    if (in_use) {
      return 0;
    }
  }
  if (!grn_hash_get(ctx, h, &tid, sizeof(grn_id), (void **) &u)) {
    return (ERRP(ctx, GRN_ERROR)) ? 0 : 1;
  }
  return (!(*u)->tf || !(*u)->sid) ? 1 : 0;
}
#endif /* CASCADE_DELETE_LEXICON */
2646
2647inline static void
2648lexicon_delete(grn_ctx *ctx, grn_ii *ii, uint32_t tid, grn_hash *h)
2649{
2650#ifdef CASCADE_DELETE_LEXICON
2651 lexicon_deletable_arg arg = {ii, h};
2652 grn_table_delete_optarg optarg = {0, lexicon_deletable, &arg};
2653 _grn_table_delete_by_id(ctx, ii->lexicon, tid, &optarg);
2654#endif /* CASCADE_DELETE_LEXICON */
2655}
2656
2657typedef struct {
2658 grn_id rid;
2659 uint32_t sid;
2660 uint32_t tf;
2661 uint32_t weight;
2662 uint32_t flags;
2663} docinfo;
2664
/*
 * Loads the next posting of the decoded chunk into `cid`.
 *
 * Uses variables of the expanding function: sdf (postings left), srp /
 * ssp / stp / sop (cursors over the rid gaps, sid gaps, tf - 1 values and
 * weights), snp (cursor over the position gaps), cid and ii.
 *
 * The rid is rebuilt from its gap; a non-zero gap restarts the sid at 0
 * before the sid gap is added. snp is first moved past the positions of
 * the previous posting. When no posting is left, cid.rid is set to 0,
 * which the other macros treat as "chunk exhausted".
 */
#define GETNEXTC() do {\
  if (sdf) {\
    uint32_t dgap = *srp++;\
    cid.rid += dgap;\
    if (dgap) { cid.sid = 0; }\
    snp += cid.tf;\
    cid.tf = 1 + *stp++;\
    if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { cid.weight = *sop++; }\
    if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {\
      cid.sid += 1 + *ssp++;\
    } else {\
      cid.sid = 1;\
    }\
    sdf--;\
  } else {\
    cid.rid = 0;\
  }\
} while (0)
2683
/*
 * Appends posting `id` to the output vectors of the expanding function
 * (ridp, sidp, tfp, weightp) and remembers it in `lid`.
 *
 * The rid is written as the gap from lid.rid. The sid is written minus 1,
 * as a gap from lid.sid when the rid did not change. tf is written as
 * tf - 1. sid and weight are written only when the index has them.
 */
#define PUTNEXT_(id) do {\
  uint32_t dgap = id.rid - lid.rid;\
  uint32_t sgap = (dgap ? id.sid : id.sid - lid.sid) - 1;\
  *ridp++ = dgap;\
  if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {\
    *sidp++ = sgap;\
  }\
  *tfp++ = id.tf - 1;\
  if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { *weightp++ = id.weight; }\
  lid.rid = id.rid;\
  lid.sid = id.sid;\
} while (0)
2696
/*
 * Emits the current chunk posting `cid` (if any) and loads the next one.
 *
 * The posting is written with PUTNEXT_() and its cid.tf position gaps are
 * copied from snp to posp (spos accumulates them). Two inconsistencies
 * are reported with CRIT(GRN_FILE_CORRUPT, ...):
 *   - cid does not come strictly after the last emitted posting `lid`,
 *   - cid.tf is 0.
 * In both cases `break` leaves this macro's own do/while, so GETNEXTC()
 * is skipped; users are expected to test ctx->rc right after the macro.
 * `bt` must be in scope for the second message.
 */
#define PUTNEXTC() do {\
  if (cid.rid) {\
    if (cid.tf) {\
      if (lid.rid > cid.rid || (lid.rid == cid.rid && lid.sid >= cid.sid)) {\
        DEFINE_NAME(ii);\
        CRIT(GRN_FILE_CORRUPT,\
             "[ii][broken] posting in list is larger than posting in chunk: "\
             "<%.*s>: (%d:%d) -> (%d:%d)",\
             name_size, name, lid.rid, lid.sid, cid.rid, cid.sid);\
        break;\
      }\
      PUTNEXT_(cid);\
      if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {\
        uint32_t i;\
        for (i = 0; i < cid.tf; i++) {\
          *posp++ = snp[i];\
          spos += snp[i];\
        }\
      }\
    } else {\
      DEFINE_NAME(ii);\
      CRIT(GRN_FILE_CORRUPT,\
           "[ii][broken] invalid posting in chunk: <%.*s>: (%d,%d)",\
           name_size, name, bt->tid, cid.rid);\
      break;\
    }\
  }\
  GETNEXTC();\
} while (0)
2726
/*
 * Loads the id of the next buffer record into `bid`.
 *
 * Uses variables of the expanding function: nextb (position of the next
 * record, 0 at the end of the list), sb (source buffer), sbp (cursor left
 * just after the decoded id, where PUTNEXTB() continues), bid and ii.
 *
 * When the new id does not come strictly after the previous one,
 * CRIT(GRN_FILE_CORRUPT, ...) is raised and `break` leaves the macro
 * without advancing nextb. At the end of the list bid.rid is set to 0,
 * which the other macros treat as "buffer exhausted".
 */
#define GETNEXTB() do {\
  if (nextb) {\
    uint32_t lrid = bid.rid, lsid = bid.sid;\
    buffer_rec *br = BUFFER_REC_AT(sb, nextb);\
    sbp = GRN_NEXT_ADDR(br);\
    GRN_B_DEC(bid.rid, sbp);\
    if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {\
      GRN_B_DEC(bid.sid, sbp);\
    } else {\
      bid.sid = 1;\
    }\
    if (lrid > bid.rid || (lrid == bid.rid && lsid >= bid.sid)) {\
      DEFINE_NAME(ii);\
      CRIT(GRN_FILE_CORRUPT,\
           "[ii][broken] postings in block aren't sorted: "\
           "<%.*s>: (%d:%d) -> (%d:%d)",\
           name_size, name, lrid, lsid, bid.rid, bid.sid);\
      break;\
    }\
    nextb = br->step;\
  } else {\
    bid.rid = 0;\
  }\
} while (0)
2751
/*
 * Emits the current buffer posting `bid` (if any) and loads the next one.
 *
 * The rest of the record (tf, optional weight, positions) is decoded from
 * sbp. Records with tf == 0 and records with sid == 0 are not emitted;
 * they only exist to cancel older postings. When bid does not come
 * strictly after the last emitted posting `lid`, CRIT(GRN_FILE_CORRUPT,
 * ...) is raised and `break` leaves this macro's own do/while, so
 * GETNEXTB() is skipped; users are expected to test ctx->rc right after
 * the macro.
 */
#define PUTNEXTB() do {\
  if (bid.rid && bid.sid) {\
    GRN_B_DEC(bid.tf, sbp);\
    if (bid.tf > 0) {\
      if (lid.rid > bid.rid || (lid.rid == bid.rid && lid.sid >= bid.sid)) {\
        DEFINE_NAME(ii);\
        CRIT(GRN_FILE_CORRUPT,\
             "[ii][broken] posting in list is larger than posting in buffer: "\
             "<%.*s>: (%d:%d) -> (%d:%d)",\
             name_size, name, lid.rid, lid.sid, bid.rid, bid.sid);\
        break;\
      }\
      if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {\
        GRN_B_DEC(bid.weight, sbp);\
      }\
      PUTNEXT_(bid);\
      if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {\
        while (bid.tf--) { GRN_B_DEC(*posp, sbp); spos += *posp++; }\
      }\
    }\
  }\
  GETNEXTB();\
} while (0)
2775
/*
 * Merges the postings of the buffer (bid) and of the chunk (cid) in
 * (rid, sid) order for as long as `cond` holds.
 *
 * Each iteration handles one posting:
 *   - the smaller of bid / cid is emitted,
 *   - on equal (rid, sid) the chunk posting is dropped and the buffer
 *     posting is emitted (the buffer holds the newer data),
 *   - on equal rid with bid.sid == 0 the chunk posting is dropped without
 *     advancing the buffer, so every chunk posting of that record is
 *     removed,
 *   - when one side is exhausted (rid == 0) the other one is emitted.
 * The loop ends when both sides are exhausted, when `cond` becomes false
 * or as soon as ctx->rc reports an error raised by a PUT macro.
 */
#define MERGE_BC(cond) do {\
  if (bid.rid) {\
    if (cid.rid) {\
      if (cid.rid < bid.rid) {\
        PUTNEXTC();\
        if (ctx->rc != GRN_SUCCESS) { break; }\
      } else {\
        if (bid.rid < cid.rid) {\
          PUTNEXTB();\
          if (ctx->rc != GRN_SUCCESS) { break; }\
        } else {\
          if (bid.sid) {\
            if (cid.sid < bid.sid) {\
              PUTNEXTC();\
              if (ctx->rc != GRN_SUCCESS) { break; }\
            } else {\
              if (bid.sid == cid.sid) { GETNEXTC(); }\
              PUTNEXTB();\
              if (ctx->rc != GRN_SUCCESS) { break; }\
            }\
          } else {\
            GETNEXTC();\
          }\
        }\
      }\
    } else {\
      PUTNEXTB();\
      if (ctx->rc != GRN_SUCCESS) { break; }\
    }\
  } else {\
    if (cid.rid) {\
      PUTNEXTC();\
      if (ctx->rc != GRN_SUCCESS) { break; }\
    } else {\
      break;\
    }\
  }\
} while (cond)
2814
/*
 * Descriptor of one sub-chunk of a split chunk.
 *   segno : chunk segment number
 *   size  : size of the encoded data in bytes
 *   dgap  : record id gap (presumably from the previous sub-chunk --
 *           confirm against buffer_merge())
 */
typedef struct {
  uint32_t segno, size, dgap;
} chunk_info;
2820
2821static grn_rc
2822chunk_flush(grn_ctx *ctx, grn_ii *ii, chunk_info *cinfo, uint8_t *enc, uint32_t encsize)
2823{
2824 uint8_t *dc;
2825 uint32_t dcn;
2826 grn_io_win dw;
2827 if (encsize) {
2828 chunk_new(ctx, ii, &dcn, encsize);
2829 if (ctx->rc == GRN_SUCCESS) {
2830 if ((dc = WIN_MAP(ii->chunk, ctx, &dw, dcn, 0, encsize, grn_io_wronly))) {
2831 grn_memcpy(dc, enc, encsize);
2832 grn_io_win_unmap(&dw);
2833 cinfo->segno = dcn;
2834 cinfo->size = encsize;
2835 } else {
2836 chunk_free(ctx, ii, dcn, 0, encsize);
2837 {
2838 DEFINE_NAME(ii);
2839 MERR("[ii][chunk][flush] failed to allocate a destination chunk: "
2840 "<%.*s> :"
2841 "segment:<%u>, size:<%u>",
2842 name_size, name,
2843 dcn, encsize);
2844 }
2845 }
2846 }
2847 } else {
2848 cinfo->segno = 0;
2849 cinfo->size = 0;
2850 }
2851 return ctx->rc;
2852}
2853
/*
 * Merges the buffer records of a term into one of its sub-chunks and
 * rewrites that sub-chunk.
 *
 *   sb, bt  : source buffer and the term being merged
 *   cinfo   : in: the sub-chunk to read; out: the sub-chunk that was
 *             written by chunk_flush()
 *   rid     : buffer postings are merged while their rid is <= rid (and
 *             while chunk postings remain)
 *   dv      : work vectors that receive the merged postings
 *   nextbp, sbpp, bidp : buffer cursor state (next record position, decode
 *             pointer, current posting). They are read on entry and
 *             written back only when the merge succeeded.
 *   balance : incremented by (postings written - postings read)
 *
 * Steps: map and decode the source chunk, merge it with the buffer
 * through MERGE_BC(), encode the result, flush it into a new chunk and
 * free the old one. Returns ctx->rc.
 */
static grn_rc
chunk_merge(grn_ctx *ctx, grn_ii *ii, buffer *sb, buffer_term *bt,
            chunk_info *cinfo, grn_id rid, datavec *dv,
            uint16_t *nextbp, uint8_t **sbpp, docinfo *bidp, int32_t *balance)
{
  grn_io_win sw;
  uint64_t spos = 0;
  uint32_t segno = cinfo->segno, size = cinfo->size, sdf = 0, ndf = 0;
  uint32_t *ridp = NULL, *sidp = NULL, *tfp, *weightp = NULL, *posp = NULL;
  docinfo cid = {0, 0, 0, 0, 0}, lid = {0, 0, 0, 0, 0}, bid = *bidp;
  uint8_t *scp = WIN_MAP(ii->chunk, ctx, &sw, segno, 0, size, grn_io_rdonly);

  if (scp) {
    uint16_t nextb = *nextbp;
    uint32_t snn = 0, *srp, *ssp = NULL, *stp, *sop = NULL, *snp;
    uint8_t *sbp = *sbpp;
    datavec rdv[MAX_N_ELEMENTS + 1];
    size_t bufsize = S_SEGMENT * ii->n_elements;
    datavec_init(ctx, rdv, ii->n_elements, 0, 0);
    if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
      rdv[ii->n_elements - 1].flags = ODD;
    }
    /* Decode the source chunk; the output needs room for what it holds
     * plus one segment per element for the buffer records. */
    bufsize += grn_p_decv(ctx, scp, cinfo->size, rdv, ii->n_elements);
    // (df in chunk list) = a[1] - sdf;
    {
      /* Source cursors, in the order rid, [sid], tf, [weight], pos. */
      int j = 0;
      sdf = rdv[j].data_size;
      srp = rdv[j++].data;
      if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; }
      stp = rdv[j++].data;
      if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; }
      snn = rdv[j].data_size;
      snp = rdv[j].data;
    }
    datavec_reset(ctx, dv, ii->n_elements, sdf + S_SEGMENT, bufsize);
    if (ctx->rc == GRN_SUCCESS) {
      {
        /* Destination cursors, same order as above. */
        int j = 0;
        ridp = dv[j++].data;
        if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { sidp = dv[j++].data; }
        tfp = dv[j++].data;
        if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { weightp = dv[j++].data; }
        posp = dv[j].data;
      }
      GETNEXTC();
      MERGE_BC(bid.rid <= rid || cid.rid);
      if (ctx->rc == GRN_SUCCESS) {
        /* Hand the advanced buffer cursor back to the caller. */
        *sbpp = sbp;
        *nextbp = nextb;
        *bidp = bid;
        GRN_ASSERT(posp < dv[ii->n_elements].data);
        ndf = ridp - dv[0].data;
      }
    }
    datavec_fin(ctx, rdv);
    grn_io_win_unmap(&sw);
  } else {
    DEFINE_NAME(ii);
    MERR("[ii][chunk][merge] failed to allocate a source chunk: "
         "<%.*s> :"
         "record:<%u>, segment:<%u>, size:<%u>",
         name_size, name,
         rid,
         segno,
         size);
  }
  if (ctx->rc == GRN_SUCCESS) {
    int j = 0;
    uint8_t *enc;
    uint32_t encsize;
    uint32_t np = posp - dv[ii->n_elements - 1].data;
    /* Packed encoding is used only for vectors that are long enough and
     * (for rids and positions) dense enough. */
    uint32_t f_s = (ndf < 3) ? 0 : USE_P_ENC;
    uint32_t f_d = ((ndf < 16) || (ndf <= (lid.rid >> 8))) ? 0 : USE_P_ENC;
    dv[j].data_size = ndf; dv[j++].flags = f_d;
    if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
      dv[j].data_size = ndf; dv[j++].flags = f_s;
    }
    dv[j].data_size = ndf; dv[j++].flags = f_s;
    if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
      dv[j].data_size = ndf; dv[j++].flags = f_s;
    }
    if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
      uint32_t f_p = ((np < 32) || (np <= (spos >> 13))) ? 0 : USE_P_ENC;
      dv[j].data_size = np; dv[j].flags = f_p|ODD;
    }
    if ((enc = GRN_MALLOC((ndf * 4 + np) * 2))) {
      encsize = grn_p_encv(ctx, dv, ii->n_elements, enc);
      chunk_flush(ctx, ii, cinfo, enc, encsize);
      if (ctx->rc == GRN_SUCCESS) {
        /* The old chunk is released only once the new one is stored. */
        chunk_free(ctx, ii, segno, 0, size);
      }
      GRN_FREE(enc);
    } else {
      DEFINE_NAME(ii);
      MERR("[ii][chunk][merge] failed to allocate a encode buffer: "
           "<%.*s> :"
           "record:<%u>, segment:<%u>, size:<%u>",
           name_size, name,
           rid,
           segno,
           size);
    }
  }
  *balance += (ndf - sdf);
  return ctx->rc;
}
2960
2961static void
2962buffer_merge_dump_datavec(grn_ctx *ctx,
2963 grn_ii *ii,
2964 datavec *dv,
2965 datavec *rdv)
2966{
2967 int i, j;
2968 grn_obj buffer;
2969
2970 GRN_TEXT_INIT(&buffer, 0);
2971 for (i = 0; i < ii->n_elements; i++) {
2972 GRN_LOG(ctx, GRN_LOG_DEBUG, "rdv[%d] data_size=%d, flags=%d",
2973 i, rdv[i].data_size, rdv[i].flags);
2974 GRN_BULK_REWIND(&buffer);
2975 for (j = 0; j < rdv[i].data_size;) {
2976 grn_text_printf(ctx, &buffer, " %d", rdv[i].data[j]);
2977 j++;
2978 if (!(j % 32) || j == rdv[i].data_size) {
2979 GRN_LOG(ctx, GRN_LOG_DEBUG,
2980 "rdv[%d].data[%d]%.*s",
2981 i, j,
2982 (int)GRN_TEXT_LEN(&buffer),
2983 GRN_TEXT_VALUE(&buffer));
2984 GRN_BULK_REWIND(&buffer);
2985 }
2986 }
2987 }
2988
2989 for (i = 0; i < ii->n_elements; i++) {
2990 GRN_LOG(ctx, GRN_LOG_DEBUG, "dv[%d] data_size=%d, flags=%d",
2991 i, dv[i].data_size, dv[i].flags);
2992 GRN_BULK_REWIND(&buffer);
2993 for (j = 0; j < dv[i].data_size;) {
2994 grn_text_printf(ctx, &buffer, " %d", dv[i].data[j]);
2995 j++;
2996 if (!(j % 32) || j == dv[i].data_size) {
2997 GRN_LOG(ctx, GRN_LOG_DEBUG,
2998 "dv[%d].data[%d]%.*s",
2999 i, j,
3000 (int)GRN_TEXT_LEN(&buffer),
3001 GRN_TEXT_VALUE(&buffer));
3002 GRN_BULK_REWIND(&buffer);
3003 }
3004 }
3005 }
3006
3007 GRN_OBJ_FIN(ctx, &buffer);
3008}
3009
/* If dc doesn't have enough space, the program may crash.
 * TODO: Support automatic space extension or a maximum size check.
 */
3013static grn_rc
3014buffer_merge(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h,
3015 buffer *sb, uint8_t *sc, buffer *db, uint8_t *dc)
3016{
3017 buffer_term *bt;
3018 uint8_t *sbp = NULL, *dcp = dc;
3019 datavec dv[MAX_N_ELEMENTS + 1];
3020 datavec rdv[MAX_N_ELEMENTS + 1];
3021 uint16_t n = db->header.nterms, nterms_void = 0;
3022 size_t unitsize = (S_SEGMENT + sb->header.chunk_size / sb->header.nterms) * 2;
3023 // size_t unitsize = (S_SEGMENT + sb->header.chunk_size) * 2 + (1<<24);
3024 size_t totalsize = unitsize * ii->n_elements;
3025 //todo : realloc
3026 datavec_init(ctx, dv, ii->n_elements, unitsize, totalsize);
3027 if (ctx->rc != GRN_SUCCESS) {
3028 DEFINE_NAME(ii);
3029 ERR(ctx->rc,
3030 "[ii][buffer][merge] failed to initialize data vector: "
3031 "<%.*s>: "
3032 "unit-size:<%" GRN_FMT_SIZE ">, "
3033 "total-size:<%" GRN_FMT_SIZE ">",
3034 name_size, name,
3035 unitsize,
3036 totalsize);
3037 return ctx->rc;
3038 }
3039 datavec_init(ctx, rdv, ii->n_elements, 0, 0);
3040 if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
3041 rdv[ii->n_elements - 1].flags = ODD;
3042 }
3043 for (bt = db->terms; n; n--, bt++) {
3044 uint16_t nextb;
3045 uint64_t spos = 0;
3046 int32_t balance = 0;
3047 uint32_t *ridp, *sidp = NULL, *tfp, *weightp = NULL, *posp, nchunks = 0;
3048 uint32_t nvchunks = 0;
3049 chunk_info *cinfo = NULL;
3050 grn_id crid = GRN_ID_NIL;
3051 docinfo cid = {0, 0, 0, 0, 0}, lid = {0, 0, 0, 0, 0}, bid = {0, 0};
3052 uint32_t sdf = 0, snn = 0, ndf;
3053 uint32_t *srp = NULL, *ssp = NULL, *stp = NULL, *sop = NULL, *snp = NULL;
3054 if (!bt->tid) {
3055 nterms_void++;
3056 continue;
3057 }
3058 if (!bt->pos_in_buffer) {
3059 GRN_ASSERT(!bt->size_in_buffer);
3060 if (bt->size_in_chunk) {
3061 grn_memcpy(dcp, sc + bt->pos_in_chunk, bt->size_in_chunk);
3062 bt->pos_in_chunk = (uint32_t)(dcp - dc);
3063 dcp += bt->size_in_chunk;
3064 }
3065 continue;
3066 }
3067 nextb = bt->pos_in_buffer;
3068 GETNEXTB();
3069 if (sc && bt->size_in_chunk) {
3070 uint8_t *scp = sc + bt->pos_in_chunk;
3071 uint8_t *sce = scp + bt->size_in_chunk;
3072 size_t size = S_SEGMENT * ii->n_elements;
3073 if ((bt->tid & CHUNK_SPLIT)) {
3074 int i;
3075 GRN_B_DEC(nchunks, scp);
3076 if (!(cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) {
3077 datavec_fin(ctx, dv);
3078 datavec_fin(ctx, rdv);
3079 {
3080 DEFINE_NAME(ii);
3081 MERR("[ii][buffer][merge] failed to allocate chunk info: "
3082 "<%.*s> :"
3083 "segment:<%u>, "
3084 "n-chunks:<%u>, "
3085 "unit-size:<%" GRN_FMT_SIZE ">, "
3086 "total-size:<%" GRN_FMT_SIZE ">",
3087 name_size, name,
3088 seg,
3089 nchunks,
3090 unitsize,
3091 totalsize);
3092 }
3093 return ctx->rc;
3094 }
3095 for (i = 0; i < nchunks; i++) {
3096 GRN_B_DEC(cinfo[i].segno, scp);
3097 GRN_B_DEC(cinfo[i].size, scp);
3098 GRN_B_DEC(cinfo[i].dgap, scp);
3099 crid += cinfo[i].dgap;
3100 if (bid.rid <= crid) {
3101 chunk_merge(ctx, ii, sb, bt, &cinfo[i], crid, dv,
3102 &nextb, &sbp, &bid, &balance);
3103 if (ctx->rc != GRN_SUCCESS) {
3104 if (cinfo) { GRN_FREE(cinfo); }
3105 datavec_fin(ctx, dv);
3106 datavec_fin(ctx, rdv);
3107 {
3108 DEFINE_NAME(ii);
3109 ERR(ctx->rc,
3110 "[ii][buffer][merge] failed to merge chunk: "
3111 "<%.*s>: "
3112 "chunk:<%u>, "
3113 "n-chunks:<%u>",
3114 name_size, name,
3115 i,
3116 nchunks);
3117 }
3118 return ctx->rc;
3119 }
3120 }
3121 if (cinfo[i].size) {
3122 nvchunks++;
3123 } else {
3124 crid -= cinfo[i].dgap;
3125 cinfo[i + 1].dgap += cinfo[i].dgap;
3126 }
3127 }
3128 }
3129 if (sce > scp) {
3130 size += grn_p_decv(ctx, scp, sce - scp, rdv, ii->n_elements);
3131 {
3132 int j = 0;
3133 sdf = rdv[j].data_size;
3134 srp = rdv[j++].data;
3135 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; }
3136 stp = rdv[j++].data;
3137 if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; }
3138 snn = rdv[j].data_size;
3139 snp = rdv[j].data;
3140 }
3141 datavec_reset(ctx, dv, ii->n_elements, sdf + S_SEGMENT, size);
3142 if (ctx->rc != GRN_SUCCESS) {
3143 if (cinfo) { GRN_FREE(cinfo); }
3144 datavec_fin(ctx, dv);
3145 datavec_fin(ctx, rdv);
3146 {
3147 DEFINE_NAME(ii);
3148 ERR(ctx->rc,
3149 "[ii][buffer][merge] failed to reset data vector: "
3150 "<%.*s>: "
3151 "unit-size:<%" GRN_FMT_SIZE ">, "
3152 "total-size:<%" GRN_FMT_SIZE ">",
3153 name_size, name,
3154 (size_t)(sdf + S_SEGMENT),
3155 size);
3156 }
3157 return ctx->rc;
3158 }
3159 }
3160 }
3161 {
3162 int j = 0;
3163 ridp = dv[j++].data;
3164 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { sidp = dv[j++].data; }
3165 tfp = dv[j++].data;
3166 if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { weightp = dv[j++].data; }
3167 posp = dv[j].data;
3168 }
3169 GETNEXTC();
3170 MERGE_BC(1);
3171 if (ctx->rc != GRN_SUCCESS) {
3172 if (cinfo) { GRN_FREE(cinfo); }
3173 datavec_fin(ctx, dv);
3174 datavec_fin(ctx, rdv);
3175 {
3176 DEFINE_NAME(ii);
3177 ERR(ctx->rc,
3178 "[ii][buffer][merge] failed to merge chunk: <%.*s>",
3179 name_size, name);
3180 }
3181 return ctx->rc;
3182 }
3183 GRN_ASSERT(posp < dv[ii->n_elements].data);
3184 ndf = ridp - dv[0].data;
3185 /*
3186 {
3187 grn_obj buf;
3188 uint32_t rid, sid, tf, i, pos, *pp;
3189 GRN_TEXT_INIT(&buf, 0);
3190 rid = 0;
3191 pp = dv[3].data;
3192 for (i = 0; i < ndf; i++) {
3193 GRN_BULK_REWIND(&buf);
3194 rid += dv[0].data[i];
3195 if (dv[0].data[i]) { sid = 0; }
3196 sid += dv[1].data[i] + 1;
3197 tf = dv[2].data[i] + 1;
3198 pos = 0;
3199 grn_text_itoa(ctx, &buf, rid);
3200 GRN_TEXT_PUTC(ctx, &buf, ':');
3201 grn_text_itoa(ctx, &buf, sid);
3202 GRN_TEXT_PUTC(ctx, &buf, ':');
3203 grn_text_itoa(ctx, &buf, tf);
3204 GRN_TEXT_PUTC(ctx, &buf, ':');
3205 while (tf--) {
3206 pos += *pp++;
3207 grn_text_itoa(ctx, &buf, pos);
3208 if (tf) { GRN_TEXT_PUTC(ctx, &buf, ','); }
3209 }
3210 GRN_TEXT_PUTC(ctx, &buf, '\0');
3211 GRN_LOG(ctx, GRN_LOG_DEBUG, "Posting:%s", GRN_TEXT_VALUE(&buf));
3212 }
3213 GRN_OBJ_FIN(ctx, &buf);
3214 }
3215 */
3216 {
3217 grn_id tid = bt->tid & GRN_ID_MAX;
3218 uint32_t *a = array_at(ctx, ii, tid);
3219 if (!a) {
3220 GRN_LOG(ctx, GRN_LOG_DEBUG, "array_entry not found tid=%d", tid);
3221 memset(bt, 0, sizeof(buffer_term));
3222 nterms_void++;
3223 } else {
3224 if (!ndf && !nvchunks) {
3225 a[0] = 0;
3226 a[1] = 0;
3227 lexicon_delete(ctx, ii, tid, h);
3228 memset(bt, 0, sizeof(buffer_term));
3229 nterms_void++;
3230 } else if ((ii->header->flags & GRN_OBJ_WITH_SECTION)
3231 && !nvchunks && ndf == 1 && lid.rid < 0x100000 &&
3232 lid.sid < 0x800 && lid.tf == 1 && lid.weight == 0) {
3233 a[0] = (lid.rid << 12) + (lid.sid << 1) + 1;
3234 a[1] = (ii->header->flags & GRN_OBJ_WITH_POSITION) ? posp[-1] : 0;
3235 memset(bt, 0, sizeof(buffer_term));
3236 nterms_void++;
3237 } else if (!(ii->header->flags & GRN_OBJ_WITH_SECTION)
3238 && !nvchunks && ndf == 1 && lid.tf == 1 && lid.weight == 0) {
3239 a[0] = (lid.rid << 1) + 1;
3240 a[1] = (ii->header->flags & GRN_OBJ_WITH_POSITION) ? posp[-1] : 0;
3241 memset(bt, 0, sizeof(buffer_term));
3242 nterms_void++;
3243 } else {
3244 int j = 0;
3245 uint8_t *dcp0;
3246 uint32_t encsize;
3247 uint32_t f_s = (ndf < 3) ? 0 : USE_P_ENC;
3248 uint32_t f_d = ((ndf < 16) || (ndf <= (lid.rid >> 8))) ? 0 : USE_P_ENC;
3249 dv[j].data_size = ndf; dv[j++].flags = f_d;
3250 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
3251 dv[j].data_size = ndf; dv[j++].flags = f_s;
3252 }
3253 dv[j].data_size = ndf; dv[j++].flags = f_s;
3254 if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
3255 dv[j].data_size = ndf; dv[j++].flags = f_s;
3256 }
3257 if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
3258 uint32_t np = posp - dv[ii->n_elements - 1].data;
3259 uint32_t f_p = ((np < 32) || (np <= (spos >> 13))) ? 0 : USE_P_ENC;
3260 dv[j].data_size = np; dv[j].flags = f_p|ODD;
3261 }
3262 dcp0 = dcp;
3263 a[1] = (bt->size_in_chunk ? a[1] : 0) + (ndf - sdf) + balance;
3264 if (nvchunks) {
3265 int i;
3266 GRN_B_ENC(nvchunks, dcp);
3267 for (i = 0; i < nchunks; i++) {
3268 if (cinfo[i].size) {
3269 GRN_B_ENC(cinfo[i].segno, dcp);
3270 GRN_B_ENC(cinfo[i].size, dcp);
3271 GRN_B_ENC(cinfo[i].dgap, dcp);
3272 }
3273 }
3274 }
3275 encsize = grn_p_encv(ctx, dv, ii->n_elements, dcp);
3276
3277 if (grn_logger_pass(ctx, GRN_LOG_DEBUG)) {
3278 if (sb->header.chunk_size + S_SEGMENT <= (dcp - dc) + encsize) {
3279 GRN_LOG(ctx, GRN_LOG_DEBUG,
3280 "cs(%d)+(%d)=(%d)"
3281 "<=(%" GRN_FMT_LLD ")+(%d)="
3282 "(%" GRN_FMT_LLD ")",
3283 sb->header.chunk_size,
3284 S_SEGMENT,
3285 sb->header.chunk_size + S_SEGMENT,
3286 (long long int)(dcp - dc),
3287 encsize,
3288 (long long int)((dcp - dc) + encsize));
3289 buffer_merge_dump_datavec(ctx, ii, dv, rdv);
3290 }
3291 }
3292
3293 if (encsize > CHUNK_SPLIT_THRESHOLD &&
3294 (cinfo || (cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) &&
3295 !chunk_flush(ctx, ii, &cinfo[nchunks], dcp, encsize)) {
3296 int i;
3297 cinfo[nchunks].dgap = lid.rid - crid;
3298 nvchunks++;
3299 dcp = dcp0;
3300 GRN_B_ENC(nvchunks, dcp);
3301 for (i = 0; i <= nchunks; i++) {
3302 if (cinfo[i].size) {
3303 GRN_B_ENC(cinfo[i].segno, dcp);
3304 GRN_B_ENC(cinfo[i].size, dcp);
3305 GRN_B_ENC(cinfo[i].dgap, dcp);
3306 }
3307 }
3308 GRN_LOG(ctx, GRN_LOG_DEBUG, "split (%d) encsize=%d", tid, encsize);
3309 bt->tid |= CHUNK_SPLIT;
3310 } else {
3311 dcp += encsize;
3312 if (!nvchunks) {
3313 bt->tid &= ~CHUNK_SPLIT;
3314 }
3315 }
3316 bt->pos_in_chunk = (uint32_t)(dcp0 - dc);
3317 bt->size_in_chunk = (uint32_t)(dcp - dcp0);
3318 bt->size_in_buffer = 0;
3319 bt->pos_in_buffer = 0;
3320 }
3321 array_unref(ii, tid);
3322 }
3323 }
3324 if (cinfo) { GRN_FREE(cinfo); }
3325 }
3326 datavec_fin(ctx, rdv);
3327 datavec_fin(ctx, dv);
3328 db->header.chunk_size = (uint32_t)(dcp - dc);
3329 db->header.buffer_free =
3330 S_SEGMENT - sizeof(buffer_header) - db->header.nterms * sizeof(buffer_term);
3331 db->header.nterms_void = nterms_void;
3332 return ctx->rc;
3333}
3334
3335static void
3336fake_map(grn_ctx *ctx, grn_io *io, grn_io_win *iw, void *addr, uint32_t seg, uint32_t size)
3337{
3338 iw->ctx = ctx;
3339 iw->diff = 0;
3340 iw->io = io;
3341 iw->mode = grn_io_wronly;
3342 iw->segment = ((seg) >> GRN_II_N_CHUNK_VARIATION);
3343 iw->offset = (((seg) & ((1 << GRN_II_N_CHUNK_VARIATION) - 1)) << GRN_II_W_LEAST_CHUNK);
3344 iw->size = size;
3345 iw->cached = 0;
3346 iw->addr = addr;
3347}
3348
/*
 * Rebuilds the buffer of logical segment `seg`: the buffered postings are
 * merged with the existing chunk by buffer_merge() into a newly obtained
 * segment and a newly allocated chunk, after which the logical segment is
 * switched to the new physical segment and the old chunk is freed.
 *
 * ctx: context; failures are reported through ctx->rc.
 * ii:  target inverted index.
 * seg: logical buffer segment to flush.
 * h:   passed through to buffer_merge().
 *
 * Returns ctx->rc.
 */
static grn_rc
buffer_flush(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h)
{
  grn_io_win sw, dw;
  buffer *sb, *db = NULL;
  uint8_t *dc, *sc = NULL;
  uint32_t ds, pseg, scn, dcn = 0;
  if (ii->header->binfo[seg] == GRN_II_PSEG_NOT_ASSIGNED) {
    DEFINE_NAME(ii);
    CRIT(GRN_FILE_CORRUPT,
         "[ii][buffer][flush] invalid segment: "
         "<%.*s> :"
         "request:<%u>, max:<%u>",
         name_size, name,
         seg, ii->seg->header->max_segment);
    return ctx->rc;
  }
  /* ds: physical segment that will hold the rebuilt buffer. */
  if ((ds = segment_get(ctx, ii)) == ii->seg->header->max_segment) {
    DEFINE_NAME(ii);
    MERR("[ii][buffer][flush] segment is full: "
         "<%.*s> :"
         "request:<%u>, max:<%u>",
         name_size, name,
         seg, ii->seg->header->max_segment);
    return ctx->rc;
  }
  pseg = buffer_open(ctx, ii, SEG2POS(seg, 0), NULL, &sb);
  if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
    DEFINE_NAME(ii);
    MERR("[ii][buffer][flush] failed to open buffer: "
         "<%.*s> :"
         "segment:<%u>, position:<%u>, max:<%u>",
         name_size, name,
         seg, SEG2POS(seg, 0), ii->seg->header->max_segment);
    return ctx->rc;
  }
  {
    GRN_IO_SEG_REF(ii->seg, ds, db);
    if (db) {
      uint32_t actual_chunk_size = 0;
      uint32_t max_dest_chunk_size = sb->header.chunk_size + S_SEGMENT;
      /* Temporary destination chunk memory; buffer_merge() does not check
       * its writes against this size. */
      if ((dc = GRN_MALLOC(max_dest_chunk_size * 2))) {
        /* Map the source chunk only when the buffer has one. */
        if ((scn = sb->header.chunk) == GRN_II_PSEG_NOT_ASSIGNED ||
            (sc = WIN_MAP(ii->chunk, ctx, &sw, scn, 0,
                          sb->header.chunk_size, grn_io_rdonly))) {
          uint16_t n = sb->header.nterms;
          /* Start the destination buffer as a copy of the source terms. */
          memset(db, 0, S_SEGMENT);
          grn_memcpy(db->terms, sb->terms, n * sizeof(buffer_term));
          db->header.nterms = n;
          buffer_merge(ctx, ii, seg, h, sb, sc, db, dc);
          if (ctx->rc == GRN_SUCCESS) {
            actual_chunk_size = db->header.chunk_size;
            if (actual_chunk_size > 0) {
              chunk_new(ctx, ii, &dcn, actual_chunk_size);
            }
            if (ctx->rc == GRN_SUCCESS) {
              grn_rc rc;
              db->header.chunk =
                actual_chunk_size ? dcn : GRN_II_PSEG_NOT_ASSIGNED;
              /* NOTE(review): dc is not freed on the success path below, so
               * unmapping the hand-built write-only window presumably
               * writes dc out to chunk dcn and releases dc -- confirm
               * against grn_io_win_unmap(). */
              fake_map(ctx, ii->chunk, &dw, dc, dcn, actual_chunk_size);
              rc = grn_io_win_unmap(&dw);
              if (rc == GRN_SUCCESS) {
                /* Switch the logical segment over, then drop the old chunk. */
                buffer_segment_update(ii, seg, ds);
                ii->header->total_chunk_size += actual_chunk_size;
                if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
                  grn_io_win_unmap(&sw);
                  chunk_free(ctx, ii, scn, 0, sb->header.chunk_size);
                  ii->header->total_chunk_size -= sb->header.chunk_size;
                }
              } else {
                GRN_FREE(dc);
                if (actual_chunk_size) {
                  chunk_free(ctx, ii, dcn, 0, actual_chunk_size);
                }
                if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
                {
                  DEFINE_NAME(ii);
                  ERR(rc,
                      "[ii][buffer][flush] failed to unmap a destination chunk: "
                      "<%.*s> : "
                      "segment:<%u>, destination-segment:<%u>, actual-size:<%u>",
                      name_size, name,
                      seg,
                      dcn,
                      actual_chunk_size);
                }
              }
            } else {
              /* chunk_new() failed. */
              GRN_FREE(dc);
              if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
            }
          } else {
            /* buffer_merge() failed. */
            GRN_FREE(dc);
            if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
          }
        } else {
          GRN_FREE(dc);
          {
            DEFINE_NAME(ii);
            MERR("[ii][buffer][flush] failed to map a source chunk: "
                 "<%.*s> :"
                 "segment:<%u>, source-segment:<%u>, chunk-size:<%u>",
                 name_size, name,
                 seg,
                 scn,
                 sb->header.chunk_size);
          }
        }
      } else {
        DEFINE_NAME(ii);
        MERR("[ii][buffer][flush] failed to allocate a destination chunk: "
             "<%.*s> :"
             "segment:<%u>, destination-segment:<%u>",
             name_size, name,
             seg,
             ds);
      }
      GRN_IO_SEG_UNREF(ii->seg, ds);
    } else {
      DEFINE_NAME(ii);
      MERR("[ii][buffer][flush] failed to allocate a destination segment: "
           "<%.*s> :"
           "segment:<%u>, destination-segment:<%u>",
           name_size, name,
           seg,
           ds);
    }
    buffer_close(ctx, ii, pseg);
  }
  return ctx->rc;
}
3480
3481void
3482grn_ii_buffer_check(grn_ctx *ctx, grn_ii *ii, uint32_t seg)
3483{
3484 grn_io_win sw;
3485 buffer *sb;
3486 uint8_t *sc = NULL;
3487 uint32_t pseg, scn, nterms_with_corrupt_chunk = 0, nterm_with_chunk = 0;
3488 uint32_t ndeleted_terms_with_value = 0;
3489 buffer_term *bt;
3490 uint8_t *sbp = NULL;
3491 datavec rdv[MAX_N_ELEMENTS + 1];
3492 uint16_t n;
3493 int nterms_void = 0;
3494 int size_in_buffer = 0;
3495 grn_obj buf;
3496 size_t lower_bound;
3497 int64_t nloops = 0, nviolations = 0;
3498 if (ii->header->binfo[seg] == GRN_II_PSEG_NOT_ASSIGNED) {
3499 GRN_OUTPUT_BOOL(GRN_FALSE);
3500 return;
3501 }
3502 pseg = buffer_open(ctx, ii, SEG2POS(seg, 0), NULL, &sb);
3503 if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
3504 GRN_OUTPUT_BOOL(GRN_FALSE);
3505 return;
3506 }
3507 lower_bound =
3508 (sb->header.buffer_free + sizeof(buffer_term) * sb->header.nterms)
3509 / sizeof(buffer_rec);
3510 datavec_init(ctx, rdv, ii->n_elements, 0, 0);
3511 if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
3512 rdv[ii->n_elements - 1].flags = ODD;
3513 }
3514 GRN_OUTPUT_MAP_OPEN("BUFFER", -1);
3515 GRN_OUTPUT_CSTR("buffer id");
3516 GRN_OUTPUT_INT64(seg);
3517 if ((scn = sb->header.chunk) == GRN_II_PSEG_NOT_ASSIGNED) {
3518 GRN_OUTPUT_CSTR("void chunk size");
3519 GRN_OUTPUT_INT64(sb->header.chunk_size);
3520 } else {
3521 if ((sc = WIN_MAP(ii->chunk, ctx, &sw, scn, 0, sb->header.chunk_size,
3522 grn_io_rdonly))) {
3523 GRN_OUTPUT_CSTR("chunk size");
3524 GRN_OUTPUT_INT64(sb->header.chunk_size);
3525 } else {
3526 GRN_OUTPUT_CSTR("unmappable chunk size");
3527 GRN_OUTPUT_INT64(sb->header.chunk_size);
3528 }
3529 }
3530 GRN_OUTPUT_CSTR("buffer term");
3531 GRN_OUTPUT_ARRAY_OPEN("TERMS", sb->header.nterms);
3532
3533 GRN_OBJ_INIT(&buf, GRN_BULK, 0, ii->lexicon->header.domain);
3534 for (bt = sb->terms, n = sb->header.nterms; n; n--, bt++) {
3535 grn_id tid, tid_;
3536 char key[GRN_TABLE_MAX_KEY_SIZE];
3537 int key_size;
3538 uint16_t nextb;
3539 uint32_t nchunks = 0;
3540 chunk_info *cinfo = NULL;
3541 grn_id crid = GRN_ID_NIL;
3542 docinfo bid = {0, 0};
3543 uint32_t sdf = 0, snn = 0;
3544 uint32_t *srp = NULL, *ssp = NULL, *stp = NULL, *sop = NULL, *snp = NULL;
3545 if (!bt->tid && !bt->pos_in_buffer && !bt->size_in_buffer) {
3546 nterms_void++;
3547 continue;
3548 }
3549 GRN_OUTPUT_ARRAY_OPEN("TERM", -1);
3550 tid = (bt->tid & GRN_ID_MAX);
3551 key_size = grn_table_get_key(ctx, ii->lexicon, tid, key,
3552 GRN_TABLE_MAX_KEY_SIZE);
3553 tid_ = grn_table_get(ctx, ii->lexicon, key, key_size);
3554 GRN_TEXT_SET(ctx, &buf, key, key_size);
3555 GRN_OUTPUT_OBJ(&buf, NULL);
3556 GRN_OUTPUT_INT64(bt->tid);
3557 GRN_OUTPUT_INT64(tid_);
3558 nextb = bt->pos_in_buffer;
3559 size_in_buffer += bt->size_in_buffer;
3560 if (tid != tid_ && (bt->size_in_buffer || bt->size_in_chunk)) {
3561 ndeleted_terms_with_value++;
3562 }
3563 GETNEXTB();
3564 GRN_OUTPUT_INT64(bt->size_in_buffer);
3565 GRN_OUTPUT_INT64(bt->size_in_chunk);
3566 if (sc && bt->size_in_chunk) {
3567 uint8_t *scp = sc + bt->pos_in_chunk;
3568 uint8_t *sce = scp + bt->size_in_chunk;
3569 size_t size = S_SEGMENT * ii->n_elements;
3570 if ((bt->tid & CHUNK_SPLIT)) {
3571 int i;
3572 GRN_B_DEC(nchunks, scp);
3573 if (!(cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) {
3574 datavec_fin(ctx, rdv);
3575 GRN_OBJ_FIN(ctx, &buf);
3576 return;
3577 }
3578 for (i = 0; i < nchunks; i++) {
3579 GRN_B_DEC(cinfo[i].segno, scp);
3580 GRN_B_DEC(cinfo[i].size, scp);
3581 GRN_B_DEC(cinfo[i].dgap, scp);
3582 crid += cinfo[i].dgap;
3583 }
3584 }
3585 if (sce > scp) {
3586 size += grn_p_decv(ctx, scp, sce - scp, rdv, ii->n_elements);
3587 {
3588 int j = 0;
3589 sdf = rdv[j].data_size;
3590 GRN_OUTPUT_INT64(sdf);
3591 srp = rdv[j++].data;
3592 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; }
3593 if (sdf != rdv[j].data_size) {
3594 nterms_with_corrupt_chunk++;
3595 }
3596 stp = rdv[j++].data;
3597 if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; }
3598 GRN_OUTPUT_INT64(rdv[j].data_size);
3599 snn = rdv[j].data_size;
3600 snp = rdv[j].data;
3601 }
3602 nterm_with_chunk++;
3603 }
3604 }
3605 {
3606 uint16_t pos;
3607 grn_id rid, sid, rid_ = 0, sid_ = 0;
3608 uint8_t *p;
3609 buffer_rec *r;
3610 for (pos = bt->pos_in_buffer; pos; pos = r->step) {
3611 if (pos < lower_bound) {
3612 nviolations++;
3613 }
3614 r = BUFFER_REC_AT(sb, pos);
3615 p = GRN_NEXT_ADDR(r);
3616 GRN_B_DEC(rid, p);
3617 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
3618 GRN_B_DEC(sid, p);
3619 } else {
3620 sid = 1;
3621 }
3622 if (rid < rid_ || (rid == rid_ && sid < sid_)) {
3623 nloops++;
3624 }
3625 rid_ = rid;
3626 sid_ = sid;
3627 }
3628 }
3629 GRN_OUTPUT_ARRAY_CLOSE();
3630 if (cinfo) { GRN_FREE(cinfo); }
3631 }
3632 GRN_OBJ_FIN(ctx, &buf);
3633
3634 GRN_OUTPUT_ARRAY_CLOSE();
3635 GRN_OUTPUT_CSTR("buffer free");
3636 GRN_OUTPUT_INT64(sb->header.buffer_free);
3637 GRN_OUTPUT_CSTR("size in buffer");
3638 GRN_OUTPUT_INT64(size_in_buffer);
3639 GRN_OUTPUT_CSTR("nterms");
3640 GRN_OUTPUT_INT64(sb->header.nterms);
3641 if (nterms_void != sb->header.nterms_void) {
3642 GRN_OUTPUT_CSTR("nterms void gap");
3643 GRN_OUTPUT_INT64(nterms_void - sb->header.nterms_void);
3644 }
3645 GRN_OUTPUT_CSTR("nterms with chunk");
3646 GRN_OUTPUT_INT64(nterm_with_chunk);
3647 if (nterms_with_corrupt_chunk) {
3648 GRN_OUTPUT_CSTR("nterms with corrupt chunk");
3649 GRN_OUTPUT_INT64(nterms_with_corrupt_chunk);
3650 }
3651 if (ndeleted_terms_with_value) {
3652 GRN_OUTPUT_CSTR("number of deleted terms with value");
3653 GRN_OUTPUT_INT64(ndeleted_terms_with_value);
3654 }
3655 if (nloops) {
3656 GRN_OUTPUT_CSTR("number of loops");
3657 GRN_OUTPUT_INT64(nloops);
3658 }
3659 if (nviolations) {
3660 GRN_OUTPUT_CSTR("number of violations");
3661 GRN_OUTPUT_INT64(nviolations);
3662 }
3663 GRN_OUTPUT_MAP_CLOSE();
3664 datavec_fin(ctx, rdv);
3665 if (sc) { grn_io_win_unmap(&sw); }
3666 buffer_close(ctx, ii, pseg);
3667}
3668
/* Sort record used by term_split(): a buffer term together with its
 * lexicon key, so that terms can be ordered by key with term_compar(). */
typedef struct {
  buffer_term *bt;    /* the buffer term this record refers to */
  const char *key;    /* key bytes of the term (not NUL-terminated as far
                       * as term_compar() is concerned; it uses key_size) */
  uint32_t key_size;  /* length of key in bytes */
} term_sort;
3674
3675static int
3676term_compar(const void *t1, const void *t2)
3677{
3678 int r;
3679 const term_sort *x = (term_sort *)t1, *y = (term_sort *)t2;
3680 if (x->key_size > y->key_size) {
3681 r = memcmp(x->key, y->key, y->key_size);
3682 return r ? r : x->key_size - y->key_size;
3683 } else {
3684 r = memcmp(x->key, y->key, x->key_size);
3685 return r ? r : x->key_size - y->key_size;
3686 }
3687}
3688
3689static grn_rc
3690term_split(grn_ctx *ctx, grn_obj *lexicon, buffer *sb, buffer *db0, buffer *db1)
3691{
3692 uint16_t i, n, *nt;
3693 buffer_term *bt;
3694 uint32_t s, th = (sb->header.chunk_size + sb->header.nterms) >> 1;
3695 term_sort *ts = GRN_MALLOC(sb->header.nterms * sizeof(term_sort));
3696 if (!ts) { return GRN_NO_MEMORY_AVAILABLE; }
3697 for (i = 0, n = sb->header.nterms, bt = sb->terms; n; bt++, n--) {
3698 if (bt->tid) {
3699 grn_id tid = bt->tid & GRN_ID_MAX;
3700 ts[i].key = _grn_table_key(ctx, lexicon, tid, &ts[i].key_size);
3701 ts[i].bt = bt;
3702 i++;
3703 }
3704 }
3705 qsort(ts, i, sizeof(term_sort), term_compar);
3706 memset(db0, 0, S_SEGMENT);
3707 bt = db0->terms;
3708 nt = &db0->header.nterms;
3709 for (s = 0; n + 1 < i && s <= th; n++, bt++) {
3710 grn_memcpy(bt, ts[n].bt, sizeof(buffer_term));
3711 (*nt)++;
3712 s += ts[n].bt->size_in_chunk + 1;
3713 }
3714 memset(db1, 0, S_SEGMENT);
3715 bt = db1->terms;
3716 nt = &db1->header.nterms;
3717 for (; n < i; n++, bt++) {
3718 grn_memcpy(bt, ts[n].bt, sizeof(buffer_term));
3719 (*nt)++;
3720 }
3721 GRN_FREE(ts);
3722 GRN_LOG(ctx, GRN_LOG_DEBUG, "d0=%d d1=%d",
3723 db0->header.nterms, db1->header.nterms);
3724 return GRN_SUCCESS;
3725}
3726
3727static void
3728array_update(grn_ctx *ctx, grn_ii *ii, uint32_t dls, buffer *db)
3729{
3730 uint16_t n;
3731 buffer_term *bt;
3732 uint32_t *a, pos = SEG2POS(dls, sizeof(buffer_header));
3733 for (n = db->header.nterms, bt = db->terms; n; n--, bt++) {
3734 if (bt->tid) {
3735 grn_id tid = bt->tid & GRN_ID_MAX;
3736 if ((a = array_at(ctx, ii, tid))) {
3737 a[0] = pos;
3738 array_unref(ii, tid);
3739 } else {
3740 GRN_LOG(ctx, GRN_LOG_WARNING, "array_at failed (%d)", tid);
3741 }
3742 }
3743 pos += sizeof(buffer_term) >> 2;
3744 }
3745}
3746
3747static grn_rc
3748buffer_split(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h)
3749{
3750 grn_io_win sw, dw0, dw1;
3751 buffer *sb, *db0 = NULL, *db1 = NULL;
3752 uint8_t *sc = NULL, *dc0, *dc1;
3753 uint32_t dps0 = 0, dps1 = 0, dls0 = 0, dls1 = 0, sps, scn, dcn0 = 0, dcn1 = 0;
3754 if (ii->header->binfo[seg] == GRN_II_PSEG_NOT_ASSIGNED) {
3755 DEFINE_NAME(ii);
3756 CRIT(GRN_FILE_CORRUPT,
3757 "[ii][buffer][split] invalid segment: "
3758 "<%.*s> :"
3759 "request:<%u>, max:<%u>",
3760 name_size, name,
3761 seg, ii->seg->header->max_segment);
3762 return ctx->rc;
3763 }
3764 buffer_segment_reserve(ctx, ii, &dls0, &dps0, &dls1, &dps1);
3765 if (ctx->rc != GRN_SUCCESS) {
3766 DEFINE_NAME(ii);
3767 ERR(ctx->rc,
3768 "[ii][buffer][split] failed to reserve buffer segments: "
3769 "<%.*s> :"
3770 "request:<%u>, max:<%u>",
3771 name_size, name,
3772 seg, ii->seg->header->max_segment);
3773 return ctx->rc;
3774 }
3775 sps = buffer_open(ctx, ii, SEG2POS(seg, 0), NULL, &sb);
3776 if (sps == GRN_II_PSEG_NOT_ASSIGNED) {
3777 DEFINE_NAME(ii);
3778 MERR("[ii][buffer][split] failed to open buffer: "
3779 "<%.*s> :"
3780 "segment:<%u>, position:<%u>, max-segment:<%u>",
3781 name_size, name,
3782 seg, SEG2POS(seg, 0), ii->seg->header->max_segment);
3783 } else {
3784 GRN_IO_SEG_REF(ii->seg, dps0, db0);
3785 if (db0) {
3786 GRN_IO_SEG_REF(ii->seg, dps1, db1);
3787 if (db1) {
3788 uint32_t actual_db0_chunk_size = 0;
3789 uint32_t actual_db1_chunk_size = 0;
3790 uint32_t max_dest_chunk_size = sb->header.chunk_size + S_SEGMENT;
3791 if ((dc0 = GRN_MALLOC(max_dest_chunk_size * 2))) {
3792 if ((dc1 = GRN_MALLOC(max_dest_chunk_size * 2))) {
3793 if ((scn = sb->header.chunk) == GRN_II_PSEG_NOT_ASSIGNED ||
3794 (sc = WIN_MAP(ii->chunk, ctx, &sw, scn, 0,
3795 sb->header.chunk_size, grn_io_rdonly))) {
3796 term_split(ctx, ii->lexicon, sb, db0, db1);
3797 buffer_merge(ctx, ii, seg, h, sb, sc, db0, dc0);
3798 if (ctx->rc == GRN_SUCCESS) {
3799 actual_db0_chunk_size = db0->header.chunk_size;
3800 if (actual_db0_chunk_size > 0) {
3801 chunk_new(ctx, ii, &dcn0, actual_db0_chunk_size);
3802 }
3803 if (ctx->rc == GRN_SUCCESS) {
3804 grn_rc rc;
3805 db0->header.chunk =
3806 actual_db0_chunk_size ? dcn0 : GRN_II_PSEG_NOT_ASSIGNED;
3807 fake_map(ctx, ii->chunk, &dw0, dc0, dcn0, actual_db0_chunk_size);
3808 rc = grn_io_win_unmap(&dw0);
3809 if (rc == GRN_SUCCESS) {
3810 buffer_merge(ctx, ii, seg, h, sb, sc, db1, dc1);
3811 if (ctx->rc == GRN_SUCCESS) {
3812 actual_db1_chunk_size = db1->header.chunk_size;
3813 if (actual_db1_chunk_size > 0) {
3814 chunk_new(ctx, ii, &dcn1, actual_db1_chunk_size);
3815 }
3816 if (ctx->rc == GRN_SUCCESS) {
3817 fake_map(ctx, ii->chunk, &dw1, dc1, dcn1,
3818 actual_db1_chunk_size);
3819 rc = grn_io_win_unmap(&dw1);
3820 if (rc == GRN_SUCCESS) {
3821 db1->header.chunk =
3822 actual_db1_chunk_size ? dcn1 : GRN_II_PSEG_NOT_ASSIGNED;
3823 buffer_segment_update(ii, dls0, dps0);
3824 buffer_segment_update(ii, dls1, dps1);
3825 array_update(ctx, ii, dls0, db0);
3826 array_update(ctx, ii, dls1, db1);
3827 buffer_segment_clear(ii, seg);
3828 ii->header->total_chunk_size += actual_db0_chunk_size;
3829 ii->header->total_chunk_size += actual_db1_chunk_size;
3830 if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
3831 grn_io_win_unmap(&sw);
3832 chunk_free(ctx, ii, scn, 0, sb->header.chunk_size);
3833 ii->header->total_chunk_size -= sb->header.chunk_size;
3834 }
3835 } else {
3836 if (actual_db1_chunk_size) {
3837 chunk_free(ctx, ii, dcn1, 0, actual_db1_chunk_size);
3838 }
3839 if (actual_db0_chunk_size) {
3840 chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size);
3841 }
3842 GRN_FREE(dc1);
3843 if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
3844 grn_io_win_unmap(&sw);
3845 }
3846 {
3847 DEFINE_NAME(ii);
3848 ERR(rc,
3849 "[ii][buffer[merge] "
3850 "failed to unmap a destination chunk2: "
3851 "<%.*s> :"
3852 "segment:<%u>, "
3853 "destination-chunk1:<%u>, "
3854 "destination-chunk2:<%u>, "
3855 "actual-size1:<%u>, "
3856 "actual-size2:<%u>",
3857 name_size, name,
3858 seg,
3859 dcn0,
3860 dcn1,
3861 actual_db0_chunk_size,
3862 actual_db1_chunk_size);
3863 }
3864 }
3865 } else {
3866 if (actual_db0_chunk_size) {
3867 chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size);
3868 }
3869 GRN_FREE(dc1);
3870 if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
3871 grn_io_win_unmap(&sw);
3872 }
3873 }
3874 } else {
3875 if (actual_db0_chunk_size) {
3876 chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size);
3877 }
3878 GRN_FREE(dc1);
3879 if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
3880 grn_io_win_unmap(&sw);
3881 }
3882 }
3883 } else {
3884 if (actual_db0_chunk_size) {
3885 chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size);
3886 }
3887 GRN_FREE(dc1);
3888 GRN_FREE(dc0);
3889 if (scn != GRN_II_PSEG_NOT_ASSIGNED) {
3890 grn_io_win_unmap(&sw);
3891 }
3892 {
3893 DEFINE_NAME(ii);
3894 ERR(rc,
3895 "[ii][buffer[merge] "
3896 "failed to unmap a destination chunk1: "
3897 "<%.*s> :"
3898 "segment:<%u>, "
3899 "destination-chunk1:<%u>, "
3900 "actual-size1:<%u>",
3901 name_size, name,
3902 seg,
3903 dcn0,
3904 actual_db0_chunk_size);
3905 }
3906 }
3907 } else {
3908 GRN_FREE(dc1);
3909 GRN_FREE(dc0);
3910 if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
3911 }
3912 } else {
3913 GRN_FREE(dc1);
3914 GRN_FREE(dc0);
3915 if (scn != GRN_II_PSEG_NOT_ASSIGNED) { grn_io_win_unmap(&sw); }
3916 }
3917 } else {
3918 GRN_FREE(dc1);
3919 GRN_FREE(dc0);
3920 {
3921 DEFINE_NAME(ii);
3922 MERR("[ii][buffer][split] failed to map a source chunk: "
3923 "<%.*s> :"
3924 "segment:<%u>, "
3925 "source-segment:<%u>, "
3926 "chunk-size:<%u>",
3927 name_size, name,
3928 seg,
3929 scn,
3930 sb->header.chunk_size);
3931 }
3932 }
3933 } else {
3934 GRN_FREE(dc0);
3935 {
3936 DEFINE_NAME(ii);
3937 MERR("[ii][buffer][split] "
3938 "failed to allocate a destination chunk2: "
3939 "<%.*s> :"
3940 "segment:<%u>, "
3941 "destination-segment1:<%u>, "
3942 "destination-segment2:<%u>",
3943 name_size, name,
3944 seg,
3945 dps0,
3946 dps1);
3947 }
3948 }
3949 } else {
3950 DEFINE_NAME(ii);
3951 MERR("[ii][buffer][split] failed to allocate a destination chunk1: "
3952 "<%.*s>: "
3953 "segment:<%u>, "
3954 "destination-segment1:<%u>, "
3955 "destination-segment2:<%u>",
3956 name_size, name,
3957 seg,
3958 dps0,
3959 dps1);
3960 }
3961 GRN_IO_SEG_UNREF(ii->seg, dps1);
3962 } else {
3963 DEFINE_NAME(ii);
3964 MERR("[ii][buffer][split] failed to allocate a destination segment2: "
3965 "<%.*s>: "
3966 "segment:<%u>, "
3967 "destination-segment1:<%u>, "
3968 "destination-segment2:<%u>",
3969 name_size, name,
3970 seg,
3971 dps0,
3972 dps1);
3973 }
3974 GRN_IO_SEG_UNREF(ii->seg, dps0);
3975 } else {
3976 DEFINE_NAME(ii);
3977 MERR("[ii][buffer][split] failed to allocate a destination segment1: "
3978 "<%.*s>: "
3979 "segment:<%u>, "
3980 "destination-segment1:<%u>, "
3981 "destination-segment2:<%u>",
3982 name_size, name,
3983 seg,
3984 dps0,
3985 dps1);
3986 }
3987 buffer_close(ctx, ii, sps);
3988 }
3989 return ctx->rc;
3990}
3991
/* NOTE(review): the users of SCALE_FACTOR and MAX_NTERMS are not visible
 * next to these definitions; check them before changing either value. */
#define SCALE_FACTOR 2048
#define MAX_NTERMS 8192
/*
 * Evaluates to true when `buffer` has more than 1024 terms, or has more
 * than one term and its chunk_size times 100 exceeds the total chunk size
 * recorded in the header of `ii`. buffer_new_find_segment() splits the
 * buffer when this holds and flushes it otherwise.
 * Both arguments are evaluated more than once.
 */
#define SPLIT_COND(ii, buffer)\
  ((buffer)->header.nterms > 1024 ||\
   ((buffer)->header.nterms > 1 &&\
    (buffer)->header.chunk_size * 100 > (ii)->header->total_chunk_size))
3998
3999inline static void
4000buffer_new_find_segment(grn_ctx *ctx,
4001 grn_ii *ii,
4002 int size,
4003 grn_id tid,
4004 grn_hash *h,
4005 buffer **b,
4006 uint32_t *lseg,
4007 uint32_t *pseg)
4008{
4009 uint32_t *a;
4010
4011 a = array_at(ctx, ii, tid);
4012 if (!a) {
4013 return;
4014 }
4015
4016 for (;;) {
4017 uint32_t pos = a[0];
4018 if (!pos || (pos & 1)) { break; }
4019 *pseg = buffer_open(ctx, ii, pos, NULL, b);
4020 if (*pseg == GRN_II_PSEG_NOT_ASSIGNED) { break; }
4021 if ((*b)->header.buffer_free >= size + sizeof(buffer_term)) {
4022 *lseg = LSEG(pos);
4023 break;
4024 }
4025 buffer_close(ctx, ii, *pseg);
4026 if (SPLIT_COND(ii, (*b))) {
4027 /* ((S_SEGMENT - sizeof(buffer_header) + ii->header->bmax -
4028 (*b)->header.nterms * sizeof(buffer_term)) * 4 <
4029 (*b)->header.chunk_size) */
4030 GRN_LOG(ctx, GRN_LOG_DEBUG,
4031 "nterms=%d chunk=%d total=%" GRN_FMT_INT64U,
4032 (*b)->header.nterms,
4033 (*b)->header.chunk_size,
4034 ii->header->total_chunk_size >> 10);
4035 if (buffer_split(ctx, ii, LSEG(pos), h)) { break; }
4036 } else {
4037 if (S_SEGMENT - sizeof(buffer_header)
4038 - (*b)->header.nterms * sizeof(buffer_term)
4039 < size + sizeof(buffer_term)) {
4040 break;
4041 }
4042 if (buffer_flush(ctx, ii, LSEG(pos), h)) { break; }
4043 }
4044 }
4045
4046 array_unref(ii, tid);
4047}
4048
4049inline static void
4050buffer_new_lexicon_pat(grn_ctx *ctx,
4051 grn_ii *ii,
4052 int size,
4053 grn_id id,
4054 grn_hash *h,
4055 buffer **b,
4056 uint32_t *lseg,
4057 uint32_t *pseg)
4058{
4059 grn_pat_cursor *cursor;
4060 char key[GRN_TABLE_MAX_KEY_SIZE];
4061 int key_size;
4062
4063 key_size = grn_table_get_key(ctx, ii->lexicon, id, key,
4064 GRN_TABLE_MAX_KEY_SIZE);
4065 if (ii->lexicon->header.flags & GRN_OBJ_KEY_VAR_SIZE) {
4066 grn_obj *tokenizer = NULL;
4067
4068 grn_table_get_info(ctx, ii->lexicon, NULL, NULL, &tokenizer, NULL, NULL);
4069 if (tokenizer) {
4070 /* For natural language */
4071 cursor = grn_pat_cursor_open(ctx,
4072 (grn_pat *)(ii->lexicon),
4073 key,
4074 key_size,
4075 NULL,
4076 0,
4077 0,
4078 -1,
4079 GRN_CURSOR_ASCENDING|GRN_CURSOR_GT);
4080 if (cursor) {
4081 grn_id tid;
4082 while (ctx->rc == GRN_SUCCESS &&
4083 *lseg == GRN_II_PSEG_NOT_ASSIGNED &&
4084 (tid = grn_pat_cursor_next(ctx, cursor))) {
4085 buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
4086 }
4087 grn_pat_cursor_close(ctx, cursor);
4088 }
4089 } else {
4090 /* For text data */
4091 int target_key_size = key_size;
4092 int reduced_key_size = 0;
4093
4094 while (*lseg == GRN_II_PSEG_NOT_ASSIGNED && target_key_size > 0) {
4095 grn_id tid;
4096
4097 cursor = grn_pat_cursor_open(ctx,
4098 (grn_pat *)(ii->lexicon),
4099 key, target_key_size,
4100 NULL, 0, 0, -1,
4101 GRN_CURSOR_PREFIX);
4102 if (!cursor) {
4103 break;
4104 }
4105
4106 if (reduced_key_size == 0) {
4107 while (ctx->rc == GRN_SUCCESS &&
4108 *lseg == GRN_II_PSEG_NOT_ASSIGNED &&
4109 (tid = grn_pat_cursor_next(ctx, cursor))) {
4110 buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
4111 }
4112 } else {
4113 while (ctx->rc == GRN_SUCCESS &&
4114 *lseg == GRN_II_PSEG_NOT_ASSIGNED &&
4115 (tid = grn_pat_cursor_next(ctx, cursor))) {
4116 void *current_key;
4117 int current_key_size;
4118
4119 current_key_size = grn_pat_cursor_get_key(ctx, cursor, &current_key);
4120 if (memcmp(((char *)current_key) + target_key_size,
4121 key + target_key_size,
4122 reduced_key_size) == 0) {
4123 continue;
4124 }
4125 buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
4126 }
4127 }
4128 grn_pat_cursor_close(ctx, cursor);
4129
4130 if (reduced_key_size == 0) {
4131 reduced_key_size = 1;
4132 } else {
4133 reduced_key_size *= 2;
4134 }
4135 target_key_size -= reduced_key_size;
4136 }
4137 }
4138 } else {
4139 /* For other data */
4140 cursor = grn_pat_cursor_open(ctx,
4141 (grn_pat *)(ii->lexicon),
4142 NULL, 0, key, key_size, 0, -1,
4143 GRN_CURSOR_PREFIX);
4144 if (cursor) {
4145 grn_id tid;
4146 while (ctx->rc == GRN_SUCCESS &&
4147 *lseg == GRN_II_PSEG_NOT_ASSIGNED &&
4148 (tid = grn_pat_cursor_next(ctx, cursor))) {
4149 buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
4150 }
4151 grn_pat_cursor_close(ctx, cursor);
4152 }
4153 }
4154}
4155
4156inline static void
4157buffer_new_lexicon_other(grn_ctx *ctx,
4158 grn_ii *ii,
4159 int size,
4160 grn_id id,
4161 grn_hash *h,
4162 buffer **b,
4163 uint32_t *lseg,
4164 uint32_t *pseg)
4165{
4166 GRN_TABLE_EACH_BEGIN(ctx, ii->lexicon, cursor, tid) {
4167 if (ctx->rc != GRN_SUCCESS || *lseg != GRN_II_PSEG_NOT_ASSIGNED) {
4168 break;
4169 }
4170 buffer_new_find_segment(ctx, ii, size, tid, h, b, lseg, pseg);
4171 } GRN_TABLE_EACH_END(ctx, cursor);
4172}
4173
4174
4175inline static uint32_t
4176buffer_new(grn_ctx *ctx, grn_ii *ii, int size, uint32_t *pos,
4177 buffer_term **bt, buffer_rec **br, buffer **bp, grn_id id, grn_hash *h)
4178{
4179 buffer *b = NULL;
4180 uint16_t offset;
4181 uint32_t lseg = GRN_II_PSEG_NOT_ASSIGNED, pseg = GRN_II_PSEG_NOT_ASSIGNED;
4182 if (S_SEGMENT - sizeof(buffer_header) < size + sizeof(buffer_term)) {
4183 DEFINE_NAME(ii);
4184 MERR("[ii][buffer][new] requested size is too large: "
4185 "<%.*s> :"
4186 "requested:<%" GRN_FMT_SIZE ">, max:<%" GRN_FMT_SIZE ">",
4187 name_size, name,
4188 (size_t)(size + sizeof(buffer_term)),
4189 (size_t)(S_SEGMENT - sizeof(buffer_header)));
4190 return GRN_II_PSEG_NOT_ASSIGNED;
4191 }
4192 if (ii->lexicon->header.type == GRN_TABLE_PAT_KEY) {
4193 buffer_new_lexicon_pat(ctx, ii, size, id, h, &b, &lseg, &pseg);
4194 } else {
4195 buffer_new_lexicon_other(ctx, ii, size, id, h, &b, &lseg, &pseg);
4196 }
4197 if (lseg == GRN_II_PSEG_NOT_ASSIGNED) {
4198 if (buffer_segment_new(ctx, ii, &lseg) ||
4199 (pseg = buffer_open(ctx, ii, SEG2POS(lseg, 0), NULL, &b)) == GRN_II_PSEG_NOT_ASSIGNED) {
4200 return GRN_II_PSEG_NOT_ASSIGNED;
4201 }
4202 memset(b, 0, S_SEGMENT);
4203 b->header.buffer_free = S_SEGMENT - sizeof(buffer_header);
4204 b->header.chunk = GRN_II_PSEG_NOT_ASSIGNED;
4205 }
4206 if (b->header.nterms_void) {
4207 for (offset = 0; offset < b->header.nterms; offset++) {
4208 if (!b->terms[offset].tid) { break; }
4209 }
4210 if (offset == b->header.nterms) {
4211 GRN_LOG(ctx, GRN_LOG_DEBUG, "inconsistent buffer(%d)", lseg);
4212 b->header.nterms_void = 0;
4213 b->header.nterms++;
4214 b->header.buffer_free -= size + sizeof(buffer_term);
4215 } else {
4216 b->header.nterms_void--;
4217 b->header.buffer_free -= size;
4218 }
4219 } else {
4220 offset = b->header.nterms++;
4221 b->header.buffer_free -= size + sizeof(buffer_term);
4222 }
4223 *pos = SEG2POS(lseg, (sizeof(buffer_header) + sizeof(buffer_term) * offset));
4224 *bt = &b->terms[offset];
4225 *br = (buffer_rec *)(((byte *)&b->terms[b->header.nterms]) + b->header.buffer_free);
4226 *bp = b;
4227 return pseg;
4228}
4229
4230/* ii */
4231
4232static grn_ii *
4233_grn_ii_create(grn_ctx *ctx, grn_ii *ii, const char *path, grn_obj *lexicon, uint32_t flags)
4234{
4235 int i;
4236 uint32_t max_n_segments;
4237 uint32_t max_n_chunks;
4238 grn_io *seg, *chunk;
4239 char path2[PATH_MAX];
4240 struct grn_ii_header *header;
4241 grn_table_flags lflags;
4242 grn_encoding encoding;
4243 grn_obj *tokenizer;
4244 /*
4245 for (i = 0; i < 32; i++) {
4246 new_histogram[i] = 0;
4247 free_histogram[i] = 0;
4248 }
4249 */
4250 if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer,
4251 NULL, NULL)) {
4252 return NULL;
4253 }
4254 if (path && strlen(path) + 6 >= PATH_MAX) { return NULL; }
4255
4256 if (flags & GRN_OBJ_INDEX_SMALL) {
4257 max_n_segments = grn_ii_max_n_segments_small;
4258 max_n_chunks = grn_ii_max_n_chunks_small;
4259 } else if (flags & GRN_OBJ_INDEX_MEDIUM) {
4260 max_n_segments = MAX_PSEG_MEDIUM;
4261 max_n_chunks = GRN_II_MAX_CHUNK_MEDIUM;
4262 } else {
4263 max_n_segments = MAX_PSEG;
4264 max_n_chunks = GRN_II_MAX_CHUNK;
4265 }
4266
4267 seg = grn_io_create(ctx,
4268 path,
4269 sizeof(struct grn_ii_header),
4270 S_SEGMENT,
4271 max_n_segments,
4272 grn_io_auto,
4273 GRN_IO_EXPIRE_SEGMENT);
4274 if (!seg) { return NULL; }
4275 if (path) {
4276 grn_strcpy(path2, PATH_MAX, path);
4277 grn_strcat(path2, PATH_MAX, ".c");
4278 chunk = grn_io_create(ctx, path2, 0, S_CHUNK, max_n_chunks, grn_io_auto,
4279 GRN_IO_EXPIRE_SEGMENT);
4280 } else {
4281 chunk = grn_io_create(ctx, NULL, 0, S_CHUNK, max_n_chunks, grn_io_auto, 0);
4282 }
4283 if (!chunk) {
4284 grn_io_close(ctx, seg);
4285 grn_io_remove(ctx, path);
4286 return NULL;
4287 }
4288 header = grn_io_header(seg);
4289 grn_io_set_type(seg, GRN_COLUMN_INDEX);
4290 for (i = 0; i < GRN_II_MAX_LSEG; i++) {
4291 header->ainfo[i] = GRN_II_PSEG_NOT_ASSIGNED;
4292 header->binfo[i] = GRN_II_PSEG_NOT_ASSIGNED;
4293 }
4294 for (i = 0; i <= GRN_II_N_CHUNK_VARIATION; i++) {
4295 header->free_chunks[i] = GRN_II_PSEG_NOT_ASSIGNED;
4296 header->garbages[i] = GRN_II_PSEG_NOT_ASSIGNED;
4297 }
4298 header->flags = flags;
4299 ii->seg = seg;
4300 ii->chunk = chunk;
4301 ii->lexicon = lexicon;
4302 ii->lflags = lflags;
4303 ii->encoding = encoding;
4304 ii->header = header;
4305 ii->n_elements = 2;
4306 if ((flags & GRN_OBJ_WITH_SECTION)) { ii->n_elements++; }
4307 if ((flags & GRN_OBJ_WITH_WEIGHT)) { ii->n_elements++; }
4308 if ((flags & GRN_OBJ_WITH_POSITION)) { ii->n_elements++; }
4309 return ii;
4310}
4311
4312grn_ii *
4313grn_ii_create(grn_ctx *ctx, const char *path, grn_obj *lexicon, uint32_t flags)
4314{
4315 grn_ii *ii = NULL;
4316 if (!(ii = GRN_MALLOCN(grn_ii, 1))) {
4317 return NULL;
4318 }
4319 GRN_DB_OBJ_SET_TYPE(ii, GRN_COLUMN_INDEX);
4320 if (!_grn_ii_create(ctx, ii, path, lexicon, flags)) {
4321 GRN_FREE(ii);
4322 return NULL;
4323 }
4324 return ii;
4325}
4326
4327grn_rc
4328grn_ii_remove(grn_ctx *ctx, const char *path)
4329{
4330 grn_rc rc;
4331 char buffer[PATH_MAX];
4332 if (!path || strlen(path) > PATH_MAX - 4) { return GRN_INVALID_ARGUMENT; }
4333 if ((rc = grn_io_remove(ctx, path))) { goto exit; }
4334 grn_snprintf(buffer, PATH_MAX, PATH_MAX,
4335 "%s.c", path);
4336 rc = grn_io_remove(ctx, buffer);
4337exit :
4338 return rc;
4339}
4340
4341grn_rc
4342grn_ii_truncate(grn_ctx *ctx, grn_ii *ii)
4343{
4344 grn_rc rc;
4345 const char *io_segpath, *io_chunkpath;
4346 char *segpath, *chunkpath = NULL;
4347 grn_obj *lexicon;
4348 uint32_t flags;
4349 if ((io_segpath = grn_io_path(ii->seg)) && *io_segpath != '\0') {
4350 if (!(segpath = GRN_STRDUP(io_segpath))) {
4351 ERR(GRN_NO_MEMORY_AVAILABLE, "cannot duplicate path: <%s>", io_segpath);
4352 return GRN_NO_MEMORY_AVAILABLE;
4353 }
4354 if ((io_chunkpath = grn_io_path(ii->chunk)) && *io_chunkpath != '\0') {
4355 if (!(chunkpath = GRN_STRDUP(io_chunkpath))) {
4356 ERR(GRN_NO_MEMORY_AVAILABLE, "cannot duplicate path: <%s>", io_chunkpath);
4357 return GRN_NO_MEMORY_AVAILABLE;
4358 }
4359 } else {
4360 chunkpath = NULL;
4361 }
4362 } else {
4363 segpath = NULL;
4364 }
4365 lexicon = ii->lexicon;
4366 flags = ii->header->flags;
4367 if ((rc = grn_io_close(ctx, ii->seg))) { goto exit; }
4368 if ((rc = grn_io_close(ctx, ii->chunk))) { goto exit; }
4369 ii->seg = NULL;
4370 ii->chunk = NULL;
4371 if (segpath && (rc = grn_io_remove(ctx, segpath))) { goto exit; }
4372 if (chunkpath && (rc = grn_io_remove(ctx, chunkpath))) { goto exit; }
4373 if (!_grn_ii_create(ctx, ii, segpath, lexicon, flags)) {
4374 rc = GRN_UNKNOWN_ERROR;
4375 }
4376exit:
4377 if (segpath) { GRN_FREE(segpath); }
4378 if (chunkpath) { GRN_FREE(chunkpath); }
4379 return rc;
4380}
4381
4382grn_ii *
4383grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon)
4384{
4385 grn_io *seg, *chunk;
4386 grn_ii *ii;
4387 char path2[PATH_MAX];
4388 struct grn_ii_header *header;
4389 uint32_t io_type;
4390 grn_table_flags lflags;
4391 grn_encoding encoding;
4392 grn_obj *tokenizer;
4393 if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer,
4394 NULL, NULL)) {
4395 return NULL;
4396 }
4397 if (strlen(path) + 6 >= PATH_MAX) { return NULL; }
4398 grn_strcpy(path2, PATH_MAX, path);
4399 grn_strcat(path2, PATH_MAX, ".c");
4400 seg = grn_io_open(ctx, path, grn_io_auto);
4401 if (!seg) { return NULL; }
4402 chunk = grn_io_open(ctx, path2, grn_io_auto);
4403 if (!chunk) {
4404 grn_io_close(ctx, seg);
4405 return NULL;
4406 }
4407 header = grn_io_header(seg);
4408 io_type = grn_io_get_type(seg);
4409 if (io_type != GRN_COLUMN_INDEX) {
4410 ERR(GRN_INVALID_FORMAT,
4411 "[column][index] file type must be %#04x: <%#04x>",
4412 GRN_COLUMN_INDEX, io_type);
4413 grn_io_close(ctx, seg);
4414 grn_io_close(ctx, chunk);
4415 return NULL;
4416 }
4417 if (!(ii = GRN_MALLOCN(grn_ii, 1))) {
4418 grn_io_close(ctx, seg);
4419 grn_io_close(ctx, chunk);
4420 return NULL;
4421 }
4422 GRN_DB_OBJ_SET_TYPE(ii, GRN_COLUMN_INDEX);
4423 ii->seg = seg;
4424 ii->chunk = chunk;
4425 ii->lexicon = lexicon;
4426 ii->lflags = lflags;
4427 ii->encoding = encoding;
4428 ii->header = header;
4429 ii->n_elements = 2;
4430 if ((header->flags & GRN_OBJ_WITH_SECTION)) { ii->n_elements++; }
4431 if ((header->flags & GRN_OBJ_WITH_WEIGHT)) { ii->n_elements++; }
4432 if ((header->flags & GRN_OBJ_WITH_POSITION)) { ii->n_elements++; }
4433 return ii;
4434}
4435
4436grn_rc
4437grn_ii_close(grn_ctx *ctx, grn_ii *ii)
4438{
4439 grn_rc rc;
4440 if (!ii) { return GRN_INVALID_ARGUMENT; }
4441 if ((rc = grn_io_close(ctx, ii->seg))) { return rc; }
4442 if ((rc = grn_io_close(ctx, ii->chunk))) { return rc; }
4443 GRN_FREE(ii);
4444 /*
4445 {
4446 int i;
4447 for (i = 0; i < 32; i++) {
4448 GRN_LOG(ctx, GRN_LOG_DEBUG, "new[%d]=%d free[%d]=%d",
4449 i, new_histogram[i],
4450 i, free_histogram[i]);
4451 }
4452 }
4453 */
4454 return rc;
4455}
4456
4457grn_rc
4458grn_ii_info(grn_ctx *ctx, grn_ii *ii, uint64_t *seg_size, uint64_t *chunk_size)
4459{
4460 grn_rc rc;
4461
4462 if (seg_size) {
4463 if ((rc = grn_io_size(ctx, ii->seg, seg_size))) {
4464 return rc;
4465 }
4466 }
4467
4468 if (chunk_size) {
4469 if ((rc = grn_io_size(ctx, ii->chunk, chunk_size))) {
4470 return rc;
4471 }
4472 }
4473
4474 return GRN_SUCCESS;
4475}
4476
4477grn_column_flags
4478grn_ii_get_flags(grn_ctx *ctx, grn_ii *ii)
4479{
4480 if (!ii) {
4481 return 0;
4482 }
4483
4484 return ii->header->flags;
4485}
4486
4487uint32_t
4488grn_ii_get_n_elements(grn_ctx *ctx, grn_ii *ii)
4489{
4490 if (!ii) {
4491 return 0;
4492 }
4493
4494 return ii->n_elements;
4495}
4496
4497void
4498grn_ii_expire(grn_ctx *ctx, grn_ii *ii)
4499{
4500 /*
4501 grn_io_expire(ctx, ii->seg, 128, 1000000);
4502 */
4503 grn_io_expire(ctx, ii->chunk, 0, 1000000);
4504}
4505
4506grn_rc
4507grn_ii_flush(grn_ctx *ctx, grn_ii *ii)
4508{
4509 grn_rc rc;
4510
4511 rc = grn_io_flush(ctx, ii->seg);
4512 if (rc == GRN_SUCCESS) {
4513 rc = grn_io_flush(ctx, ii->chunk);
4514 }
4515
4516 return rc;
4517}
4518
4519size_t
4520grn_ii_get_disk_usage(grn_ctx *ctx, grn_ii *ii)
4521{
4522 size_t usage;
4523
4524 usage = grn_io_get_disk_usage(ctx, ii->seg);
4525 usage += grn_io_get_disk_usage(ctx, ii->chunk);
4526
4527 return usage;
4528}
4529
/*
 * Bit-field extractors for a packed 32-bit value:
 *   BIT11_01(x) - bits 1..11 (11 bits)
 *   BIT31_12(x) - bits 12..31
 * The argument is parenthesized so that expressions using operators with a
 * lower precedence than `>>` (e.g. `a | b`) are shifted as a whole.
 */
#define BIT11_01(x) (((x) >> 1) & 0x7ff)
#define BIT31_12(x) ((x) >> 12)
4532
4533grn_rc
4534grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_hash *h)
4535{
4536 buffer *b;
4537 uint8_t *bs;
4538 buffer_rec *br = NULL;
4539 buffer_term *bt;
4540 uint32_t pseg = 0, pos = 0, size, *a;
4541 if (!tid) { return ctx->rc; }
4542 if (!u->tf || !u->sid) { return grn_ii_delete_one(ctx, ii, tid, u, h); }
4543 if (u->sid > ii->header->smax) { ii->header->smax = u->sid; }
4544 if (!(a = array_get(ctx, ii, tid))) {
4545 DEFINE_NAME(ii);
4546 MERR("[ii][update][one] failed to allocate an array: "
4547 "<%.*s>: "
4548 "<%u>:<%u>:<%u>",
4549 name_size, name,
4550 u->rid, u->sid, tid);
4551 return ctx->rc;
4552 }
4553 if (!(bs = encode_rec(ctx, ii, u, &size, 0))) {
4554 DEFINE_NAME(ii);
4555 MERR("[ii][update][one] failed to encode a record: "
4556 "<%.*s>: "
4557 "<%u>:<%u>:<%u>",
4558 name_size, name,
4559 u->rid, u->sid, tid);
4560 goto exit;
4561 }
4562 for (;;) {
4563 if (a[0]) {
4564 if (!(a[0] & 1)) {
4565 pos = a[0];
4566 if ((pseg = buffer_open(ctx, ii, pos, &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) {
4567 DEFINE_NAME(ii);
4568 MERR("[ii][update][one] failed to allocate a buffer: "
4569 "<%.*s>: "
4570 "<%u>:<%u>:<%u>: "
4571 "segment:<%u>",
4572 name_size, name,
4573 u->rid, u->sid, tid,
4574 pos);
4575 goto exit;
4576 }
4577 if (b->header.buffer_free < size) {
4578 int bfb = b->header.buffer_free;
4579 GRN_LOG(ctx, GRN_LOG_DEBUG, "flushing a[0]=%d seg=%d(%p) free=%d",
4580 a[0], LSEG(a[0]), b, b->header.buffer_free);
4581 buffer_close(ctx, ii, pseg);
4582 if (SPLIT_COND(ii, b)) {
4583 /*((S_SEGMENT - sizeof(buffer_header) + ii->header->bmax -
4584 b->header.nterms * sizeof(buffer_term)) * 4 <
4585 b->header.chunk_size)*/
4586 GRN_LOG(ctx, GRN_LOG_DEBUG,
4587 "nterms=%d chunk=%d total=%" GRN_FMT_INT64U,
4588 b->header.nterms,
4589 b->header.chunk_size,
4590 ii->header->total_chunk_size >> 10);
4591 buffer_split(ctx, ii, LSEG(pos), h);
4592 if (ctx->rc != GRN_SUCCESS) {
4593 DEFINE_NAME(ii);
4594 ERR(ctx->rc,
4595 "[ii][update][one] failed to split a buffer: "
4596 "<%.*s>: "
4597 "<%u>:<%u><%u>: "
4598 "segment:<%u>",
4599 name_size, name,
4600 u->rid, u->sid, tid,
4601 pos);
4602 goto exit;
4603 }
4604 continue;
4605 }
4606 buffer_flush(ctx, ii, LSEG(pos), h);
4607 if (ctx->rc != GRN_SUCCESS) {
4608 DEFINE_NAME(ii);
4609 ERR(ctx->rc,
4610 "[ii][update][one] failed to flush a buffer: "
4611 "<%.*s>: "
4612 "<%u>:<%u><%u>: "
4613 "segment:<%u>",
4614 name_size, name,
4615 u->rid, u->sid, tid,
4616 pos);
4617 goto exit;
4618 }
4619 if (a[0] != pos) {
4620 GRN_LOG(ctx, GRN_LOG_DEBUG,
4621 "grn_ii_update_one: a[0] changed %d->%d", a[0], pos);
4622 continue;
4623 }
4624 if ((pseg = buffer_open(ctx, ii, pos, &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) {
4625 GRN_LOG(ctx, GRN_LOG_CRIT, "buffer not found a[0]=%d", a[0]);
4626 {
4627 DEFINE_NAME(ii);
4628 MERR("[ii][update][one] failed to reallocate a buffer: "
4629 "<%.*s>: "
4630 "<%u>:<%u>:<%u>: "
4631 "segment:<%u>, new-segment:<%u>",
4632 name_size, name,
4633 u->rid, u->sid, tid,
4634 pos, a[0]);
4635 }
4636 goto exit;
4637 }
4638 GRN_LOG(ctx, GRN_LOG_DEBUG,
4639 "flushed a[0]=%d seg=%d(%p) free=%d->%d nterms=%d v=%d",
4640 a[0], LSEG(a[0]), b, bfb, b->header.buffer_free,
4641 b->header.nterms, b->header.nterms_void);
4642 if (b->header.buffer_free < size) {
4643 DEFINE_NAME(ii);
4644 MERR("[ii][update][one] buffer is full: "
4645 "<%.*s>: "
4646 "<%u>:<%u><%u>: "
4647 "segment:<%u>, new-segment:<%u>, free:<%u>, required:<%u>",
4648 name_size, name,
4649 u->rid, u->sid, tid,
4650 pos, a[0], b->header.buffer_free, size);
4651 buffer_close(ctx, ii, pseg);
4652 /* todo: direct merge */
4653 goto exit;
4654 }
4655 }
4656 b->header.buffer_free -= size;
4657 br = (buffer_rec *)(((byte *)&b->terms[b->header.nterms])
4658 + b->header.buffer_free);
4659 } else {
4660 grn_ii_updspec u2;
4661 uint32_t size2 = 0, v = a[0];
4662 struct _grn_ii_pos pos2;
4663 pos2.pos = a[1];
4664 pos2.next = NULL;
4665 u2.pos = &pos2;
4666 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
4667 u2.rid = BIT31_12(v);
4668 u2.sid = BIT11_01(v);
4669 } else {
4670 u2.rid = v >> 1;
4671 u2.sid = 1;
4672 }
4673 u2.tf = 1;
4674 u2.weight = 0;
4675 if (u2.rid != u->rid || u2.sid != u->sid) {
4676 uint8_t *bs2 = encode_rec(ctx, ii, &u2, &size2, 0);
4677 if (!bs2) {
4678 DEFINE_NAME(ii);
4679 MERR("[ii][update][one] failed to encode a record2: "
4680 "<%.*s>: "
4681 "<%u>:<%u>:<%u>",
4682 name_size, name,
4683 u2.rid, u2.sid, tid);
4684 goto exit;
4685 }
4686 pseg = buffer_new(ctx, ii, size + size2, &pos, &bt, &br, &b, tid, h);
4687 if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
4688 GRN_FREE(bs2);
4689 {
4690 DEFINE_NAME(ii);
4691 MERR("[ii][update][one] failed to create a buffer2: "
4692 "<%.*s>: "
4693 "<%u>:<%u>:<%u>: "
4694 "size:<%u>",
4695 name_size, name,
4696 u2.rid, u2.sid, tid,
4697 size + size2);
4698 }
4699 goto exit;
4700 }
4701 bt->tid = tid;
4702 bt->size_in_chunk = 0;
4703 bt->pos_in_chunk = 0;
4704 bt->size_in_buffer = 0;
4705 bt->pos_in_buffer = 0;
4706 buffer_put(ctx, ii, b, bt, br, bs2, &u2, size2);
4707 if (ctx->rc != GRN_SUCCESS) {
4708 GRN_FREE(bs2);
4709 buffer_close(ctx, ii, pseg);
4710 {
4711 DEFINE_NAME(ii);
4712 MERR("[ii][update][one] failed to put to buffer: "
4713 "<%.*s>: "
4714 "<%u>:<%u>:<%u>",
4715 name_size, name,
4716 u2.rid, u2.sid, tid);
4717 }
4718 goto exit;
4719 }
4720 br = (buffer_rec *)(((byte *)br) + size2);
4721 GRN_FREE(bs2);
4722 }
4723 }
4724 }
4725 break;
4726 }
4727 if (!br) {
4728 if (u->tf == 1 && u->weight == 0) {
4729 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
4730 if (u->rid < 0x100000 && u->sid < 0x800) {
4731 a[0] = (u->rid << 12) + (u->sid << 1) + 1;
4732 a[1] = u->pos->pos;
4733 goto exit;
4734 }
4735 } else {
4736 a[0] = (u->rid << 1) + 1;
4737 a[1] = u->pos->pos;
4738 goto exit;
4739 }
4740 }
4741 pseg = buffer_new(ctx, ii, size, &pos, &bt, &br, &b, tid, h);
4742 if (pseg == GRN_II_PSEG_NOT_ASSIGNED) {
4743 DEFINE_NAME(ii);
4744 MERR("[ii][update][one] failed to create a buffer: "
4745 "<%.*s>: "
4746 "<%u>:<%u>:<%u>: "
4747 "size:<%u>",
4748 name_size, name,
4749 u->rid, u->sid, tid,
4750 size);
4751 goto exit;
4752 }
4753 bt->tid = tid;
4754 bt->size_in_chunk = 0;
4755 bt->pos_in_chunk = 0;
4756 bt->size_in_buffer = 0;
4757 bt->pos_in_buffer = 0;
4758 }
4759 buffer_put(ctx, ii, b, bt, br, bs, u, size);
4760 buffer_close(ctx, ii, pseg);
4761 if (!a[0] || (a[0] & 1)) { a[0] = pos; }
4762exit :
4763 array_unref(ii, tid);
4764 if (bs) { GRN_FREE(bs); }
4765 if (u->tf != u->atf) {
4766 grn_obj *source_table;
4767 char source_table_name[GRN_TABLE_MAX_KEY_SIZE];
4768 int source_table_name_size;
4769 char term[GRN_TABLE_MAX_KEY_SIZE];
4770 int term_size;
4771
4772 source_table = grn_ctx_at(ctx, DB_OBJ(ii)->range);
4773 if (source_table) {
4774 source_table_name_size = grn_obj_name(ctx,
4775 source_table,
4776 source_table_name,
4777 GRN_TABLE_MAX_KEY_SIZE);
4778 } else {
4779 grn_strcpy(source_table_name, GRN_TABLE_MAX_KEY_SIZE, "(null)");
4780 source_table_name_size = strlen(source_table_name);
4781 }
4782 term_size = grn_table_get_key(ctx, ii->lexicon, tid,
4783 term, GRN_TABLE_MAX_KEY_SIZE);
4784 {
4785 DEFINE_NAME(ii);
4786 GRN_LOG(ctx, GRN_LOG_WARNING,
4787 "[ii][update][one] too many postings: "
4788 "<%.*s>: "
4789 "record:<%.*s>(%d), "
4790 "n-postings:<%d>, "
4791 "n-discarded-postings:<%d>, "
4792 "term:<%d>(<%.*s>)",
4793 name_size, name,
4794 source_table_name_size, source_table_name,
4795 u->rid,
4796 u->atf,
4797 u->atf - u->tf,
4798 tid, term_size, term);
4799 }
4800 }
4801 grn_ii_expire(ctx, ii);
4802 return ctx->rc;
4803}
4804
4805grn_rc
4806grn_ii_delete_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_hash *h)
4807{
4808 buffer *b;
4809 uint8_t *bs = NULL;
4810 buffer_rec *br;
4811 buffer_term *bt;
4812 uint32_t pseg, size, *a;
4813 if (!tid) { return ctx->rc; }
4814 if (!(a = array_at(ctx, ii, tid))) {
4815 return ctx->rc;
4816 }
4817 for (;;) {
4818 if (!a[0]) { goto exit; }
4819 if (a[0] & 1) {
4820 if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
4821 uint32_t rid = BIT31_12(a[0]);
4822 uint32_t sid = BIT11_01(a[0]);
4823 if (u->rid == rid && (!u->sid || u->sid == sid)) {
4824 a[0] = 0;
4825 lexicon_delete(ctx, ii, tid, h);
4826 }
4827 } else {
4828 uint32_t rid = a[0] >> 1;
4829 if (u->rid == rid) {
4830 a[0] = 0;
4831 lexicon_delete(ctx, ii, tid, h);
4832 }
4833 }
4834 goto exit;
4835 }
4836 if (!(bs = encode_rec(ctx, ii, u, &size, 1))) {
4837 DEFINE_NAME(ii);
4838 MERR("[ii][delete][one] failed to encode a record: "
4839 "<%.*s>: "
4840 "<%u>:<%u>:<%u>",
4841 name_size, name,
4842 u->rid, u->sid, tid);
4843 goto exit;
4844 }
4845 if ((pseg = buffer_open(ctx, ii, a[0], &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) {
4846 DEFINE_NAME(ii);
4847 MERR("[ii][delete][one] failed to allocate a buffer: "
4848 "<%.*s>: "
4849 "<%u>:<%u><%u>: "
4850 "position:<%u>",
4851 name_size, name,
4852 u->rid, u->sid, tid,
4853 a[0]);
4854 goto exit;
4855 }
4856 if (b->header.buffer_free < size) {
4857 uint32_t _a = a[0];
4858 GRN_LOG(ctx, GRN_LOG_DEBUG, "flushing! b=%p free=%d, seg(%d)",
4859 b, b->header.buffer_free, LSEG(a[0]));
4860 buffer_close(ctx, ii, pseg);
4861 buffer_flush(ctx, ii, LSEG(a[0]), h);
4862 if (ctx->rc != GRN_SUCCESS) {
4863 DEFINE_NAME(ii);
4864 ERR(ctx->rc,
4865 "[ii][delete][one] failed to flush a buffer: "
4866 "<%.*s>: "
4867 "<%u>:<%u><%u>: "
4868 "position:<%u>",
4869 name_size, name,
4870 u->rid, u->sid, tid,
4871 a[0]);
4872 goto exit;
4873 }
4874 if (a[0] != _a) {
4875 GRN_LOG(ctx, GRN_LOG_DEBUG, "grn_ii_delete_one: a[0] changed %d->%d)",
4876 a[0], _a);
4877 continue;
4878 }
4879 if ((pseg = buffer_open(ctx, ii, a[0], &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) {
4880 DEFINE_NAME(ii);
4881 MERR("[ii][delete][one] failed to reallocate a buffer: "
4882 "<%.*s>: "
4883 "<%u>:<%u><%u>: "
4884 "position:<%u>",
4885 name_size, name,
4886 u->rid, u->sid, tid,
4887 a[0]);
4888 goto exit;
4889 }
4890 GRN_LOG(ctx, GRN_LOG_DEBUG, "flushed! b=%p free=%d, seg(%d)",
4891 b, b->header.buffer_free, LSEG(a[0]));
4892 if (b->header.buffer_free < size) {
4893 DEFINE_NAME(ii);
4894 MERR("[ii][delete][one] buffer is full: "
4895 "<%.*s>: "
4896 "<%u>:<%u><%u>: "
4897 "segment:<%u>, free:<%u>, required:<%u>",
4898 name_size, name,
4899 u->rid, u->sid, tid,
4900 a[0], b->header.buffer_free, size);
4901 buffer_close(ctx, ii, pseg);
4902 goto exit;
4903 }
4904 }
4905
4906 b->header.buffer_free -= size;
4907 br = (buffer_rec *)(((byte *)&b->terms[b->header.nterms]) + b->header.buffer_free);
4908 buffer_put(ctx, ii, b, bt, br, bs, u, size);
4909 buffer_close(ctx, ii, pseg);
4910 break;
4911 }
4912exit :
4913 array_unref(ii, tid);
4914 if (bs) { GRN_FREE(bs); }
4915 return ctx->rc;
4916}
4917
/*
 * Bit flags. CHUNK_USED and BUFFER_USED are stored in grn_ii_cursor::stat
 * (grn_ii_cursor_open() sets both for a buffer backed term) and tell the
 * cursor which source still has to be advanced.
 * NOTE(review): SOLE_DOC_USED and SOLE_POS_USED are not referenced in this
 * part of the file - confirm their use elsewhere.
 */
#define CHUNK_USED    1
#define BUFFER_USED   2
#define SOLE_DOC_USED 4
#define SOLE_POS_USED 8
4922
/*
 * State of a cursor over the postings of one term of an inverted index.
 * Created by grn_ii_cursor_open().
 */
struct _grn_ii_cursor {
  grn_db_obj obj;
  grn_ctx *ctx;               /* Context given to grn_ii_cursor_open() */
  grn_ii *ii;                 /* Index being scanned */
  grn_id id;                  /* Term ID being scanned */
  grn_posting *post;          /* Posting compared by GRN_II_CURSOR_CMP() */

  grn_id min; /* Minimum record ID */
  grn_id max;                 /* presumably the maximum record ID - TODO confirm */
  grn_posting pc;             /* Posting state used while CHUNK_USED is set */
  grn_posting pb;             /* Posting state used while BUFFER_USED is set;
                                 also holds a packed single posting */

  uint32_t cdf; /* Document frequency */
  uint32_t *cdp;              /* Start of the decoded record ID gaps */
  uint32_t *crp; /* Record ID */
  uint32_t *csp; /* Section ID */
  uint32_t *ctp; /* Term frequency */
  uint32_t *cwp; /* Weight */
  uint32_t *cpp; /* Position */

  uint8_t *bp;

  int nelements;              /* nelements argument of grn_ii_cursor_open() */
  uint32_t nchunks;           /* Number of entries in cinfo */
  uint32_t curr_chunk;        /* Index of the next cinfo entry to decode */
  chunk_info *cinfo;          /* Sub-chunk table read when the term has
                                 CHUNK_SPLIT set */
  grn_io_win iw;              /* Window mapping the term's chunk data */
  uint8_t *cp;                /* Current read pointer into the mapped chunk */
  uint8_t *cpe;               /* End of the mapped chunk data */
  datavec rdv[MAX_N_ELEMENTS + 1]; /* Decode targets of grn_p_decv() */

  struct grn_ii_buffer *buf;  /* Buffer holding the term; NULL for a packed
                                 single posting */
  uint16_t stat;              /* CHUNK_USED / BUFFER_USED bits */
  uint16_t nextb;             /* Position of the next record in buf
                                 (0 means none) */
  uint32_t buffer_pseg;       /* Physical segment returned by buffer_open() */
  int flags;                  /* flags argument of grn_ii_cursor_open() */
  uint32_t *ppseg;            /* Points at the header binfo entry of the
                                 buffer; compared with buffer_pseg to detect
                                 reuse */

  int weight;

  uint32_t prev_chunk_rid;    /* pc.rid saved right before switching to the
                                 next chunk */
};
4965
4966static grn_bool
4967buffer_is_reused(grn_ctx *ctx, grn_ii *ii, grn_ii_cursor *c)
4968{
4969 if (*c->ppseg != c->buffer_pseg) {
4970 uint32_t i;
4971 for (i = ii->header->bgqtail; i != ii->header->bgqhead;
4972 i = (i + 1) & (GRN_II_BGQSIZE - 1)) {
4973 if (ii->header->bgqbody[i] == c->buffer_pseg) { return GRN_FALSE; }
4974 }
4975 return GRN_TRUE;
4976 }
4977 return GRN_FALSE;
4978}
4979
4980static int
4981chunk_is_reused(grn_ctx *ctx, grn_ii *ii, grn_ii_cursor *c, uint32_t offset, uint32_t size)
4982{
4983 if (*c->ppseg != c->buffer_pseg) {
4984 uint32_t i, m, gseg;
4985 if (size > S_CHUNK) { return 1; }
4986 if (size > (1 << GRN_II_W_LEAST_CHUNK)) {
4987 int es = size - 1;
4988 GRN_BIT_SCAN_REV(es, m);
4989 m++;
4990 } else {
4991 m = GRN_II_W_LEAST_CHUNK;
4992 }
4993 gseg = ii->header->garbages[m - GRN_II_W_LEAST_CHUNK];
4994 while (gseg != GRN_II_PSEG_NOT_ASSIGNED) {
4995 grn_io_win iw;
4996 grn_ii_ginfo *ginfo = WIN_MAP(ii->chunk, ctx, &iw, gseg, 0, S_GARBAGE,
4997 grn_io_rdwr);
4998 if (!ginfo) { break; }
4999 for (i = 0; i < ginfo->nrecs; i++) {
5000 if (ginfo->recs[i] == offset) {
5001 grn_io_win_unmap(&iw);
5002 return 0;
5003 }
5004 }
5005 gseg = ginfo->next;
5006 grn_io_win_unmap(&iw);
5007 }
5008 return 1;
5009 }
5010 return 0;
5011}
5012
/*
 * Evaluates to non-zero when the current posting of cursor c1 sorts after
 * the one of c2, comparing record ID, then section ID, then position.
 * Both arguments are evaluated more than once.
 */
#define GRN_II_CURSOR_CMP(c1,c2) \
  (((c1)->post->rid != (c2)->post->rid) \
   ? ((c1)->post->rid > (c2)->post->rid) \
   : (((c1)->post->sid != (c2)->post->sid) \
      ? ((c1)->post->sid > (c2)->post->sid) \
      : ((c1)->post->pos > (c2)->post->pos)))
5019
/*
 * Opens a cursor over the postings of term `tid`.
 *
 * `min`, `max`, `nelements` and `flags` are stored in the cursor as given.
 * The array entry of the term decides the cursor shape:
 *   a[0] == 0  - the term has no posting; NULL is returned;
 *   a[0] & 1   - a single packed posting is decoded into c->pb;
 *   otherwise  - the buffer at a[0] is opened and, when the term has chunk
 *                data, the chunk is mapped (plus the sub-chunk table for
 *                terms flagged with CHUNK_SPLIT).
 *
 * The whole setup is retried when the buffer or chunk turns out to be
 * reused, or when a[0] changed while the cursor was being prepared.
 *
 * Returns the cursor, or NULL when the term has no array entry or posting,
 * or when an allocation, buffer open or chunk mapping fails.
 */
grn_ii_cursor *
grn_ii_cursor_open(grn_ctx *ctx, grn_ii *ii, grn_id tid,
                   grn_id min, grn_id max, int nelements, int flags)
{
  grn_ii_cursor *c  = NULL;
  uint32_t pos, *a;
  if (!(a = array_at(ctx, ii, tid))) { return NULL; }
  for (;;) {
    c = NULL;
    /* Snapshot a[0]; it is compared again at the end of the iteration. */
    if (!(pos = a[0])) { goto exit; }
    if (!(c = GRN_MALLOC(sizeof(grn_ii_cursor)))) { goto exit; }
    memset(c, 0, sizeof(grn_ii_cursor));
    c->ctx = ctx;
    c->ii = ii;
    c->id = tid;
    c->min = min;
    c->max = max;
    c->nelements = nelements;
    c->flags = flags;
    c->weight = 0;
    if (pos & 1) {
      /* Single posting packed into the array entry. */
      c->stat = 0;
      if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {
        c->pb.rid = BIT31_12(pos);
        c->pb.sid = BIT11_01(pos);
      } else {
        c->pb.rid = pos >> 1;
        c->pb.sid = 1;
      }
      c->pb.tf = 1;
      c->pb.weight = 0;
      c->pb.pos = a[1];
    } else {
      /* Postings live in a buffer and possibly in a chunk. */
      uint32_t chunk;
      buffer_term *bt;
      c->buffer_pseg = buffer_open(ctx, ii, pos, &bt, &c->buf);
      if (c->buffer_pseg == GRN_II_PSEG_NOT_ASSIGNED) {
        GRN_FREE(c);
        c = NULL;
        goto exit;
      }
      /* Remembered so that later checks can detect a recycled buffer. */
      c->ppseg = &ii->header->binfo[LSEG(pos)];
      if (bt->size_in_chunk && (chunk = c->buf->header.chunk) != GRN_II_PSEG_NOT_ASSIGNED) {
        if (!(c->cp = WIN_MAP(ii->chunk, ctx, &c->iw, chunk, bt->pos_in_chunk,
                              bt->size_in_chunk, grn_io_rdonly))) {
          buffer_close(ctx, ii, c->buffer_pseg);
          GRN_FREE(c);
          c = NULL;
          goto exit;
        }
        if (buffer_is_reused(ctx, ii, c)) {
          grn_ii_cursor_close(ctx, c);
          continue;
        }
        c->cpe = c->cp + bt->size_in_chunk;
        if ((bt->tid & CHUNK_SPLIT)) {
          /* The chunk starts with a table describing its sub-chunks. */
          int i;
          grn_id crid;
          GRN_B_DEC(c->nchunks, c->cp);
          if (chunk_is_reused(ctx, ii, c, chunk, c->buf->header.chunk_size)) {
            grn_ii_cursor_close(ctx, c);
            continue;
          }
          if (!(c->cinfo = GRN_MALLOCN(chunk_info, c->nchunks))) {
            buffer_close(ctx, ii, c->buffer_pseg);
            grn_io_win_unmap(&c->iw);
            GRN_FREE(c);
            c = NULL;
            goto exit;
          }
          for (i = 0, crid = GRN_ID_NIL; i < c->nchunks; i++) {
            GRN_B_DEC(c->cinfo[i].segno, c->cp);
            GRN_B_DEC(c->cinfo[i].size, c->cp);
            GRN_B_DEC(c->cinfo[i].dgap, c->cp);
            crid += c->cinfo[i].dgap;
            /* Skip leading sub-chunks whose accumulated record ID is still
               below `min`. */
            if (crid < min) {
              c->pc.rid = crid;
              c->curr_chunk = i + 1;
            }
          }
          /* Checked again after the table has been read. */
          if (chunk_is_reused(ctx, ii, c, chunk, c->buf->header.chunk_size)) {
            grn_ii_cursor_close(ctx, c);
            continue;
          }
        }
        if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {
          c->rdv[ii->n_elements - 1].flags = ODD;
        }
      }
      c->nextb = bt->pos_in_buffer;
      c->stat = CHUNK_USED|BUFFER_USED;
    }
    /* Accept the cursor only when the array entry did not change while it
       was being set up; otherwise discard it and retry. */
    if (pos == a[0]) { break; }
    grn_ii_cursor_close(ctx, c);
  }
exit :
  array_unref(ii, tid);
  return c;
}
5119
5120static inline void
5121grn_ii_cursor_set_min(grn_ctx *ctx, grn_ii_cursor *c, grn_id min)
5122{
5123 if (c->min >= min) {
5124 return;
5125 }
5126
5127 if (grn_ii_cursor_set_min_enable) {
5128 grn_id old_min = c->min;
5129 c->min = min;
5130 if (c->buf &&
5131 c->pc.rid != GRN_ID_NIL &&
5132 c->pc.rid < c->min &&
5133 c->prev_chunk_rid < c->min &&
5134 c->curr_chunk < c->nchunks) {
5135 uint32_t i;
5136 uint32_t skip_chunk = 0;
5137 grn_id rid = c->prev_chunk_rid;
5138
5139 if (c->curr_chunk > 0) {
5140 i = c->curr_chunk - 1;
5141 } else {
5142 i = 0;
5143 }
5144 for (; i < c->nchunks; i++) {
5145 rid += c->cinfo[i].dgap;
5146 if (rid < c->min) {
5147 skip_chunk = i + 1;
5148 } else {
5149 rid -= c->cinfo[i].dgap;
5150 break;
5151 }
5152 }
5153 if (skip_chunk > c->curr_chunk) {
5154 uint32_t old_chunk = c->curr_chunk;
5155 grn_bool old_chunk_used = (c->stat & CHUNK_USED);
5156 c->pc.rid = rid;
5157 c->pc.rest = 0;
5158 c->prev_chunk_rid = rid - c->cinfo[skip_chunk - 1].dgap;
5159 c->curr_chunk = skip_chunk;
5160 c->crp = c->cdp + c->cdf;
5161 c->stat |= CHUNK_USED;
5162 GRN_LOG(ctx, GRN_LOG_DEBUG,
5163 "[ii][cursor][min] skip: %p: min(%u->%u): chunk(%u->%u): "
5164 "chunk-used(%s->%s)",
5165 c,
5166 old_min, min,
5167 old_chunk, c->curr_chunk,
5168 old_chunk_used ? "true" : "false",
5169 (c->stat & CHUNK_USED) ? "true" : "false");
5170 }
5171 }
5172 }
5173}
5174
/* Options read by grn_ii_cursor_next_internal(). */
typedef struct {
  /* NOTE(review): presumably makes the cursor also report postings that
     would otherwise be skipped as garbage - confirm against
     grn_ii_cursor_next_internal(). */
  grn_bool include_garbage;
} grn_ii_cursor_next_options;
5178
5179static inline grn_posting *
5180grn_ii_cursor_next_internal(grn_ctx *ctx, grn_ii_cursor *c,
5181 grn_ii_cursor_next_options *options)
5182{
5183 const grn_bool include_garbage = options->include_garbage;
5184 if (c->buf) {
5185 for (;;) {
5186 if (c->stat & CHUNK_USED) {
5187 for (;;) {
5188 if (c->crp < c->cdp + c->cdf) {
5189 uint32_t dgap = *c->crp++;
5190 c->pc.rid += dgap;
5191 if (dgap) { c->pc.sid = 0; }
5192 if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) {
5193 c->pc.sid += 1 + *c->csp++;
5194 } else {
5195 c->pc.sid = 1;
5196 }
5197 c->cpp += c->pc.rest;
5198 c->pc.rest = c->pc.tf = 1 + *c->ctp++;
5199 if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
5200 c->pc.weight = *c->cwp++;
5201 } else {
5202 c->pc.weight = 0;
5203 }
5204 c->pc.pos = 0;
5205 /*
5206 {
5207 static int count = 0;
5208 int tf = c->pc.tf, pos = 0, *pp = (int *)c->cpp;
5209 grn_obj buf;
5210 GRN_TEXT_INIT(&buf, 0);
5211 grn_text_itoa(ctx, &buf, c->pc.rid);
5212 GRN_TEXT_PUTC(ctx, &buf, ':');
5213 grn_text_itoa(ctx, &buf, c->pc.sid);
5214 GRN_TEXT_PUTC(ctx, &buf, ':');
5215 grn_text_itoa(ctx, &buf, c->pc.tf);
5216 GRN_TEXT_PUTC(ctx, &buf, '(');
5217 while (tf--) {
5218 pos += *pp++;
5219 count++;
5220 grn_text_itoa(ctx, &buf, pos);
5221 if (tf) { GRN_TEXT_PUTC(ctx, &buf, ':'); }
5222 }
5223 GRN_TEXT_PUTC(ctx, &buf, ')');
5224 GRN_TEXT_PUTC(ctx, &buf, '\0');
5225 GRN_LOG(ctx, GRN_LOG_DEBUG, "posting(%d):%s", count, GRN_TEXT_VALUE(&buf));
5226 GRN_OBJ_FIN(ctx, &buf);
5227 }
5228 */
5229 } else {
5230 if (c->curr_chunk <= c->nchunks) {
5231 if (c->curr_chunk == c->nchunks) {
5232 if (c->cp < c->cpe) {
5233 int decoded_size;
5234 decoded_size =
5235 grn_p_decv(ctx, c->cp, c->cpe - c->cp,
5236 c->rdv, c->ii->n_elements);
5237 if (decoded_size == 0) {
5238 GRN_LOG(ctx, GRN_LOG_WARNING,
5239 "[ii][cursor][next][chunk][last] "
5240 "chunk(%d) is changed by another thread "
5241 "while decoding: %p",
5242 c->cinfo[c->curr_chunk].segno,
5243 c);
5244 c->pc.rid = GRN_ID_NIL;
5245 break;
5246 }
5247 if (buffer_is_reused(ctx, c->ii, c)) {
5248 GRN_LOG(ctx, GRN_LOG_WARNING,
5249 "[ii][cursor][next][chunk][last] "
5250 "buffer is reused by another thread: %p",
5251 c);
5252 c->pc.rid = GRN_ID_NIL;
5253 break;
5254 }
5255 if (chunk_is_reused(ctx, c->ii, c,
5256 c->buf->header.chunk,
5257 c->buf->header.chunk_size)) {
5258 GRN_LOG(ctx, GRN_LOG_WARNING,
5259 "[ii][cursor][next][chunk][last] "
5260 "chunk(%d) is reused by another thread: %p",
5261 c->buf->header.chunk,
5262 c);
5263 c->pc.rid = GRN_ID_NIL;
5264 break;
5265 }
5266 } else {
5267 c->pc.rid = GRN_ID_NIL;
5268 break;
5269 }
5270 } else {
5271 uint8_t *cp;
5272 grn_io_win iw;
5273 uint32_t size = c->cinfo[c->curr_chunk].size;
5274 if (size && (cp = WIN_MAP(c->ii->chunk, ctx, &iw,
5275 c->cinfo[c->curr_chunk].segno, 0,
5276 size, grn_io_rdonly))) {
5277 int decoded_size;
5278 decoded_size =
5279 grn_p_decv(ctx, cp, size, c->rdv, c->ii->n_elements);
5280 grn_io_win_unmap(&iw);
5281 if (decoded_size == 0) {
5282 GRN_LOG(ctx, GRN_LOG_WARNING,
5283 "[ii][cursor][next][chunk] "
5284 "chunk(%d) is changed by another thread "
5285 "while decoding: %p",
5286 c->cinfo[c->curr_chunk].segno,
5287 c);
5288 c->pc.rid = GRN_ID_NIL;
5289 break;
5290 }
5291 if (chunk_is_reused(ctx, c->ii, c,
5292 c->cinfo[c->curr_chunk].segno, size)) {
5293 GRN_LOG(ctx, GRN_LOG_WARNING,
5294 "[ii][cursor][next][chunk] "
5295 "chunk(%d) is reused by another thread: %p",
5296 c->cinfo[c->curr_chunk].segno,
5297 c);
5298 c->pc.rid = GRN_ID_NIL;
5299 break;
5300 }
5301 } else {
5302 c->pc.rid = GRN_ID_NIL;
5303 break;
5304 }
5305 }
5306 {
5307 int j = 0;
5308 c->cdf = c->rdv[j].data_size;
5309 c->crp = c->cdp = c->rdv[j++].data;
5310 if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) {
5311 c->csp = c->rdv[j++].data;
5312 }
5313 c->ctp = c->rdv[j++].data;
5314 if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
5315 c->cwp = c->rdv[j++].data;
5316 }
5317 if ((c->ii->header->flags & GRN_OBJ_WITH_POSITION)) {
5318 c->cpp = c->rdv[j].data;
5319 }
5320 }
5321 c->prev_chunk_rid = c->pc.rid;
5322 c->pc.rid = GRN_ID_NIL;
5323 c->pc.sid = 0;
5324 c->pc.rest = 0;
5325 c->curr_chunk++;
5326 continue;
5327 } else {
5328 c->pc.rid = GRN_ID_NIL;
5329 }
5330 }
5331 break;
5332 }
5333 }
5334 if (c->stat & BUFFER_USED) {
5335 for (;;) {
5336 if (c->nextb) {
5337 uint32_t lrid = c->pb.rid, lsid = c->pb.sid; /* for check */
5338 buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb);
5339 if (buffer_is_reused(ctx, c->ii, c)) {
5340 GRN_LOG(ctx, GRN_LOG_WARNING,
5341 "[ii][cursor][next][buffer] "
5342 "buffer(%d,%d) is reused by another thread: %p",
5343 c->buffer_pseg, *c->ppseg,
5344 c);
5345 c->pb.rid = GRN_ID_NIL;
5346 break;
5347 }
5348 c->bp = GRN_NEXT_ADDR(br);
5349 GRN_B_DEC(c->pb.rid, c->bp);
5350 if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) {
5351 GRN_B_DEC(c->pb.sid, c->bp);
5352 } else {
5353 c->pb.sid = 1;
5354 }
5355 if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) {
5356 DEFINE_NAME(c->ii);
5357 ERR(GRN_FILE_CORRUPT,
5358 "[ii][broken][cursor][next][buffer] "
5359 "posting in list in buffer isn't sorted: "
5360 "<%.*s>: (%d:%d) -> (%d:%d) (%d->%d)",
5361 name_size, name,
5362 lrid, lsid,
5363 c->pb.rid, c->pb.sid,
5364 c->buffer_pseg, *c->ppseg);
5365 c->pb.rid = GRN_ID_NIL;
5366 break;
5367 }
5368 if (c->pb.rid < c->min) {
5369 c->pb.rid = 0;
5370 if (br->jump > 0 && !BUFFER_REC_DELETED(br)) {
5371 buffer_rec *jump_br = BUFFER_REC_AT(c->buf, br->jump);
5372 if (BUFFER_REC_DELETED(jump_br)) {
5373 c->nextb = br->step;
5374 } else {
5375 uint8_t *jump_bp;
5376 uint32_t jump_rid;
5377 jump_bp = GRN_NEXT_ADDR(jump_br);
5378 GRN_B_DEC(jump_rid, jump_bp);
5379 if (jump_rid < c->min) {
5380 c->nextb = br->jump;
5381 } else {
5382 c->nextb = br->step;
5383 }
5384 }
5385 } else {
5386 c->nextb = br->step;
5387 }
5388 continue;
5389 }
5390 c->nextb = br->step;
5391 GRN_B_DEC(c->pb.tf, c->bp);
5392 if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
5393 GRN_B_DEC(c->pb.weight, c->bp);
5394 } else {
5395 c->pb.weight = 0;
5396 }
5397 c->pb.rest = c->pb.tf;
5398 c->pb.pos = 0;
5399 } else {
5400 c->pb.rid = 0;
5401 }
5402 break;
5403 }
5404 }
5405 if (c->pb.rid) {
5406 if (c->pc.rid) {
5407 if (c->pc.rid < c->pb.rid) {
5408 c->stat = CHUNK_USED;
5409 if (include_garbage || (c->pc.tf && c->pc.sid)) {
5410 c->post = &c->pc;
5411 break;
5412 }
5413 } else {
5414 if (c->pb.rid < c->pc.rid) {
5415 c->stat = BUFFER_USED;
5416 if (include_garbage || (c->pb.tf && c->pb.sid)) {
5417 c->post = &c->pb;
5418 break;
5419 }
5420 } else {
5421 if (c->pb.sid) {
5422 if (c->pc.sid < c->pb.sid) {
5423 c->stat = CHUNK_USED;
5424 if (include_garbage || (c->pc.tf && c->pc.sid)) {
5425 c->post = &c->pc;
5426 break;
5427 }
5428 } else {
5429 c->stat = BUFFER_USED;
5430 if (c->pb.sid == c->pc.sid) { c->stat |= CHUNK_USED; }
5431 if (include_garbage || (c->pb.tf)) {
5432 c->post = &c->pb;
5433 break;
5434 }
5435 }
5436 } else {
5437 c->stat = CHUNK_USED;
5438 }
5439 }
5440 }
5441 } else {
5442 c->stat = BUFFER_USED;
5443 if (include_garbage || (c->pb.tf && c->pb.sid)) {
5444 c->post = &c->pb;
5445 break;
5446 }
5447 }
5448 } else {
5449 if (c->pc.rid) {
5450 c->stat = CHUNK_USED;
5451 if (include_garbage || (c->pc.tf && c->pc.sid)) {
5452 c->post = &c->pc;
5453 break;
5454 }
5455 } else {
5456 c->post = NULL;
5457 return NULL;
5458 }
5459 }
5460 }
5461 } else {
5462 if (c->stat & SOLE_DOC_USED) {
5463 c->post = NULL;
5464 return NULL;
5465 } else {
5466 c->post = &c->pb;
5467 c->stat |= SOLE_DOC_USED;
5468 if (c->post->rid < c->min) {
5469 c->post = NULL;
5470 return NULL;
5471 }
5472 }
5473 }
5474 return c->post;
5475}
5476
5477grn_posting *
5478grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
5479{
5480 grn_ii_cursor_next_options options = {
5481 .include_garbage = GRN_FALSE
5482 };
5483 return grn_ii_cursor_next_internal(ctx, c, &options);
5484}
5485
5486grn_posting *
5487grn_ii_cursor_next_pos(grn_ctx *ctx, grn_ii_cursor *c)
5488{
5489 uint32_t gap;
5490 if ((c->ii->header->flags & GRN_OBJ_WITH_POSITION)) {
5491 if (c->nelements == c->ii->n_elements) {
5492 if (c->buf) {
5493 if (c->post == &c->pc) {
5494 if (c->pc.rest) {
5495 c->pc.rest--;
5496 c->pc.pos += *c->cpp++;
5497 } else {
5498 return NULL;
5499 }
5500 } else if (c->post == &c->pb) {
5501 if (buffer_is_reused(ctx, c->ii, c)) {
5502 GRN_LOG(ctx, GRN_LOG_WARNING,
5503 "[ii][cursor][next][pos][buffer] "
5504 "buffer(%d,%d) is reused by another thread: %p",
5505 c->buffer_pseg, *c->ppseg,
5506 c);
5507 return NULL;
5508 }
5509 if (c->pb.rest) {
5510 c->pb.rest--;
5511 GRN_B_DEC(gap, c->bp);
5512 c->pb.pos += gap;
5513 } else {
5514 return NULL;
5515 }
5516 } else {
5517 return NULL;
5518 }
5519 } else {
5520 if (c->stat & SOLE_POS_USED) {
5521 return NULL;
5522 } else {
5523 c->stat |= SOLE_POS_USED;
5524 }
5525 }
5526 }
5527 } else {
5528 if (c->stat & SOLE_POS_USED) {
5529 return NULL;
5530 } else {
5531 c->stat |= SOLE_POS_USED;
5532 }
5533 }
5534 return c->post;
5535}
5536
5537grn_rc
5538grn_ii_cursor_close(grn_ctx *ctx, grn_ii_cursor *c)
5539{
5540 if (!c) { return GRN_INVALID_ARGUMENT; }
5541 datavec_fin(ctx, c->rdv);
5542 if (c->cinfo) { GRN_FREE(c->cinfo); }
5543 if (c->buf) { buffer_close(ctx, c->ii, c->buffer_pseg); }
5544 if (c->cp) { grn_io_win_unmap(&c->iw); }
5545 GRN_FREE(c);
5546 return GRN_SUCCESS;
5547}
5548
5549uint32_t
5550grn_ii_get_chunksize(grn_ctx *ctx, grn_ii *ii, grn_id tid)
5551{
5552 uint32_t res, pos, *a;
5553 a = array_at(ctx, ii, tid);
5554 if (!a) { return 0; }
5555 if ((pos = a[0])) {
5556 if (pos & 1) {
5557 res = 0;
5558 } else {
5559 buffer *buf;
5560 uint32_t pseg;
5561 buffer_term *bt;
5562 if ((pseg = buffer_open(ctx, ii, pos, &bt, &buf)) == GRN_II_PSEG_NOT_ASSIGNED) {
5563 res = 0;
5564 } else {
5565 res = bt->size_in_chunk;
5566 buffer_close(ctx, ii, pseg);
5567 }
5568 }
5569 } else {
5570 res = 0;
5571 }
5572 array_unref(ii, tid);
5573 return res;
5574}
5575
5576uint32_t
5577grn_ii_estimate_size(grn_ctx *ctx, grn_ii *ii, grn_id tid)
5578{
5579 uint32_t res, pos, *a;
5580 a = array_at(ctx, ii, tid);
5581 if (!a) { return 0; }
5582 if ((pos = a[0])) {
5583 if (pos & 1) {
5584 res = 1;
5585 } else {
5586 buffer *buf;
5587 uint32_t pseg;
5588 buffer_term *bt;
5589 if ((pseg = buffer_open(ctx, ii, pos, &bt, &buf)) == GRN_II_PSEG_NOT_ASSIGNED) {
5590 res = 0;
5591 } else {
5592 res = a[1] + bt->size_in_buffer + 2;
5593 buffer_close(ctx, ii, pseg);
5594 }
5595 }
5596 } else {
5597 res = 0;
5598 }
5599 array_unref(ii, tid);
5600 return res;
5601}
5602
5603int
5604grn_ii_entry_info(grn_ctx *ctx, grn_ii *ii, grn_id tid, unsigned int *a,
5605 unsigned int *chunk, unsigned int *chunk_size,
5606 unsigned int *buffer_free,
5607 unsigned int *nterms, unsigned int *nterms_void,
5608 unsigned int *bt_tid,
5609 unsigned int *size_in_chunk, unsigned int *pos_in_chunk,
5610 unsigned int *size_in_buffer, unsigned int *pos_in_buffer)
5611{
5612 buffer *b;
5613 buffer_term *bt;
5614 uint32_t pseg, *ap;
5615 ERRCLR(NULL);
5616 ap = array_at(ctx, ii, tid);
5617 if (!ap) { return 0; }
5618 a[0] = *ap;
5619 array_unref(ii, tid);
5620 if (!a[0]) { return 1; }
5621 if (a[0] & 1) { return 2; }
5622 if ((pseg = buffer_open(ctx, ii, a[0], &bt, &b)) == GRN_II_PSEG_NOT_ASSIGNED) { return 3; }
5623 *chunk = b->header.chunk;
5624 *chunk_size = b->header.chunk_size;
5625 *buffer_free = b->header.buffer_free;
5626 *nterms = b->header.nterms;
5627 *bt_tid = bt->tid;
5628 *size_in_chunk = bt->size_in_chunk;
5629 *pos_in_chunk = bt->pos_in_chunk;
5630 *size_in_buffer = bt->size_in_buffer;
5631 *pos_in_buffer = bt->pos_in_buffer;
5632 buffer_close(ctx, ii, pseg);
5633 return 4;
5634}
5635
5636const char *
5637grn_ii_path(grn_ii *ii)
5638{
5639 return grn_io_path(ii->seg);
5640}
5641
5642uint32_t
5643grn_ii_max_section(grn_ii *ii)
5644{
5645 return ii->header->smax;
5646}
5647
5648grn_obj *
5649grn_ii_lexicon(grn_ii *ii)
5650{
5651 return ii->lexicon;
5652}
5653
5654/* private classes */
5655
5656/* b-heap */
5657
/*
 * Binary heap of index cursors kept in a growable array and ordered with
 * GRN_II_CURSOR_CMP.
 */
typedef struct {
  /* Number of cursors currently stored in bins. */
  int n_entries;
  /* Allocated capacity of bins, in elements. */
  int n_bins;
  /* Heap array; bins[0] is the entry returned by cursor_heap_min(). */
  grn_ii_cursor **bins;
} cursor_heap;
5663
5664static inline cursor_heap *
5665cursor_heap_open(grn_ctx *ctx, int max)
5666{
5667 cursor_heap *h = GRN_MALLOC(sizeof(cursor_heap));
5668 if (!h) { return NULL; }
5669 h->bins = GRN_MALLOC(sizeof(grn_ii_cursor *) * max);
5670 if (!h->bins) {
5671 GRN_FREE(h);
5672 return NULL;
5673 }
5674 h->n_entries = 0;
5675 h->n_bins = max;
5676 return h;
5677}
5678
5679static inline grn_rc
5680cursor_heap_push(grn_ctx *ctx, cursor_heap *h, grn_ii *ii, grn_id tid, uint32_t offset2,
5681 int weight, grn_id min)
5682{
5683 int n, n2;
5684 grn_ii_cursor *c, *c2;
5685 if (h->n_entries >= h->n_bins) {
5686 int max = h->n_bins * 2;
5687 grn_ii_cursor **bins = GRN_REALLOC(h->bins, sizeof(grn_ii_cursor *) * max);
5688 GRN_LOG(ctx, GRN_LOG_DEBUG, "expanded cursor_heap to %d,%p", max, bins);
5689 if (!bins) { return GRN_NO_MEMORY_AVAILABLE; }
5690 h->n_bins = max;
5691 h->bins = bins;
5692 }
5693 {
5694 if (!(c = grn_ii_cursor_open(ctx, ii, tid, min, GRN_ID_MAX,
5695 ii->n_elements, 0))) {
5696 GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed");
5697 return ctx->rc;
5698 }
5699 if (!grn_ii_cursor_next(ctx, c)) {
5700 grn_ii_cursor_close(ctx, c);
5701 return GRN_END_OF_DATA;
5702 }
5703 if (!grn_ii_cursor_next_pos(ctx, c)) {
5704 if (grn_logger_pass(ctx, GRN_LOG_ERROR)) {
5705 char token[GRN_TABLE_MAX_KEY_SIZE];
5706 int token_size;
5707 token_size = grn_table_get_key(ctx,
5708 c->ii->lexicon,
5709 c->id,
5710 &token,
5711 GRN_TABLE_MAX_KEY_SIZE);
5712 GRN_LOG(ctx, GRN_LOG_ERROR,
5713 "[ii][cursor][heap][push] invalid cursor: "
5714 "%p: token:<%.*s>(%u)",
5715 c, token_size, token, c->id);
5716 }
5717 grn_ii_cursor_close(ctx, c);
5718 return GRN_END_OF_DATA;
5719 }
5720 if (weight) {
5721 c->weight = weight;
5722 }
5723 n = h->n_entries++;
5724 while (n) {
5725 n2 = (n - 1) >> 1;
5726 c2 = h->bins[n2];
5727 if (GRN_II_CURSOR_CMP(c, c2)) { break; }
5728 h->bins[n] = c2;
5729 n = n2;
5730 }
5731 h->bins[n] = c;
5732 }
5733 return GRN_SUCCESS;
5734}
5735
5736static inline grn_rc
5737cursor_heap_push2(cursor_heap *h)
5738{
5739 grn_rc rc = GRN_SUCCESS;
5740 return rc;
5741}
5742
5743static inline grn_ii_cursor *
5744cursor_heap_min(cursor_heap *h)
5745{
5746 return h->n_entries ? h->bins[0] : NULL;
5747}
5748
5749static inline void
5750cursor_heap_recalc_min(cursor_heap *h)
5751{
5752 int n = 0, n1, n2, m;
5753 if ((m = h->n_entries) > 1) {
5754 grn_ii_cursor *c = h->bins[0], *c1, *c2;
5755 for (;;) {
5756 n1 = n * 2 + 1;
5757 n2 = n1 + 1;
5758 c1 = n1 < m ? h->bins[n1] : NULL;
5759 c2 = n2 < m ? h->bins[n2] : NULL;
5760 if (c1 && GRN_II_CURSOR_CMP(c, c1)) {
5761 if (c2 && GRN_II_CURSOR_CMP(c, c2) && GRN_II_CURSOR_CMP(c1, c2)) {
5762 h->bins[n] = c2;
5763 n = n2;
5764 } else {
5765 h->bins[n] = c1;
5766 n = n1;
5767 }
5768 } else {
5769 if (c2 && GRN_II_CURSOR_CMP(c, c2)) {
5770 h->bins[n] = c2;
5771 n = n2;
5772 } else {
5773 h->bins[n] = c;
5774 break;
5775 }
5776 }
5777 }
5778 }
5779}
5780
5781static inline void
5782cursor_heap_pop(grn_ctx *ctx, cursor_heap *h, grn_id min)
5783{
5784 if (h->n_entries) {
5785 grn_ii_cursor *c = h->bins[0];
5786 grn_ii_cursor_set_min(ctx, c, min);
5787 if (!grn_ii_cursor_next(ctx, c)) {
5788 grn_ii_cursor_close(ctx, c);
5789 h->bins[0] = h->bins[--h->n_entries];
5790 } else if (!grn_ii_cursor_next_pos(ctx, c)) {
5791 if (grn_logger_pass(ctx, GRN_LOG_ERROR)) {
5792 char token[GRN_TABLE_MAX_KEY_SIZE];
5793 int token_size;
5794 token_size = grn_table_get_key(ctx,
5795 c->ii->lexicon,
5796 c->id,
5797 &token,
5798 GRN_TABLE_MAX_KEY_SIZE);
5799 GRN_LOG(ctx, GRN_LOG_ERROR,
5800 "[ii][cursor][heap][pop] invalid cursor: "
5801 "%p: token:<%.*s>(%u)",
5802 c, token_size, token, c->id);
5803 }
5804 grn_ii_cursor_close(ctx, c);
5805 h->bins[0] = h->bins[--h->n_entries];
5806 }
5807 if (h->n_entries > 1) { cursor_heap_recalc_min(h); }
5808 }
5809}
5810
5811static inline void
5812cursor_heap_pop_pos(grn_ctx *ctx, cursor_heap *h)
5813{
5814 if (h->n_entries) {
5815 grn_ii_cursor *c = h->bins[0];
5816 if (!grn_ii_cursor_next_pos(ctx, c)) {
5817 if (!grn_ii_cursor_next(ctx, c)) {
5818 grn_ii_cursor_close(ctx, c);
5819 h->bins[0] = h->bins[--h->n_entries];
5820 } else if (!grn_ii_cursor_next_pos(ctx, c)) {
5821 if (grn_logger_pass(ctx, GRN_LOG_ERROR)) {
5822 char token[GRN_TABLE_MAX_KEY_SIZE];
5823 int token_size;
5824 token_size = grn_table_get_key(ctx,
5825 c->ii->lexicon,
5826 c->id,
5827 &token,
5828 GRN_TABLE_MAX_KEY_SIZE);
5829 GRN_LOG(ctx, GRN_LOG_ERROR,
5830 "[ii][cursor][heap][pop][position] invalid cursor: "
5831 "%p: token:<%.*s>(%u)",
5832 c, token_size, token, c->id);
5833 }
5834 grn_ii_cursor_close(ctx, c);
5835 h->bins[0] = h->bins[--h->n_entries];
5836 }
5837 }
5838 if (h->n_entries > 1) { cursor_heap_recalc_min(h); }
5839 }
5840}
5841
5842static inline void
5843cursor_heap_close(grn_ctx *ctx, cursor_heap *h)
5844{
5845 int i;
5846 if (!h) { return; }
5847 for (i = h->n_entries; i--;) { grn_ii_cursor_close(ctx, h->bins[i]); }
5848 GRN_FREE(h->bins);
5849 GRN_FREE(h);
5850}
5851
5852/* update */
5853#ifdef USE_VGRAM
5854
5855inline static grn_rc
5856index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgram,
5857 const char *value, size_t value_len)
5858{
5859 grn_hash *h;
5860 unsigned int token_flags = 0;
5861 grn_token_cursor *token_cursor;
5862 grn_ii_updspec **u;
5863 grn_id tid, *tp;
5864 grn_rc r, rc = GRN_SUCCESS;
5865 grn_vgram_buf *sbuf = NULL;
5866 if (!rid) { return GRN_INVALID_ARGUMENT; }
5867 if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len,
5868 GRN_TOKEN_ADD, token_flags))) {
5869 return GRN_NO_MEMORY_AVAILABLE;
5870 }
5871 if (vgram) { sbuf = grn_vgram_buf_open(value_len); }
5872 h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *),
5873 GRN_HASH_TINY);
5874 if (!h) {
5875 GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on index_add failed !");
5876 grn_token_cursor_close(ctx, token_cursor);
5877 if (sbuf) { grn_vgram_buf_close(sbuf); }
5878 return GRN_NO_MEMORY_AVAILABLE;
5879 }
5880 while (!token_cursor->status) {
5881 (tid = grn_token_cursor_next(ctx, token_cursor));
5882 if (tid) {
5883 if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) {
5884 break;
5885 }
5886 if (!*u) {
5887 if (!(*u = grn_ii_updspec_open(ctx, rid, 1))) {
5888 GRN_LOG(ctx, GRN_LOG_ERROR,
5889 "grn_ii_updspec_open on index_add failed!");
5890 goto exit;
5891 }
5892 }
5893 if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, 0)) {
5894 GRN_LOG(ctx, GRN_LOG_ERROR,
5895 "grn_ii_updspec_add on index_add failed!");
5896 goto exit;
5897 }
5898 if (sbuf) { grn_vgram_buf_add(sbuf, tid); }
5899 }
5900 }
5901 grn_token_cursor_close(ctx, token_cursor);
5902 // todo : support vgram
5903 // if (sbuf) { grn_vgram_update(vgram, rid, sbuf, (grn_set *)h); }
5904 GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, {
5905 if ((r = grn_ii_update_one(ctx, ii, *tp, *u, h))) { rc = r; }
5906 grn_ii_updspec_close(ctx, *u);
5907 });
5908 grn_hash_close(ctx, h);
5909 if (sbuf) { grn_vgram_buf_close(sbuf); }
5910 return rc;
5911exit:
5912 grn_hash_close(ctx, h);
5913 grn_token_cursor_close(ctx, token_cursor);
5914 if (sbuf) { grn_vgram_buf_close(sbuf); }
5915 return GRN_NO_MEMORY_AVAILABLE;
5916}
5917
5918inline static grn_rc
5919index_del(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgram,
5920 const char *value, size_t value_len)
5921{
5922 grn_rc rc = GRN_SUCCESS;
5923 grn_hash *h;
5924 unsigned int token_flags = 0;
5925 grn_token_cursor *token_cursor;
5926 grn_ii_updspec **u;
5927 grn_id tid, *tp;
5928 if (!rid) { return GRN_INVALID_ARGUMENT; }
5929 if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len,
5930 GRN_TOKEN_DEL, token_flags))) {
5931 return GRN_NO_MEMORY_AVAILABLE;
5932 }
5933 h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *),
5934 GRN_HASH_TINY);
5935 if (!h) {
5936 GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on index_del failed !");
5937 grn_token_cursor_close(ctx, token_cursor);
5938 return GRN_NO_MEMORY_AVAILABLE;
5939 }
5940 while (!token_cursor->status) {
5941 if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
5942 if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) {
5943 break;
5944 }
5945 if (!*u) {
5946 if (!(*u = grn_ii_updspec_open(ctx, rid, 0))) {
5947 GRN_LOG(ctx, GRN_LOG_ALERT,
5948 "grn_ii_updspec_open on index_del failed !");
5949 grn_hash_close(ctx, h);
5950 grn_token_cursor_close(ctx, token_cursor);
5951 return GRN_NO_MEMORY_AVAILABLE;
5952 }
5953 }
5954 }
5955 }
5956 grn_token_cursor_close(ctx, token_cursor);
5957 GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, {
5958 if (*tp) {
5959 grn_rc r;
5960 r = grn_ii_delete_one(ctx, ii, *tp, *u, NULL);
5961 if (r) {
5962 rc = r;
5963 }
5964 }
5965 grn_ii_updspec_close(ctx, *u);
5966 });
5967 grn_hash_close(ctx, h);
5968 return rc;
5969}
5970
5971grn_rc
5972grn_ii_upd(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram,
5973 const char *oldvalue, unsigned int oldvalue_len,
5974 const char *newvalue, unsigned int newvalue_len)
5975{
5976 grn_rc rc;
5977 grn_obj *lexicon = ii->lexicon;
5978 if (!rid) { return GRN_INVALID_ARGUMENT; }
5979 if (oldvalue && *oldvalue) {
5980 if ((rc = index_del(ctx, rid, lexicon, ii, vgram, oldvalue, oldvalue_len))) {
5981 GRN_LOG(ctx, GRN_LOG_ERROR, "index_del on grn_ii_upd failed !");
5982 goto exit;
5983 }
5984 }
5985 if (newvalue && *newvalue) {
5986 rc = index_add(ctx, rid, lexicon, ii, vgram, newvalue, newvalue_len);
5987 }
5988exit :
5989 return rc;
5990}
5991
5992grn_rc
5993grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned int section,
5994 grn_values *oldvalues, grn_values *newvalues)
5995{
5996 int j;
5997 grn_value *v;
5998 unsigned int token_flags = 0;
5999 grn_token_cursor *token_cursor;
6000 grn_rc rc = GRN_SUCCESS;
6001 grn_hash *old, *new;
6002 grn_id tid, *tp;
6003 grn_ii_updspec **u, **un;
6004 grn_obj *lexicon = ii->lexicon;
6005 if (!lexicon || !ii || !rid) {
6006 GRN_LOG(ctx, GRN_LOG_WARNING, "grn_ii_update: invalid argument");
6007 return GRN_INVALID_ARGUMENT;
6008 }
6009 if (newvalues) {
6010 new = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *),
6011 GRN_HASH_TINY);
6012 if (!new) {
6013 GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on grn_ii_update failed !");
6014 rc = GRN_NO_MEMORY_AVAILABLE;
6015 goto exit;
6016 }
6017 for (j = newvalues->n_values, v = newvalues->values; j; j--, v++) {
6018 if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str,
6019 v->str_len, GRN_TOKEN_ADD,
6020 token_flags))) {
6021 while (!token_cursor->status) {
6022 if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
6023 if (!grn_hash_add(ctx, new, &tid, sizeof(grn_id), (void **) &u,
6024 NULL)) {
6025 break;
6026 }
6027 if (!*u) {
6028 if (!(*u = grn_ii_updspec_open(ctx, rid, section))) {
6029 GRN_LOG(ctx, GRN_LOG_ALERT,
6030 "grn_ii_updspec_open on grn_ii_update failed!");
6031 grn_token_cursor_close(ctx, token_cursor);
6032 grn_hash_close(ctx, new);
6033 rc = GRN_NO_MEMORY_AVAILABLE;
6034 goto exit;
6035 }
6036 }
6037 if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) {
6038 GRN_LOG(ctx, GRN_LOG_ALERT,
6039 "grn_ii_updspec_add on grn_ii_update failed!");
6040 grn_token_cursor_close(ctx, token_cursor);
6041 grn_hash_close(ctx, new);
6042 rc = GRN_NO_MEMORY_AVAILABLE;
6043 goto exit;
6044 }
6045 }
6046 }
6047 grn_token_cursor_close(ctx, token_cursor);
6048 }
6049 }
6050 if (!GRN_HASH_SIZE(new)) {
6051 grn_hash_close(ctx, new);
6052 new = NULL;
6053 }
6054 } else {
6055 new = NULL;
6056 }
6057 if (oldvalues) {
6058 old = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *),
6059 GRN_HASH_TINY);
6060 if (!old) {
6061 GRN_LOG(ctx, GRN_LOG_ALERT,
6062 "grn_hash_create(ctx, NULL, old) on grn_ii_update failed!");
6063 if (new) { grn_hash_close(ctx, new); }
6064 rc = GRN_NO_MEMORY_AVAILABLE;
6065 goto exit;
6066 }
6067 for (j = oldvalues->n_values, v = oldvalues->values; j; j--, v++) {
6068 if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str,
6069 v->str_len, GRN_TOKEN_DEL,
6070 token_flags))) {
6071 while (!token_cursor->status) {
6072 if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
6073 if (!grn_hash_add(ctx, old, &tid, sizeof(grn_id), (void **) &u,
6074 NULL)) {
6075 break;
6076 }
6077 if (!*u) {
6078 if (!(*u = grn_ii_updspec_open(ctx, rid, section))) {
6079 GRN_LOG(ctx, GRN_LOG_ALERT,
6080 "grn_ii_updspec_open on grn_ii_update failed!");
6081 grn_token_cursor_close(ctx, token_cursor);
6082 if (new) { grn_hash_close(ctx, new); };
6083 grn_hash_close(ctx, old);
6084 rc = GRN_NO_MEMORY_AVAILABLE;
6085 goto exit;
6086 }
6087 }
6088 if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) {
6089 GRN_LOG(ctx, GRN_LOG_ALERT,
6090 "grn_ii_updspec_add on grn_ii_update failed!");
6091 grn_token_cursor_close(ctx, token_cursor);
6092 if (new) { grn_hash_close(ctx, new); };
6093 grn_hash_close(ctx, old);
6094 rc = GRN_NO_MEMORY_AVAILABLE;
6095 goto exit;
6096 }
6097 }
6098 }
6099 grn_token_cursor_close(ctx, token_cursor);
6100 }
6101 }
6102 } else {
6103 old = NULL;
6104 }
6105 if (old) {
6106 grn_id eid;
6107 GRN_HASH_EACH(ctx, old, id, &tp, NULL, &u, {
6108 if (new && (eid = grn_hash_get(ctx, new, tp, sizeof(grn_id),
6109 (void **) &un))) {
6110 if (!grn_ii_updspec_cmp(*u, *un)) {
6111 grn_ii_updspec_close(ctx, *un);
6112 grn_hash_delete_by_id(ctx, new, eid, NULL);
6113 }
6114 } else {
6115 grn_rc r;
6116 r = grn_ii_delete_one(ctx, ii, *tp, *u, new);
6117 if (r) {
6118 rc = r;
6119 }
6120 }
6121 grn_ii_updspec_close(ctx, *u);
6122 });
6123 grn_hash_close(ctx, old);
6124 }
6125 if (new) {
6126 GRN_HASH_EACH(ctx, new, id, &tp, NULL, &u, {
6127 grn_rc r;
6128 if ((r = grn_ii_update_one(ctx, ii, *tp, *u, new))) { rc = r; }
6129 grn_ii_updspec_close(ctx, *u);
6130 });
6131 grn_hash_close(ctx, new);
6132 } else {
6133 if (!section) {
6134 /* todo: delete key when all sections deleted */
6135 }
6136 }
6137exit :
6138 return rc;
6139}
6140#endif /* USE_VGRAM */
6141
6142static grn_rc
6143grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
6144 grn_obj *in, grn_obj *out, grn_tokenize_mode mode,
6145 grn_obj *posting)
6146{
6147 int j;
6148 grn_id tid;
6149 grn_section *v;
6150 grn_token_cursor *token_cursor;
6151 grn_ii_updspec **u;
6152 grn_hash *h = (grn_hash *)out;
6153 grn_obj *lexicon = ii->lexicon;
6154 if (in->u.v.body) {
6155 const char *head = GRN_BULK_HEAD(in->u.v.body);
6156 for (j = in->u.v.n_sections, v = in->u.v.sections; j; j--, v++) {
6157 unsigned int token_flags = 0;
6158 if (v->length &&
6159 (token_cursor = grn_token_cursor_open(ctx, lexicon, head + v->offset,
6160 v->length, mode,
6161 token_flags))) {
6162 while (!token_cursor->status) {
6163 if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
6164 if (posting) { GRN_RECORD_PUT(ctx, posting, tid); }
6165 if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u,
6166 NULL)) {
6167 break;
6168 }
6169 if (!*u) {
6170 if (!(*u = grn_ii_updspec_open(ctx, rid, section))) {
6171 DEFINE_NAME(ii);
6172 MERR("[ii][update][spec] failed to create an update spec: "
6173 "<%.*s>: "
6174 "record:<%u>:<%u>, token:<%u>:<%d>:<%u>",
6175 name_size, name,
6176 rid, section,
6177 tid, token_cursor->pos, v->weight);
6178 grn_token_cursor_close(ctx, token_cursor);
6179 return ctx->rc;
6180 }
6181 }
6182 if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) {
6183 DEFINE_NAME(ii);
6184 MERR("[ii][update][spec] failed to add to update spec: "
6185 "<%.*s>: "
6186 "record:<%u>:<%u>, token:<%u>:<%d>:<%u>",
6187 name_size, name,
6188 rid, section,
6189 tid, token_cursor->pos, v->weight);
6190 grn_token_cursor_close(ctx, token_cursor);
6191 return ctx->rc;
6192 }
6193 }
6194 }
6195 grn_token_cursor_close(ctx, token_cursor);
6196 }
6197 }
6198 }
6199 return ctx->rc;
6200}
6201
6202static grn_rc
6203grn_uvector2updspecs_data(grn_ctx *ctx, grn_ii *ii, grn_id rid,
6204 unsigned int section, grn_obj *in, grn_obj *out,
6205 grn_tokenize_mode mode, grn_obj *posting)
6206{
6207 int i, n;
6208 grn_hash *h = (grn_hash *)out;
6209 grn_obj *lexicon = ii->lexicon;
6210 unsigned int element_size;
6211
6212 n = grn_uvector_size(ctx, in);
6213 element_size = grn_uvector_element_size(ctx, in);
6214 for (i = 0; i < n; i++) {
6215 grn_obj *tokenizer;
6216 grn_token_cursor *token_cursor;
6217 unsigned int token_flags = 0;
6218 const char *element;
6219
6220 tokenizer = grn_obj_get_info(ctx, lexicon, GRN_INFO_DEFAULT_TOKENIZER,
6221 NULL);
6222
6223 element = GRN_BULK_HEAD(in) + (element_size * i);
6224 token_cursor = grn_token_cursor_open(ctx, lexicon,
6225 element, element_size,
6226 mode, token_flags);
6227 if (!token_cursor) {
6228 continue;
6229 }
6230
6231 while (!token_cursor->status) {
6232 grn_id tid;
6233 if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
6234 grn_ii_updspec **u;
6235 int pos;
6236
6237 if (posting) { GRN_RECORD_PUT(ctx, posting, tid); }
6238 if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **)&u, NULL)) {
6239 break;
6240 }
6241 if (!*u) {
6242 if (!(*u = grn_ii_updspec_open(ctx, rid, section))) {
6243 GRN_LOG(ctx, GRN_LOG_ALERT,
6244 "grn_ii_updspec_open on grn_uvector2updspecs_data failed!");
6245 grn_token_cursor_close(ctx, token_cursor);
6246 return GRN_NO_MEMORY_AVAILABLE;
6247 }
6248 }
6249 if (tokenizer) {
6250 pos = token_cursor->pos;
6251 } else {
6252 pos = i;
6253 }
6254 if (grn_ii_updspec_add(ctx, *u, pos, 0)) {
6255 GRN_LOG(ctx, GRN_LOG_ALERT,
6256 "grn_ii_updspec_add on grn_uvector2updspecs failed!");
6257 grn_token_cursor_close(ctx, token_cursor);
6258 return GRN_NO_MEMORY_AVAILABLE;
6259 }
6260 }
6261 }
6262
6263 grn_token_cursor_close(ctx, token_cursor);
6264 }
6265
6266 return GRN_SUCCESS;
6267}
6268
6269static grn_rc
6270grn_uvector2updspecs_id(grn_ctx *ctx, grn_ii *ii, grn_id rid,
6271 unsigned int section, grn_obj *in, grn_obj *out)
6272{
6273 int i, n;
6274 grn_ii_updspec **u;
6275 grn_hash *h = (grn_hash *)out;
6276
6277 n = grn_vector_size(ctx, in);
6278 for (i = 0; i < n; i++) {
6279 grn_id id;
6280 unsigned int weight;
6281
6282 id = grn_uvector_get_element(ctx, in, i, &weight);
6283 if (!grn_hash_add(ctx, h, &id, sizeof(grn_id), (void **)&u, NULL)) {
6284 break;
6285 }
6286 if (!*u) {
6287 if (!(*u = grn_ii_updspec_open(ctx, rid, section))) {
6288 GRN_LOG(ctx, GRN_LOG_ALERT,
6289 "grn_ii_updspec_open on grn_ii_update failed!");
6290 return GRN_NO_MEMORY_AVAILABLE;
6291 }
6292 }
6293 if (grn_ii_updspec_add(ctx, *u, i, weight)) {
6294 GRN_LOG(ctx, GRN_LOG_ALERT,
6295 "grn_ii_updspec_add on grn_ii_update failed!");
6296 return GRN_NO_MEMORY_AVAILABLE;
6297 }
6298 }
6299 return GRN_SUCCESS;
6300}
6301
6302static grn_rc
6303grn_uvector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid,
6304 unsigned int section, grn_obj *in, grn_obj *out,
6305 grn_tokenize_mode mode, grn_obj *posting)
6306{
6307 if (in->header.domain < GRN_N_RESERVED_TYPES) {
6308 return grn_uvector2updspecs_data(ctx, ii, rid, section, in, out,
6309 mode, posting);
6310 } else {
6311 return grn_uvector2updspecs_id(ctx, ii, rid, section, in, out);
6312 }
6313}
6314
6315grn_rc
6316grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
6317 grn_obj *oldvalue, grn_obj *newvalue, grn_obj *posting)
6318{
6319 grn_id *tp;
6320 grn_bool do_grn_ii_updspec_cmp = GRN_TRUE;
6321 grn_ii_updspec **u, **un;
6322 grn_obj *old_, *old = oldvalue, *new_, *new = newvalue, oldv, newv;
6323 grn_obj buf, *post = NULL;
6324
6325 if (!ii) {
6326 ERR(GRN_INVALID_ARGUMENT, "[ii][column][update] ii is NULL");
6327 return ctx->rc;
6328 }
6329 if (!ii->lexicon) {
6330 ERR(GRN_INVALID_ARGUMENT, "[ii][column][update] lexicon is NULL");
6331 return ctx->rc;
6332 }
6333 if (rid == GRN_ID_NIL) {
6334 ERR(GRN_INVALID_ARGUMENT, "[ii][column][update] record ID is nil");
6335 return ctx->rc;
6336 }
6337 if (old || new) {
6338 unsigned char type = GRN_VOID;
6339 if (old) {
6340 type = (ii->obj.header.domain == old->header.domain)
6341 ? GRN_UVECTOR
6342 : old->header.type;
6343 }
6344 if (new) {
6345 type = (ii->obj.header.domain == new->header.domain)
6346 ? GRN_UVECTOR
6347 : new->header.type;
6348 }
6349 if (type == GRN_VECTOR) {
6350 grn_obj *tokenizer;
6351 grn_table_get_info(ctx, ii->lexicon, NULL, NULL, &tokenizer, NULL, NULL);
6352 if (tokenizer) {
6353 grn_obj old_elem, new_elem;
6354 unsigned int i, max_n;
6355 unsigned int old_n = 0, new_n = 0;
6356 if (old) {
6357 old_n = grn_vector_size(ctx, old);
6358 }
6359 if (new) {
6360 new_n = grn_vector_size(ctx, new);
6361 }
6362 max_n = (old_n > new_n) ? old_n : new_n;
6363 GRN_OBJ_INIT(&old_elem, GRN_BULK, GRN_OBJ_DO_SHALLOW_COPY, old->header.domain);
6364 GRN_OBJ_INIT(&new_elem, GRN_BULK, GRN_OBJ_DO_SHALLOW_COPY, new->header.domain);
6365 for (i = 0; i < max_n; i++) {
6366 grn_rc rc;
6367 grn_obj *old_p = NULL, *new_p = NULL;
6368 if (i < old_n) {
6369 const char *str;
6370 unsigned int size = grn_vector_get_element(ctx, old, i, &str, NULL, NULL);
6371 GRN_TEXT_SET_REF(&old_elem, str, size);
6372 old_p = &old_elem;
6373 }
6374 if (i < new_n) {
6375 const char *str;
6376 unsigned int size = grn_vector_get_element(ctx, new, i, &str, NULL, NULL);
6377 GRN_TEXT_SET_REF(&new_elem, str, size);
6378 new_p = &new_elem;
6379 }
6380 rc = grn_ii_column_update(ctx, ii, rid, section + i, old_p, new_p, posting);
6381 if (rc != GRN_SUCCESS) {
6382 break;
6383 }
6384 }
6385 GRN_OBJ_FIN(ctx, &old_elem);
6386 GRN_OBJ_FIN(ctx, &new_elem);
6387 return ctx->rc;
6388 }
6389 }
6390 }
6391 if (posting) {
6392 GRN_RECORD_INIT(&buf, GRN_OBJ_VECTOR, grn_obj_id(ctx, ii->lexicon));
6393 post = &buf;
6394 }
6395 if (grn_io_lock(ctx, ii->seg, grn_lock_timeout)) { return ctx->rc; }
6396 if (new) {
6397 unsigned char type = (ii->obj.header.domain == new->header.domain)
6398 ? GRN_UVECTOR
6399 : new->header.type;
6400 switch (type) {
6401 case GRN_BULK :
6402 {
6403 if (grn_bulk_is_zero(ctx, new)) {
6404 do_grn_ii_updspec_cmp = GRN_FALSE;
6405 }
6406 new_ = new;
6407 GRN_OBJ_INIT(&newv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT);
6408 newv.u.v.body = new;
6409 new = &newv;
6410 grn_vector_delimit(ctx, new, 0, GRN_ID_NIL);
6411 if (new_ != newvalue) { grn_obj_close(ctx, new_); }
6412 }
6413 /* fallthru */
6414 case GRN_VECTOR :
6415 new_ = new;
6416 new = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id),
6417 sizeof(grn_ii_updspec *),
6418 GRN_HASH_TINY);
6419 if (!new) {
6420 DEFINE_NAME(ii);
6421 MERR("[ii][column][update][new][vector] failed to create a hash table: "
6422 "<%.*s>: ",
6423 name_size, name);
6424 } else {
6425 grn_vector2updspecs(ctx, ii, rid, section, new_, new,
6426 GRN_TOKEN_ADD, post);
6427 }
6428 if (new_ != newvalue) { grn_obj_close(ctx, new_); }
6429 if (ctx->rc != GRN_SUCCESS) { goto exit; }
6430 break;
6431 case GRN_UVECTOR :
6432 new_ = new;
6433 new = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id),
6434 sizeof(grn_ii_updspec *),
6435 GRN_HASH_TINY);
6436 if (!new) {
6437 DEFINE_NAME(ii);
6438 MERR("[ii][column][update][new][uvector] failed to create a hash table: "
6439 "<%.*s>: ",
6440 name_size, name);
6441 } else {
6442 if (new_->header.type == GRN_UVECTOR) {
6443 grn_uvector2updspecs(ctx, ii, rid, section, new_, new,
6444 GRN_TOKEN_ADD, post);
6445 } else {
6446 grn_obj uvector;
6447 unsigned int weight = 0;
6448 GRN_VALUE_FIX_SIZE_INIT(&uvector, GRN_OBJ_VECTOR,
6449 new_->header.domain);
6450 if (new_->header.impl_flags & GRN_OBJ_WITH_WEIGHT) {
6451 uvector.header.impl_flags |= GRN_OBJ_WITH_WEIGHT;
6452 }
6453 grn_uvector_add_element(ctx, &uvector, GRN_RECORD_VALUE(new_),
6454 weight);
6455 grn_uvector2updspecs(ctx, ii, rid, section, &uvector, new,
6456 GRN_TOKEN_ADD, post);
6457 GRN_OBJ_FIN(ctx, &uvector);
6458 }
6459 }
6460 if (new_ != newvalue) { grn_obj_close(ctx, new_); }
6461 if (ctx->rc != GRN_SUCCESS) { goto exit; }
6462 break;
6463 case GRN_TABLE_HASH_KEY :
6464 break;
6465 default :
6466 {
6467 DEFINE_NAME(ii);
6468 ERR(GRN_INVALID_ARGUMENT,
6469 "[ii][column][update][new] invalid object: "
6470 "<%.*s>: "
6471 "<%s>(%#x)",
6472 name_size, name,
6473 grn_obj_type_to_string(type),
6474 type);
6475 }
6476 goto exit;
6477 }
6478 }
6479 if (posting) {
6480 grn_ii_updspec *u_;
6481 uint32_t offset = 0;
6482 grn_id tid_ = 0, gap, tid, *tpe;
6483 grn_table_sort_optarg arg = {GRN_TABLE_SORT_ASC|
6484 GRN_TABLE_SORT_AS_NUMBER|
6485 GRN_TABLE_SORT_AS_UNSIGNED, NULL, NULL,0 };
6486 grn_array *sorted = grn_array_create(ctx, NULL, sizeof(grn_id), 0);
6487 grn_hash_sort(ctx, (grn_hash *)new, -1, sorted, &arg);
6488 GRN_TEXT_PUT(ctx, posting, ((grn_hash *)new)->n_entries, sizeof(uint32_t));
6489 GRN_ARRAY_EACH(ctx, sorted, 0, 0, id, &tp, {
6490 grn_hash_get_key(ctx, (grn_hash *)new, *tp, &tid, sizeof(grn_id));
6491 gap = tid - tid_;
6492 GRN_TEXT_PUT(ctx, posting, &gap, sizeof(grn_id));
6493 tid_ = tid;
6494 });
6495 GRN_ARRAY_EACH(ctx, sorted, 0, 0, id, &tp, {
6496 grn_hash_get_value(ctx, (grn_hash *)new, *tp, &u_);
6497 u_->offset = offset++;
6498 GRN_TEXT_PUT(ctx, posting, &u_->tf, sizeof(int32_t));
6499 });
6500 tpe = (grn_id *)GRN_BULK_CURR(post);
6501 for (tp = (grn_id *)GRN_BULK_HEAD(post); tp < tpe; tp++) {
6502 grn_hash_get(ctx, (grn_hash *)new, (void *)tp, sizeof(grn_id),
6503 (void **)&u);
6504 GRN_TEXT_PUT(ctx, posting, &(*u)->offset, sizeof(int32_t));
6505 }
6506 GRN_OBJ_FIN(ctx, post);
6507 grn_array_close(ctx, sorted);
6508 }
6509
6510 if (old) {
6511 unsigned char type = (ii->obj.header.domain == old->header.domain)
6512 ? GRN_UVECTOR
6513 : old->header.type;
6514 switch (type) {
6515 case GRN_BULK :
6516 {
6517 // const char *str = GRN_BULK_HEAD(old);
6518 // unsigned int str_len = GRN_BULK_VSIZE(old);
6519 old_ = old;
6520 GRN_OBJ_INIT(&oldv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT);
6521 oldv.u.v.body = old;
6522 old = &oldv;
6523 grn_vector_delimit(ctx, old, 0, GRN_ID_NIL);
6524 if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
6525 }
6526 /* fallthru */
6527 case GRN_VECTOR :
6528 old_ = old;
6529 old = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id),
6530 sizeof(grn_ii_updspec *),
6531 GRN_HASH_TINY);
6532 if (!old) {
6533 DEFINE_NAME(ii);
6534 MERR("[ii][column][update][old][vector] failed to create a hash table: "
6535 "<%.*s>: ",
6536 name_size, name);
6537 } else {
6538 grn_vector2updspecs(ctx, ii, rid, section, old_, old,
6539 GRN_TOKEN_DEL, NULL);
6540 }
6541 if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
6542 if (ctx->rc != GRN_SUCCESS) { goto exit; }
6543 break;
6544 case GRN_UVECTOR :
6545 old_ = old;
6546 old = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id),
6547 sizeof(grn_ii_updspec *),
6548 GRN_HASH_TINY);
6549 if (!old) {
6550 DEFINE_NAME(ii);
6551 MERR("[ii][column][update][old][uvector] failed to create a hash table: "
6552 "<%.*s>: ",
6553 name_size, name);
6554 } else {
6555 if (old_->header.type == GRN_UVECTOR) {
6556 grn_uvector2updspecs(ctx, ii, rid, section, old_, old,
6557 GRN_TOKEN_DEL, NULL);
6558 } else {
6559 grn_obj uvector;
6560 unsigned int weight = 0;
6561 GRN_VALUE_FIX_SIZE_INIT(&uvector, GRN_OBJ_VECTOR,
6562 old_->header.domain);
6563 if (old_->header.impl_flags & GRN_OBJ_WITH_WEIGHT) {
6564 uvector.header.impl_flags |= GRN_OBJ_WITH_WEIGHT;
6565 }
6566 grn_uvector_add_element(ctx, &uvector, GRN_RECORD_VALUE(old_),
6567 weight);
6568 grn_uvector2updspecs(ctx, ii, rid, section, &uvector, old,
6569 GRN_TOKEN_DEL, NULL);
6570 GRN_OBJ_FIN(ctx, &uvector);
6571 }
6572 }
6573 if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
6574 if (ctx->rc != GRN_SUCCESS) { goto exit; }
6575 break;
6576 case GRN_TABLE_HASH_KEY :
6577 break;
6578 default :
6579 {
6580 DEFINE_NAME(ii);
6581 ERR(GRN_INVALID_ARGUMENT,
6582 "[ii][column][update][old] invalid object: "
6583 "<%.*s>: "
6584 "<%s>(%#x)",
6585 name_size, name,
6586 grn_obj_type_to_string(type),
6587 type);
6588 }
6589 goto exit;
6590 }
6591 }
6592
6593 if (old) {
6594 grn_id eid;
6595 grn_hash *o = (grn_hash *)old;
6596 grn_hash *n = (grn_hash *)new;
6597 GRN_HASH_EACH(ctx, o, id, &tp, NULL, &u, {
6598 if (n && (eid = grn_hash_get(ctx, n, tp, sizeof(grn_id),
6599 (void **) &un))) {
6600 if (do_grn_ii_updspec_cmp && !grn_ii_updspec_cmp(*u, *un)) {
6601 grn_ii_updspec_close(ctx, *un);
6602 grn_hash_delete_by_id(ctx, n, eid, NULL);
6603 }
6604 } else {
6605 grn_ii_delete_one(ctx, ii, *tp, *u, n);
6606 }
6607 grn_ii_updspec_close(ctx, *u);
6608 if (ctx->rc != GRN_SUCCESS) {
6609 break;
6610 }
6611 });
6612 }
6613 if (new) {
6614 grn_hash *n = (grn_hash *)new;
6615 GRN_HASH_EACH(ctx, n, id, &tp, NULL, &u, {
6616 grn_ii_update_one(ctx, ii, *tp, *u, n);
6617 grn_ii_updspec_close(ctx, *u);
6618 if (ctx->rc != GRN_SUCCESS) {
6619 break;
6620 }
6621 });
6622 } else {
6623 if (!section) {
6624 /* todo: delete key when all sections deleted */
6625 }
6626 }
6627exit :
6628 grn_io_unlock(ii->seg);
6629 if (old && old != oldvalue) { grn_obj_close(ctx, old); }
6630 if (new && new != newvalue) { grn_obj_close(ctx, new); }
6631 return ctx->rc;
6632}
6633
6634/* token_info */
6635
/*
 * Search state of one query token: the index cursors of every lexicon term
 * the token was expanded to, plus the posting currently at the top of them.
 */
typedef struct {
  cursor_heap *cursors; /* heap of index cursors, one per pushed lexicon term */
  int offset;           /* token offset given to token_info_open(); subtracted from posting positions */
  int pos;              /* position of the current posting minus offset */
  int size;             /* sum of grn_ii_estimate_size() of the pushed terms */
  int ntoken;           /* number of cursors pushed onto the heap */
  grn_posting *p;       /* posting of the cursor returned by cursor_heap_min() */
} token_info;
6644
/*
 * Term expansion modes accepted by token_info_open().
 * EX_BOTH equals (EX_PREFIX | EX_SUFFIX), so callers can mask a mode with
 * EX_PREFIX or EX_SUFFIX to keep only one direction.
 */
#define EX_NONE 0   /* exact lookup of the key in the lexicon */
#define EX_PREFIX 1 /* GRN_OP_PREFIX search on the lexicon */
#define EX_SUFFIX 2 /* GRN_OP_SUFFIX search on the lexicon */
#define EX_BOTH 3   /* handled by token_info_expand_both() */
#define EX_FUZZY 4  /* grn_table_fuzzy_search() on the lexicon */
6650
6651inline static void
6652token_info_expand_both(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
6653 const char *key, unsigned int key_size, token_info *ti)
6654{
6655 int s = 0;
6656 grn_hash *h, *g;
6657 uint32_t *offset2;
6658 grn_hash_cursor *c;
6659 grn_id *tp, *tq;
6660 if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0))) {
6661 grn_table_search(ctx, lexicon, key, key_size,
6662 GRN_OP_PREFIX, (grn_obj *)h, GRN_OP_OR);
6663 if (GRN_HASH_SIZE(h)) {
6664 if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h) + 256))) {
6665 if ((c = grn_hash_cursor_open(ctx, h, NULL, 0, NULL, 0, 0, -1, 0))) {
6666 uint32_t key2_size;
6667 const char *key2;
6668 while (grn_hash_cursor_next(ctx, c)) {
6669 grn_hash_cursor_get_key(ctx, c, (void **) &tp);
6670 key2 = _grn_table_key(ctx, lexicon, *tp, &key2_size);
6671 if (!key2) { break; }
6672 if ((lexicon->header.type != GRN_TABLE_PAT_KEY) ||
6673 !(lexicon->header.flags & GRN_OBJ_KEY_WITH_SIS) ||
6674 key2_size <= 2) { // todo: refine
6675 if ((s = grn_ii_estimate_size(ctx, ii, *tp))) {
6676 cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, 0, GRN_ID_NIL);
6677 ti->ntoken++;
6678 ti->size += s;
6679 }
6680 } else {
6681 if ((g = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
6682 GRN_HASH_TINY))) {
6683 grn_pat_suffix_search(ctx, (grn_pat *)lexicon, key2, key2_size,
6684 g);
6685 GRN_HASH_EACH(ctx, g, id, &tq, NULL, &offset2, {
6686 if ((s = grn_ii_estimate_size(ctx, ii, *tq))) {
6687 cursor_heap_push(ctx, ti->cursors, ii, *tq,
6688 /* *offset2 */ 0, 0, GRN_ID_NIL);
6689 ti->ntoken++;
6690 ti->size += s;
6691 }
6692 });
6693 grn_hash_close(ctx, g);
6694 }
6695 }
6696 }
6697 grn_hash_cursor_close(ctx, c);
6698 }
6699 }
6700 }
6701 grn_hash_close(ctx, h);
6702 }
6703}
6704
6705inline static grn_rc
6706token_info_close(grn_ctx *ctx, token_info *ti)
6707{
6708 cursor_heap_close(ctx, ti->cursors);
6709 GRN_FREE(ti);
6710 return GRN_SUCCESS;
6711}
6712
6713inline static token_info *
6714token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
6715 const char *key, unsigned int key_size, uint32_t offset,
6716 int mode, grn_fuzzy_search_optarg *args, grn_id min)
6717{
6718 int s = 0;
6719 grn_hash *h;
6720 token_info *ti;
6721 grn_id tid;
6722 grn_id *tp;
6723 if (!key) { return NULL; }
6724 if (!(ti = GRN_MALLOC(sizeof(token_info)))) { return NULL; }
6725 ti->cursors = NULL;
6726 ti->size = 0;
6727 ti->ntoken = 0;
6728 ti->offset = offset;
6729 switch (mode) {
6730 case EX_BOTH :
6731 token_info_expand_both(ctx, lexicon, ii, key, key_size, ti);
6732 break;
6733 case EX_NONE :
6734 if ((tid = grn_table_get(ctx, lexicon, key, key_size)) &&
6735 (s = grn_ii_estimate_size(ctx, ii, tid)) &&
6736 (ti->cursors = cursor_heap_open(ctx, 1))) {
6737 cursor_heap_push(ctx, ti->cursors, ii, tid, 0, 0, min);
6738 ti->ntoken++;
6739 ti->size = s;
6740 }
6741 break;
6742 case EX_PREFIX :
6743 if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0))) {
6744 grn_table_search(ctx, lexicon, key, key_size,
6745 GRN_OP_PREFIX, (grn_obj *)h, GRN_OP_OR);
6746 if (GRN_HASH_SIZE(h)) {
6747 if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h)))) {
6748 GRN_HASH_EACH(ctx, h, id, &tp, NULL, NULL, {
6749 if ((s = grn_ii_estimate_size(ctx, ii, *tp))) {
6750 cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, 0, min);
6751 ti->ntoken++;
6752 ti->size += s;
6753 }
6754 });
6755 }
6756 }
6757 grn_hash_close(ctx, h);
6758 }
6759 break;
6760 case EX_SUFFIX :
6761 if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0))) {
6762 grn_table_search(ctx, lexicon, key, key_size,
6763 GRN_OP_SUFFIX, (grn_obj *)h, GRN_OP_OR);
6764 if (GRN_HASH_SIZE(h)) {
6765 if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h)))) {
6766 uint32_t *offset2;
6767 GRN_HASH_EACH(ctx, h, id, &tp, NULL, &offset2, {
6768 if ((s = grn_ii_estimate_size(ctx, ii, *tp))) {
6769 cursor_heap_push(ctx, ti->cursors, ii, *tp, /* *offset2 */ 0, 0, min);
6770 ti->ntoken++;
6771 ti->size += s;
6772 }
6773 });
6774 }
6775 }
6776 grn_hash_close(ctx, h);
6777 }
6778 break;
6779 case EX_FUZZY :
6780 if ((h = (grn_hash *)grn_table_create(ctx, NULL, 0, NULL,
6781 GRN_OBJ_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC,
6782 grn_ctx_at(ctx, GRN_DB_UINT32), NULL))) {
6783 grn_table_fuzzy_search(ctx, lexicon, key, key_size,
6784 args, (grn_obj *)h, GRN_OP_OR);
6785 if (GRN_HASH_SIZE(h)) {
6786 if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h)))) {
6787 grn_rset_recinfo *ri;
6788 GRN_HASH_EACH(ctx, h, id, &tp, NULL, (void **)&ri, {
6789 if ((s = grn_ii_estimate_size(ctx, ii, *tp))) {
6790 cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, ri->score - 1, min);
6791 ti->ntoken++;
6792 ti->size += s;
6793 }
6794 });
6795 }
6796 }
6797 grn_obj_close(ctx, (grn_obj *)h);
6798 }
6799 break;
6800 }
6801 if (cursor_heap_push2(ti->cursors)) {
6802 token_info_close(ctx, ti);
6803 return NULL;
6804 }
6805 {
6806 grn_ii_cursor *ic;
6807 if (ti->cursors && (ic = cursor_heap_min(ti->cursors))) {
6808 grn_posting *p = ic->post;
6809 ti->pos = p->pos - ti->offset;
6810 ti->p = p;
6811 } else {
6812 token_info_close(ctx, ti);
6813 ti = NULL;
6814 }
6815 }
6816 return ti;
6817}
6818
6819static inline grn_rc
6820token_info_skip(grn_ctx *ctx, token_info *ti, uint32_t rid, uint32_t sid)
6821{
6822 grn_ii_cursor *c;
6823 grn_posting *p;
6824 for (;;) {
6825 if (!(c = cursor_heap_min(ti->cursors))) { return GRN_END_OF_DATA; }
6826 p = c->post;
6827 if (p->rid > rid || (p->rid == rid && p->sid >= sid)) { break; }
6828 cursor_heap_pop(ctx, ti->cursors, rid);
6829 }
6830 ti->pos = p->pos - ti->offset;
6831 ti->p = p;
6832 return GRN_SUCCESS;
6833}
6834
6835static inline grn_rc
6836token_info_skip_pos(grn_ctx *ctx, token_info *ti, uint32_t rid, uint32_t sid, uint32_t pos)
6837{
6838 grn_ii_cursor *c;
6839 grn_posting *p;
6840 pos += ti->offset;
6841 for (;;) {
6842 if (!(c = cursor_heap_min(ti->cursors))) { return GRN_END_OF_DATA; }
6843 p = c->post;
6844 if (p->rid != rid || p->sid != sid || p->pos >= pos) { break; }
6845 cursor_heap_pop_pos(ctx, ti->cursors);
6846 }
6847 ti->pos = p->pos - ti->offset;
6848 ti->p = p;
6849 return GRN_SUCCESS;
6850}
6851
6852inline static int
6853token_compare(const void *a, const void *b)
6854{
6855 const token_info *t1 = *((token_info **)a), *t2 = *((token_info **)b);
6856 return t1->size - t2->size;
6857}
6858
/* Number of token_candidate_node slots token_candidate_init() allocates at once */
#define TOKEN_CANDIDATE_NODE_SIZE 32
/* Capacity of token_candidate_node.adjacent[] */
#define TOKEN_CANDIDATE_ADJACENT_MAX_SIZE 16
/* Number of entries by which a token_candidate_queue is allocated and grown */
#define TOKEN_CANDIDATE_QUEUE_SIZE 64
/* token_info_build_skipping_overlap() walks the nodes in windows of this size minus 1 */
#define TOKEN_CANDIDATE_SIZE 16
6863
/*
 * One token produced by the token cursor, recorded by token_candidate_init()
 * together with the indexes of the tokens that may follow it.
 */
typedef struct {
  grn_id tid;                     /* token id given by the token cursor */
  const unsigned char *token;     /* token_cursor->curr when the node was recorded */
  uint32_t token_size;            /* token_cursor->curr_size when the node was recorded */
  int32_t pos;                    /* token_cursor->pos when the node was recorded */
  grn_token_cursor_status status; /* token_cursor->status when the node was recorded */
  int ef;                         /* EX_* expansion flags in effect for the token */
  uint32_t estimated_size;        /* grn_ii_estimate_size() of tid */
  uint8_t adjacent[TOKEN_CANDIDATE_ADJACENT_MAX_SIZE]; /* Index of adjacent node from top */
  uint8_t n_adjacent;             /* number of used entries in adjacent[] */
} token_candidate_node;
6875
/*
 * Growable FIFO of candidates.  Entries are never moved: `top` and `rear`
 * only advance.
 */
typedef struct {
  uint32_t *candidates; /* Each entry is a bitmask; a set bit is the index of a token_candidate_node in the candidate */
  int top;              /* index of the next entry to dequeue */
  int rear;             /* index where the next entry is enqueued */
  int size;             /* number of entries `candidates` has room for */
} token_candidate_queue;
6882
6883inline static void
6884token_candidate_adjacent_set(grn_ctx *ctx, grn_token_cursor *token_cursor,
6885 token_candidate_node *top, token_candidate_node *curr)
6886{
6887 grn_bool exists_adjacent = GRN_FALSE;
6888 token_candidate_node *adj;
6889 for (adj = top; adj < curr; adj++) {
6890 if (token_cursor->curr <= adj->token + adj->token_size) {
6891 if (adj->n_adjacent < TOKEN_CANDIDATE_ADJACENT_MAX_SIZE) {
6892 adj->adjacent[adj->n_adjacent] = curr - top;
6893 adj->n_adjacent++;
6894 exists_adjacent = GRN_TRUE;
6895 }
6896 }
6897 }
6898 if (!exists_adjacent) {
6899 adj = curr - 1;
6900 if (adj->n_adjacent < TOKEN_CANDIDATE_ADJACENT_MAX_SIZE) {
6901 adj->adjacent[adj->n_adjacent] = curr - top;
6902 adj->n_adjacent++;
6903 }
6904 }
6905}
6906
6907inline static grn_rc
6908token_candidate_init(grn_ctx *ctx, grn_ii *ii, grn_token_cursor *token_cursor,
6909 grn_id tid, int ef, token_candidate_node **nodes, int *n_nodes,
6910 uint32_t *max_estimated_size)
6911{
6912 grn_rc rc;
6913 token_candidate_node *top, *curr;
6914 int size = TOKEN_CANDIDATE_NODE_SIZE;
6915
6916 *nodes = GRN_MALLOC(TOKEN_CANDIDATE_NODE_SIZE * sizeof(token_candidate_node));
6917 if (!*nodes) {
6918 return GRN_NO_MEMORY_AVAILABLE;
6919 }
6920 top = *nodes;
6921 curr = top;
6922
6923#define TOKEN_CANDIDATE_NODE_SET() { \
6924 curr->tid = tid; \
6925 curr->token = token_cursor->curr; \
6926 curr->token_size = token_cursor->curr_size; \
6927 curr->pos = token_cursor->pos; \
6928 curr->status = token_cursor->status; \
6929 curr->ef = ef; \
6930 curr->estimated_size = grn_ii_estimate_size(ctx, ii, tid); \
6931 curr->n_adjacent = 0; \
6932}
6933 TOKEN_CANDIDATE_NODE_SET();
6934 GRN_LOG(ctx, GRN_LOG_DEBUG, "[ii][overlap_token_skip] tid=%u pos=%d estimated_size=%u",
6935 curr->tid, curr->pos, curr->estimated_size);
6936 *max_estimated_size = curr->estimated_size;
6937 curr++;
6938
6939 while (token_cursor->status == GRN_TOKEN_CURSOR_DOING) {
6940 if (curr - top >= size) {
6941 if (!(*nodes = GRN_REALLOC(*nodes,
6942 (curr - top + TOKEN_CANDIDATE_NODE_SIZE) * sizeof(token_candidate_node)))) {
6943 return GRN_NO_MEMORY_AVAILABLE;
6944 }
6945 top = *nodes;
6946 curr = top + size;
6947 size += TOKEN_CANDIDATE_NODE_SIZE;
6948 }
6949 tid = grn_token_cursor_next(ctx, token_cursor);
6950 if (token_cursor->status != GRN_TOKEN_CURSOR_DONE_SKIP) {
6951 if (token_cursor->force_prefix) { ef |= EX_PREFIX; }
6952 TOKEN_CANDIDATE_NODE_SET();
6953 token_candidate_adjacent_set(ctx, token_cursor, top, curr);
6954 if (curr->estimated_size > *max_estimated_size) {
6955 *max_estimated_size = curr->estimated_size;
6956 }
6957 curr++;
6958 }
6959 }
6960 *n_nodes = curr - top;
6961 rc = GRN_SUCCESS;
6962 return rc;
6963#undef TOKEN_CANDIDATE_NODE_SET
6964}
6965
6966inline static grn_rc
6967token_candidate_queue_init(grn_ctx *ctx, token_candidate_queue *q)
6968{
6969 q->top = 0;
6970 q->rear = 0;
6971 q->size = TOKEN_CANDIDATE_QUEUE_SIZE;
6972
6973 q->candidates = GRN_MALLOC(TOKEN_CANDIDATE_QUEUE_SIZE * sizeof(uint32_t));
6974 if (!q->candidates) {
6975 q->size = 0;
6976 return GRN_NO_MEMORY_AVAILABLE;
6977 }
6978 return GRN_SUCCESS;
6979}
6980
6981inline static grn_rc
6982token_candidate_enqueue(grn_ctx *ctx, token_candidate_queue *q, uint32_t candidate)
6983{
6984 if (q->rear >= q->size) {
6985 if (!(q->candidates =
6986 GRN_REALLOC(q->candidates,
6987 (q->rear + TOKEN_CANDIDATE_QUEUE_SIZE) * sizeof(uint32_t)))) {
6988 q->size = 0;
6989 return GRN_NO_MEMORY_AVAILABLE;
6990 }
6991 q->size += TOKEN_CANDIDATE_QUEUE_SIZE;
6992 }
6993 *(q->candidates + q->rear) = candidate;
6994 q->rear++;
6995 return GRN_SUCCESS;
6996}
6997
6998inline static grn_rc
6999token_candidate_dequeue(grn_ctx *ctx, token_candidate_queue *q, uint32_t *candidate)
7000{
7001 if (q->top == q->rear) {
7002 return GRN_END_OF_DATA;
7003 }
7004 *candidate = *(q->candidates + q->top);
7005 q->top++;
7006 return GRN_SUCCESS;
7007}
7008
7009inline static void
7010token_candidate_queue_fin(grn_ctx *ctx, token_candidate_queue *q)
7011{
7012 GRN_FREE(q->candidates);
7013}
7014
7015inline static token_candidate_node*
7016token_candidate_last_node(grn_ctx *ctx, token_candidate_node *nodes, uint32_t candidate, int offset)
7017{
7018 int i;
7019 GRN_BIT_SCAN_REV(candidate, i);
7020 return nodes + i + offset;
7021}
7022
7023inline static uint64_t
7024token_candidate_score(grn_ctx *ctx, token_candidate_node *nodes, uint32_t candidate,
7025 int offset, uint32_t max_estimated_size)
7026{
7027 int i, last;
7028 uint64_t score = 0;
7029 GRN_BIT_SCAN_REV(candidate, last);
7030 for (i = 0; i <= last; i++) {
7031 if (candidate & (1 << i)) {
7032 token_candidate_node *node = nodes + i + offset;
7033 if (node->estimated_size > 0) {
7034 score += max_estimated_size / node->estimated_size;
7035 }
7036 }
7037 }
7038 return score;
7039}
7040
7041inline static grn_rc
7042token_candidate_select(grn_ctx *ctx, token_candidate_node *nodes,
7043 int offset, int limit, int end,
7044 uint32_t *selected_candidate, uint32_t max_estimated_size)
7045{
7046 grn_rc rc;
7047 token_candidate_queue q;
7048 uint32_t candidate;
7049 uint64_t max_score = 0;
7050 int i, min_n_nodes = 0;
7051
7052 if (offset + limit > end) {
7053 limit = end - offset;
7054 }
7055 rc = token_candidate_queue_init(ctx, &q);
7056 if (rc != GRN_SUCCESS) {
7057 return rc;
7058 }
7059 rc = token_candidate_enqueue(ctx, &q, 1);
7060 if (rc != GRN_SUCCESS) {
7061 goto exit;
7062 }
7063 while (token_candidate_dequeue(ctx, &q, &candidate) != GRN_END_OF_DATA) {
7064 token_candidate_node *candidate_last_node =
7065 token_candidate_last_node(ctx, nodes, candidate, offset);
7066 for (i = 0; i < candidate_last_node->n_adjacent; i++) {
7067 int adjacent, n_nodes = 0;
7068 uint32_t new_candidate;
7069 adjacent = candidate_last_node->adjacent[i] - offset;
7070 if (adjacent > limit) {
7071 break;
7072 }
7073 new_candidate = candidate | (1 << adjacent);
7074 GET_NUM_BITS(new_candidate, n_nodes);
7075 if (min_n_nodes > 0 && n_nodes > min_n_nodes + 1) {
7076 goto exit;
7077 }
7078 rc = token_candidate_enqueue(ctx, &q, new_candidate);
7079 if (rc != GRN_SUCCESS) {
7080 goto exit;
7081 }
7082 if (adjacent == limit) {
7083 if (min_n_nodes == 0) {
7084 min_n_nodes = n_nodes;
7085 }
7086 if (n_nodes >= min_n_nodes && n_nodes <= min_n_nodes + 1) {
7087 uint64_t score;
7088 score = token_candidate_score(ctx, nodes, new_candidate, offset, max_estimated_size);
7089 if (score > max_score) {
7090 max_score = score;
7091 *selected_candidate = new_candidate;
7092 }
7093 }
7094 }
7095 }
7096 }
7097 rc = GRN_SUCCESS;
7098exit :
7099 token_candidate_queue_fin(ctx, &q);
7100 return rc;
7101}
7102
7103inline static grn_rc
7104token_candidate_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
7105 token_info **tis, uint32_t *n,
7106 token_candidate_node *nodes, uint32_t selected_candidate,
7107 int offset, grn_id min)
7108{
7109 grn_rc rc = GRN_END_OF_DATA;
7110 token_info *ti;
7111 const char *key;
7112 uint32_t size;
7113 int i, last = 0;
7114 GRN_BIT_SCAN_REV(selected_candidate, last);
7115 for (i = 1; i <= last; i++) {
7116 if (selected_candidate & (1 << i)) {
7117 token_candidate_node *node = nodes + i + offset;
7118 switch (node->status) {
7119 case GRN_TOKEN_CURSOR_DOING :
7120 key = _grn_table_key(ctx, lexicon, node->tid, &size);
7121 ti = token_info_open(ctx, lexicon, ii, key, size, node->pos,
7122 EX_NONE, NULL, min);
7123 break;
7124 case GRN_TOKEN_CURSOR_DONE :
7125 if (node->tid) {
7126 key = _grn_table_key(ctx, lexicon, node->tid, &size);
7127 ti = token_info_open(ctx, lexicon, ii, key, size, node->pos,
7128 node->ef & EX_PREFIX, NULL, min);
7129 break;
7130 } /* else fallthru */
7131 default :
7132 ti = token_info_open(ctx, lexicon, ii, (char *)node->token,
7133 node->token_size, node->pos,
7134 node->ef & EX_PREFIX, NULL, min);
7135 break;
7136 }
7137 if (!ti) {
7138 goto exit;
7139 }
7140 tis[(*n)++] = ti;
7141 GRN_LOG(ctx, GRN_LOG_DEBUG, "[ii][overlap_token_skip] tid=%u pos=%d estimated_size=%u",
7142 node->tid, node->pos, node->estimated_size);
7143 }
7144 }
7145 rc = GRN_SUCCESS;
7146exit :
7147 return rc;
7148}
7149
7150inline static grn_rc
7151token_info_build_skipping_overlap(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
7152 token_info **tis, uint32_t *n,
7153 grn_token_cursor *token_cursor,
7154 grn_id tid, int ef, grn_id min)
7155{
7156 grn_rc rc;
7157 token_candidate_node *nodes = NULL;
7158 int n_nodes = 0, offset = 0, limit = TOKEN_CANDIDATE_SIZE - 1;
7159 uint32_t max_estimated_size;
7160
7161 rc = token_candidate_init(ctx, ii, token_cursor, tid, ef, &nodes, &n_nodes, &max_estimated_size);
7162 if (rc != GRN_SUCCESS) {
7163 return rc;
7164 }
7165 while (offset < n_nodes - 1) {
7166 uint32_t selected_candidate = 0;
7167 rc = token_candidate_select(ctx, nodes, offset, limit, n_nodes - 1,
7168 &selected_candidate, max_estimated_size);
7169 if (rc != GRN_SUCCESS) {
7170 goto exit;
7171 }
7172 rc = token_candidate_build(ctx, lexicon, ii, tis, n, nodes, selected_candidate, offset, min);
7173 if (rc != GRN_SUCCESS) {
7174 goto exit;
7175 }
7176 offset += limit;
7177 }
7178 rc = GRN_SUCCESS;
7179exit :
7180 if (nodes) {
7181 GRN_FREE(nodes);
7182 }
7183 return rc;
7184}
7185
7186inline static grn_rc
7187token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, unsigned int string_len,
7188 token_info **tis, uint32_t *n, grn_bool *only_skip_token, grn_id min,
7189 grn_operator mode)
7190{
7191 token_info *ti;
7192 const char *key;
7193 uint32_t size;
7194 grn_rc rc = GRN_END_OF_DATA;
7195 unsigned int token_flags = GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER;
7196 grn_token_cursor *token_cursor = grn_token_cursor_open(ctx, lexicon,
7197 string, string_len,
7198 GRN_TOKEN_GET,
7199 token_flags);
7200 *only_skip_token = GRN_FALSE;
7201 if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; }
7202 if (mode == GRN_OP_UNSPLIT) {
7203 if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig,
7204 token_cursor->orig_blen, 0, EX_BOTH, NULL, min))) {
7205 tis[(*n)++] = ti;
7206 rc = GRN_SUCCESS;
7207 }
7208 } else {
7209 grn_id tid;
7210 int ef;
7211 switch (mode) {
7212 case GRN_OP_PREFIX :
7213 ef = EX_PREFIX;
7214 break;
7215 case GRN_OP_SUFFIX :
7216 ef = EX_SUFFIX;
7217 break;
7218 case GRN_OP_PARTIAL :
7219 ef = EX_BOTH;
7220 break;
7221 default :
7222 ef = EX_NONE;
7223 break;
7224 }
7225 tid = grn_token_cursor_next(ctx, token_cursor);
7226 if (token_cursor->force_prefix) { ef |= EX_PREFIX; }
7227 switch (token_cursor->status) {
7228 case GRN_TOKEN_CURSOR_DOING :
7229 key = _grn_table_key(ctx, lexicon, tid, &size);
7230 ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos,
7231 ef & EX_SUFFIX, NULL, min);
7232 break;
7233 case GRN_TOKEN_CURSOR_DONE :
7234 ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr,
7235 token_cursor->curr_size, 0, ef, NULL, min);
7236 /*
7237 key = _grn_table_key(ctx, lexicon, tid, &size);
7238 ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef, NULL, GRN_ID_NIL);
7239 ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig,
7240 token_cursor->orig_blen, token_cursor->pos, ef, NULL, GRN_ID_NIL);
7241 */
7242 break;
7243 case GRN_TOKEN_CURSOR_NOT_FOUND :
7244 ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig,
7245 token_cursor->orig_blen, 0, ef, NULL, min);
7246 break;
7247 case GRN_TOKEN_CURSOR_DONE_SKIP :
7248 *only_skip_token = GRN_TRUE;
7249 goto exit;
7250 default :
7251 goto exit;
7252 }
7253 if (!ti) { goto exit ; }
7254 tis[(*n)++] = ti;
7255
7256 if (grn_ii_overlap_token_skip_enable) {
7257 rc = token_info_build_skipping_overlap(ctx, lexicon, ii, tis, n, token_cursor, tid, ef, min);
7258 goto exit;
7259 }
7260
7261 while (token_cursor->status == GRN_TOKEN_CURSOR_DOING) {
7262 tid = grn_token_cursor_next(ctx, token_cursor);
7263 if (token_cursor->force_prefix) { ef |= EX_PREFIX; }
7264 switch (token_cursor->status) {
7265 case GRN_TOKEN_CURSOR_DONE_SKIP :
7266 continue;
7267 case GRN_TOKEN_CURSOR_DOING :
7268 key = _grn_table_key(ctx, lexicon, tid, &size);
7269 ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos,
7270 EX_NONE, NULL, min);
7271 break;
7272 case GRN_TOKEN_CURSOR_DONE :
7273 if (tid) {
7274 key = _grn_table_key(ctx, lexicon, tid, &size);
7275 ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos,
7276 ef & EX_PREFIX, NULL, min);
7277 break;
7278 } /* else fallthru */
7279 default :
7280 ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->curr,
7281 token_cursor->curr_size, token_cursor->pos,
7282 ef & EX_PREFIX, NULL, min);
7283 break;
7284 }
7285 if (!ti) {
7286 goto exit;
7287 }
7288 tis[(*n)++] = ti;
7289 }
7290 rc = GRN_SUCCESS;
7291 }
7292exit :
7293 grn_token_cursor_close(ctx, token_cursor);
7294 return rc;
7295}
7296
7297inline static grn_rc
7298token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii,
7299 const char *string, unsigned int string_len,
7300 token_info **tis, uint32_t *n, grn_bool *only_skip_token,
7301 grn_id min, grn_operator mode, grn_fuzzy_search_optarg *args)
7302{
7303 token_info *ti;
7304 grn_rc rc = GRN_END_OF_DATA;
7305 unsigned int token_flags = GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER;
7306 grn_token_cursor *token_cursor = grn_token_cursor_open(ctx, lexicon,
7307 string, string_len,
7308 GRN_TOKENIZE_ONLY,
7309 token_flags);
7310 *only_skip_token = GRN_FALSE;
7311 if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; }
7312 grn_token_cursor_next(ctx, token_cursor);
7313 switch (token_cursor->status) {
7314 case GRN_TOKEN_CURSOR_DONE_SKIP :
7315 *only_skip_token = GRN_TRUE;
7316 goto exit;
7317 case GRN_TOKEN_CURSOR_DOING :
7318 case GRN_TOKEN_CURSOR_DONE :
7319 ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr,
7320 token_cursor->curr_size, token_cursor->pos, EX_FUZZY,
7321 args, min);
7322 break;
7323 default :
7324 ti = NULL;
7325 break;
7326 }
7327 if (!ti) {
7328 goto exit ;
7329 }
7330 tis[(*n)++] = ti;
7331 while (token_cursor->status == GRN_TOKEN_CURSOR_DOING) {
7332 grn_token_cursor_next(ctx, token_cursor);
7333 switch (token_cursor->status) {
7334 case GRN_TOKEN_CURSOR_DONE_SKIP :
7335 continue;
7336 case GRN_TOKEN_CURSOR_DOING :
7337 case GRN_TOKEN_CURSOR_DONE :
7338 ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr,
7339 token_cursor->curr_size, token_cursor->pos, EX_FUZZY,
7340 args, min);
7341 break;
7342 default :
7343 break;
7344 }
7345 if (!ti) {
7346 goto exit;
7347 }
7348 tis[(*n)++] = ti;
7349 }
7350 rc = GRN_SUCCESS;
7351exit :
7352 grn_token_cursor_close(ctx, token_cursor);
7353 return rc;
7354}
7355
7356static void
7357token_info_clear_offset(token_info **tis, uint32_t n)
7358{
7359 token_info **tie;
7360 for (tie = tis + n; tis < tie; tis++) { (*tis)->offset = 0; }
7361}
7362
7363/* select */
7364
7365inline static void
7366res_add(grn_ctx *ctx, grn_hash *s, grn_rset_posinfo *pi, double score,
7367 grn_operator op)
7368{
7369 grn_rset_recinfo *ri;
7370 switch (op) {
7371 case GRN_OP_OR :
7372 if (grn_hash_add(ctx, s, pi, s->key_size, (void **)&ri, NULL)) {
7373 if (s->obj.header.flags & GRN_OBJ_WITH_SUBREC) {
7374 grn_table_add_subrec((grn_obj *)s, ri, score, pi, 1);
7375 }
7376 }
7377 break;
7378 case GRN_OP_AND :
7379 if (grn_hash_get(ctx, s, pi, s->key_size, (void **)&ri)) {
7380 if (s->obj.header.flags & GRN_OBJ_WITH_SUBREC) {
7381 ri->n_subrecs |= GRN_RSET_UTIL_BIT;
7382 grn_table_add_subrec((grn_obj *)s, ri, score, pi, 1);
7383 }
7384 }
7385 break;
7386 case GRN_OP_AND_NOT :
7387 {
7388 grn_id id;
7389 if ((id = grn_hash_get(ctx, s, pi, s->key_size, (void **)&ri))) {
7390 grn_hash_delete_by_id(ctx, s, id, NULL);
7391 }
7392 }
7393 break;
7394 case GRN_OP_ADJUST :
7395 if (grn_hash_get(ctx, s, pi, s->key_size, (void **)&ri)) {
7396 if (s->obj.header.flags & GRN_OBJ_WITH_SUBREC) {
7397 ri->score += score;
7398 }
7399 }
7400 break;
7401 default :
7402 break;
7403 }
7404}
7405
7406grn_rc
7407grn_ii_posting_add(grn_ctx *ctx, grn_posting *pos, grn_hash *s, grn_operator op)
7408{
7409 res_add(ctx, s, (grn_rset_posinfo *)(pos), (1 + pos->weight), op);
7410 return ctx->rc;
7411}
7412
7413#ifdef USE_BHEAP
7414
7415/* todo */
7416
7417#else /* USE_BHEAP */
7418
/* Node of the binary tree that keeps token_info entries ordered by ti->pos. */
struct _btr_node {
  struct _btr_node *car; /* child holding entries with a smaller pos */
  struct _btr_node *cdr; /* child holding entries with an equal or greater pos */
  token_info *ti;        /* entry stored in this node */
};

typedef struct _btr_node btr_node;
7426
/*
 * Binary tree of token_info entries ordered by ti->pos.  Nodes are taken
 * from the preallocated `nodes` array in insertion order.
 */
typedef struct {
  int n;           /* number of nodes handed out from `nodes` */
  token_info *min; /* entry with the smallest pos */
  token_info *max; /* entry with the largest pos */
  btr_node *root;  /* root of the tree, NULL while empty */
  btr_node *nodes; /* node storage allocated by bt_open() */
} btr;
7434
7435inline static void
7436bt_zap(btr *bt)
7437{
7438 bt->n = 0;
7439 bt->min = NULL;
7440 bt->max = NULL;
7441 bt->root = NULL;
7442}
7443
7444inline static btr *
7445bt_open(grn_ctx *ctx, int size)
7446{
7447 btr *bt = GRN_MALLOC(sizeof(btr));
7448 if (bt) {
7449 bt_zap(bt);
7450 if (!(bt->nodes = GRN_MALLOC(sizeof(btr_node) * size))) {
7451 GRN_FREE(bt);
7452 bt = NULL;
7453 }
7454 }
7455 return bt;
7456}
7457
7458inline static void
7459bt_close(grn_ctx *ctx, btr *bt)
7460{
7461 if (!bt) { return; }
7462 GRN_FREE(bt->nodes);
7463 GRN_FREE(bt);
7464}
7465
7466inline static void
7467bt_push(btr *bt, token_info *ti)
7468{
7469 int pos = ti->pos, minp = 1, maxp = 1;
7470 btr_node *node, *new, **last;
7471 new = bt->nodes + bt->n++;
7472 new->ti = ti;
7473 new->car = NULL;
7474 new->cdr = NULL;
7475 for (last = &bt->root; (node = *last);) {
7476 if (pos < node->ti->pos) {
7477 last = &node->car;
7478 maxp = 0;
7479 } else {
7480 last = &node->cdr;
7481 minp = 0;
7482 }
7483 }
7484 *last = new;
7485 if (minp) { bt->min = ti; }
7486 if (maxp) { bt->max = ti; }
7487}
7488
7489inline static void
7490bt_pop(btr *bt)
7491{
7492 btr_node *node, *min, *newmin, **last;
7493 for (last = &bt->root; (min = *last) && min->car; last = &min->car) ;
7494 if (min) {
7495 int pos = min->ti->pos, minp = 1, maxp = 1;
7496 *last = min->cdr;
7497 min->cdr = NULL;
7498 for (last = &bt->root; (node = *last);) {
7499 if (pos < node->ti->pos) {
7500 last = &node->car;
7501 maxp = 0;
7502 } else {
7503 last = &node->cdr;
7504 minp = 0;
7505 }
7506 }
7507 *last = min;
7508 if (maxp) { bt->max = min->ti; }
7509 if (!minp) {
7510 for (newmin = bt->root; newmin->car; newmin = newmin->car) ;
7511 bt->min = newmin->ti;
7512 }
7513 }
7514}
7515
7516#endif /* USE_BHEAP */
7517
/* How get_weight() derives the weight of a matched section. */
typedef enum {
  grn_wv_none = 0, /* every section weighs 1 */
  grn_wv_static,   /* optarg->weight_vector[sid - 1], or 0 when sid exceeds optarg->vector_size */
  grn_wv_dynamic,  /* the value returned by optarg->func */
  grn_wv_constant  /* optarg->vector_size itself is used as the weight */
} grn_wv_mode;
7524
7525inline static double
7526get_weight(grn_ctx *ctx, grn_hash *s, grn_id rid, int sid,
7527 grn_wv_mode wvm, grn_select_optarg *optarg)
7528{
7529 switch (wvm) {
7530 case grn_wv_none :
7531 return 1;
7532 case grn_wv_static :
7533 return sid <= optarg->vector_size ? optarg->weight_vector[sid - 1] : 0;
7534 case grn_wv_dynamic :
7535 /* todo : support hash with keys
7536 if (s->keys) {
7537 uint32_t key_size;
7538 const char *key = _grn_table_key(ctx, s->keys, rid, &key_size);
7539 // todo : change grn_select_optarg
7540 return key ? optarg->func(s, key, key_size, sid, optarg->func_arg) : 0;
7541 }
7542 */
7543 /* todo : cast */
7544 return optarg->func(ctx, (void *)s, (void *)(intptr_t)rid, sid,
7545 optarg->func_arg);
7546 case grn_wv_constant :
7547 return optarg->vector_size;
7548 default :
7549 return 1;
7550 }
7551}
7552
7553grn_rc
7554grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii,
7555 const char *string, unsigned int string_len,
7556 grn_hash *s, grn_operator op, grn_select_optarg *optarg)
7557{
7558 int *w1, limit;
7559 grn_id tid, *tp, max_size;
7560 grn_rc rc = GRN_SUCCESS;
7561 grn_hash *h;
7562 grn_token_cursor *token_cursor;
7563 unsigned int token_flags = GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER;
7564 grn_obj *lexicon = ii->lexicon;
7565 if (!lexicon || !ii || !string || !string_len || !s || !optarg) {
7566 return GRN_INVALID_ARGUMENT;
7567 }
7568 if (!(h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(int), 0))) {
7569 return GRN_NO_MEMORY_AVAILABLE;
7570 }
7571 if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, string, string_len,
7572 GRN_TOKEN_GET, token_flags))) {
7573 grn_hash_close(ctx, h);
7574 return GRN_NO_MEMORY_AVAILABLE;
7575 }
7576 if (!(max_size = optarg->max_size)) { max_size = 1048576; }
7577 while (token_cursor->status != GRN_TOKEN_CURSOR_DONE &&
7578 token_cursor->status != GRN_TOKEN_CURSOR_DONE_SKIP) {
7579 if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
7580 if (grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **)&w1, NULL)) {
7581 (*w1)++;
7582 }
7583 }
7584 if (tid && token_cursor->curr_size) {
7585 if (optarg->mode == GRN_OP_UNSPLIT) {
7586 grn_table_search(ctx, lexicon, token_cursor->curr,
7587 token_cursor->curr_size,
7588 GRN_OP_PREFIX, (grn_obj *)h, GRN_OP_OR);
7589 }
7590 if (optarg->mode == GRN_OP_PARTIAL) {
7591 grn_table_search(ctx, lexicon, token_cursor->curr,
7592 token_cursor->curr_size,
7593 GRN_OP_SUFFIX, (grn_obj *)h, GRN_OP_OR);
7594 }
7595 }
7596 }
7597 grn_token_cursor_close(ctx, token_cursor);
7598 {
7599 grn_hash_cursor *c = grn_hash_cursor_open(ctx, h, NULL, 0, NULL, 0,
7600 0, -1, 0);
7601 if (!c) {
7602 GRN_LOG(ctx, GRN_LOG_ALERT,
7603 "grn_hash_cursor_open on grn_ii_similar_search failed !");
7604 grn_hash_close(ctx, h);
7605 return GRN_NO_MEMORY_AVAILABLE;
7606 }
7607 while (grn_hash_cursor_next(ctx, c)) {
7608 uint32_t es;
7609 grn_hash_cursor_get_key_value(ctx, c, (void **) &tp, NULL, (void **) &w1);
7610 if ((es = grn_ii_estimate_size(ctx, ii, *tp))) {
7611 *w1 += max_size / es;
7612 } else {
7613 grn_hash_cursor_delete(ctx, c, NULL);
7614 }
7615 }
7616 grn_hash_cursor_close(ctx, c);
7617 }
7618 limit = optarg->similarity_threshold
7619 ? (optarg->similarity_threshold > GRN_HASH_SIZE(h)
7620 ? GRN_HASH_SIZE(h)
7621 : optarg->similarity_threshold)
7622 : (GRN_HASH_SIZE(h) >> 3) + 1;
7623 if (GRN_HASH_SIZE(h)) {
7624 grn_id j, id;
7625 int w2, rep;
7626 grn_ii_cursor *c;
7627 grn_posting *pos;
7628 grn_wv_mode wvm = grn_wv_none;
7629 grn_table_sort_optarg arg = {
7630 GRN_TABLE_SORT_DESC|GRN_TABLE_SORT_BY_VALUE|GRN_TABLE_SORT_AS_NUMBER,
7631 NULL,
7632 NULL,
7633 0
7634 };
7635 grn_array *sorted = grn_array_create(ctx, NULL, sizeof(grn_id), 0);
7636 if (!sorted) {
7637 GRN_LOG(ctx, GRN_LOG_ALERT,
7638 "grn_hash_sort on grn_ii_similar_search failed !");
7639 grn_hash_close(ctx, h);
7640 return GRN_NO_MEMORY_AVAILABLE;
7641 }
7642 grn_hash_sort(ctx, h, limit, sorted, &arg);
7643 /* todo support subrec
7644 rep = (s->record_unit == grn_rec_position || s->subrec_unit == grn_rec_position);
7645 */
7646 rep = 0;
7647 if (optarg->func) {
7648 wvm = grn_wv_dynamic;
7649 } else if (optarg->vector_size) {
7650 wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant;
7651 }
7652 for (j = 1; j <= limit; j++) {
7653 grn_array_get_value(ctx, sorted, j, &id);
7654 _grn_hash_get_key_value(ctx, h, id, (void **) &tp, (void **) &w1);
7655 if (!*tp || !(c = grn_ii_cursor_open(ctx, ii, *tp, GRN_ID_NIL, GRN_ID_MAX,
7656 rep
7657 ? ii->n_elements
7658 : ii->n_elements - 1, 0))) {
7659 GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed (%d)", *tp);
7660 continue;
7661 }
7662 if (rep) {
7663 while (grn_ii_cursor_next(ctx, c)) {
7664 pos = c->post;
7665 if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg)) > 0) {
7666 while (grn_ii_cursor_next_pos(ctx, c)) {
7667 res_add(ctx, s, (grn_rset_posinfo *) pos,
7668 *w1 * w2 * (1 + pos->weight), op);
7669 }
7670 }
7671 }
7672 } else {
7673 while (grn_ii_cursor_next(ctx, c)) {
7674 pos = c->post;
7675 if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg)) > 0) {
7676 res_add(ctx, s, (grn_rset_posinfo *) pos,
7677 *w1 * w2 * (pos->tf + pos->weight), op);
7678 }
7679 }
7680 }
7681 grn_ii_cursor_close(ctx, c);
7682 }
7683 grn_array_close(ctx, sorted);
7684 }
7685 grn_hash_close(ctx, h);
7686 grn_ii_resolve_sel_and(ctx, s, op);
7687 // grn_hash_cursor_clear(r);
7688 return rc;
7689}
7690
7691#define TERM_EXTRACT_EACH_POST 0
7692#define TERM_EXTRACT_EACH_TERM 1
7693
7694grn_rc
7695grn_ii_term_extract(grn_ctx *ctx, grn_ii *ii, const char *string,
7696 unsigned int string_len, grn_hash *s,
7697 grn_operator op, grn_select_optarg *optarg)
7698{
7699 grn_rset_posinfo pi;
7700 grn_id tid;
7701 const char *p, *pe;
7702 grn_obj *nstr;
7703 const char *normalized;
7704 unsigned int normalized_length_in_bytes;
7705 grn_ii_cursor *c;
7706 grn_posting *pos;
7707 int skip, rep, policy;
7708 grn_rc rc = GRN_SUCCESS;
7709 grn_wv_mode wvm = grn_wv_none;
7710 if (!ii || !string || !string_len || !s || !optarg) {
7711 return GRN_INVALID_ARGUMENT;
7712 }
7713 if (!(nstr = grn_string_open(ctx, string, string_len, NULL, 0))) {
7714 return GRN_INVALID_ARGUMENT;
7715 }
7716 policy = optarg->max_interval;
7717 if (optarg->func) {
7718 wvm = grn_wv_dynamic;
7719 } else if (optarg->vector_size) {
7720 wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant;
7721 }
7722 /* todo support subrec
7723 if (policy == TERM_EXTRACT_EACH_POST) {
7724 if ((rc = grn_records_reopen(s, grn_rec_section, grn_rec_none, 0))) { goto exit; }
7725 }
7726 rep = (s->record_unit == grn_rec_position || s->subrec_unit == grn_rec_position);
7727 */
7728 rep = 0;
7729 grn_string_get_normalized(ctx, nstr, &normalized, &normalized_length_in_bytes,
7730 NULL);
7731 for (p = normalized, pe = p + normalized_length_in_bytes; p < pe; p += skip) {
7732 if ((tid = grn_table_lcp_search(ctx, ii->lexicon, p, pe - p))) {
7733 if (policy == TERM_EXTRACT_EACH_POST) {
7734 if (!(skip = grn_table_get_key(ctx, ii->lexicon, tid, NULL, 0))) { break; }
7735 } else {
7736 if (!(skip = (int)grn_charlen(ctx, p, pe))) { break; }
7737 }
7738 if (!(c = grn_ii_cursor_open(ctx, ii, tid, GRN_ID_NIL, GRN_ID_MAX,
7739 rep
7740 ? ii->n_elements
7741 : ii->n_elements - 1, 0))) {
7742 GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed (%d)", tid);
7743 continue;
7744 }
7745 if (rep) {
7746 while (grn_ii_cursor_next(ctx, c)) {
7747 pos = c->post;
7748 while (grn_ii_cursor_next_pos(ctx, c)) {
7749 res_add(ctx, s, (grn_rset_posinfo *) pos,
7750 get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg), op);
7751 }
7752 }
7753 } else {
7754 while (grn_ii_cursor_next(ctx, c)) {
7755 if (policy == TERM_EXTRACT_EACH_POST) {
7756 pi.rid = c->post->rid;
7757 pi.sid = p - normalized;
7758 res_add(ctx, s, &pi, pi.sid + 1, op);
7759 } else {
7760 pos = c->post;
7761 res_add(ctx, s, (grn_rset_posinfo *) pos,
7762 get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg), op);
7763 }
7764 }
7765 }
7766 grn_ii_cursor_close(ctx, c);
7767 } else {
7768 if (!(skip = (int)grn_charlen(ctx, p, pe))) {
7769 break;
7770 }
7771 }
7772 }
7773 grn_obj_close(ctx, nstr);
7774 return rc;
7775}
7776
7777typedef struct {
7778 grn_id rid;
7779 uint32_t sid;
7780 uint32_t start_pos;
7781 uint32_t end_pos;
7782 uint32_t tf;
7783 uint32_t weight;
7784} grn_ii_select_cursor_posting;
7785
7786typedef struct {
7787 btr *bt;
7788 grn_ii *ii;
7789 token_info **tis;
7790 uint32_t n_tis;
7791 int max_interval;
7792 grn_operator mode;
7793 grn_ii_select_cursor_posting posting;
7794 const char *string;
7795 unsigned int string_len;
7796 grn_bool done;
7797 grn_ii_select_cursor_posting unshifted_posting;
7798 grn_bool have_unshifted_posting;
7799} grn_ii_select_cursor;
7800
7801static grn_rc
7802grn_ii_select_cursor_close(grn_ctx *ctx,
7803 grn_ii_select_cursor *cursor)
7804{
7805 token_info **tip;
7806
7807 if (!cursor) {
7808 return GRN_SUCCESS;
7809 }
7810
7811 for (tip = cursor->tis; tip < cursor->tis + cursor->n_tis; tip++) {
7812 if (*tip) {
7813 token_info_close(ctx, *tip);
7814 }
7815 }
7816 if (cursor->tis) {
7817 GRN_FREE(cursor->tis);
7818 }
7819 bt_close(ctx, cursor->bt);
7820 GRN_FREE(cursor);
7821
7822 return GRN_SUCCESS;
7823}
7824
7825static grn_ii_select_cursor *
7826grn_ii_select_cursor_open(grn_ctx *ctx,
7827 grn_ii *ii,
7828 const char *string,
7829 unsigned int string_len,
7830 grn_select_optarg *optarg)
7831{
7832 grn_operator mode = GRN_OP_EXACT;
7833 grn_ii_select_cursor *cursor;
7834
7835 if (string_len == 0) {
7836 ERR(GRN_INVALID_ARGUMENT,
7837 "[ii][select][cursor][open] empty string");
7838 return NULL;
7839 }
7840
7841 if (optarg) {
7842 mode = optarg->mode;
7843 }
7844 switch (mode) {
7845 case GRN_OP_EXACT :
7846 case GRN_OP_FUZZY :
7847 case GRN_OP_NEAR :
7848 case GRN_OP_NEAR2 :
7849 break;
7850 default :
7851 ERR(GRN_INVALID_ARGUMENT,
7852 "[ii][select][cursor][open] "
7853 "EXACT, FUZZY, NEAR and NEAR2 are only supported mode: %s",
7854 grn_operator_to_string(mode));
7855 break;
7856 }
7857
7858 cursor = GRN_CALLOC(sizeof(grn_ii_select_cursor));
7859 if (!cursor) {
7860 ERR(ctx->rc,
7861 "[ii][select][cursor][open] failed to allocate cursor: %s",
7862 ctx->errbuf);
7863 return NULL;
7864 }
7865
7866 cursor->ii = ii;
7867 cursor->mode = mode;
7868
7869 if (!(cursor->tis = GRN_MALLOC(sizeof(token_info *) * string_len * 2))) {
7870 ERR(ctx->rc,
7871 "[ii][select][cursor][open] failed to allocate token info container: %s",
7872 ctx->errbuf);
7873 GRN_FREE(cursor);
7874 return NULL;
7875 }
7876 cursor->n_tis = 0;
7877 if (cursor->mode == GRN_OP_FUZZY) {
7878 grn_bool only_skip_token = GRN_FALSE;
7879 grn_id previous_min = GRN_ID_NIL;
7880 if (token_info_build_fuzzy(ctx, ii->lexicon, ii, string, string_len,
7881 cursor->tis, &(cursor->n_tis),
7882 &only_skip_token, previous_min,
7883 cursor->mode, &(optarg->fuzzy)) != GRN_SUCCESS) {
7884 grn_ii_select_cursor_close(ctx, cursor);
7885 return NULL;
7886 }
7887 } else {
7888 grn_bool only_skip_token = GRN_FALSE;
7889 grn_id previous_min = GRN_ID_NIL;
7890 if (token_info_build(ctx, ii->lexicon, ii, string, string_len,
7891 cursor->tis, &(cursor->n_tis),
7892 &only_skip_token, previous_min,
7893 cursor->mode) != GRN_SUCCESS) {
7894 grn_ii_select_cursor_close(ctx, cursor);
7895 return NULL;
7896 }
7897 }
7898 if (cursor->n_tis == 0) {
7899 grn_ii_select_cursor_close(ctx, cursor);
7900 return NULL;
7901 }
7902
7903 switch (cursor->mode) {
7904 case GRN_OP_NEAR2 :
7905 token_info_clear_offset(cursor->tis, cursor->n_tis);
7906 cursor->mode = GRN_OP_NEAR;
7907 /* fallthru */
7908 case GRN_OP_NEAR :
7909 if (!(cursor->bt = bt_open(ctx, cursor->n_tis))) {
7910 ERR(ctx->rc,
7911 "[ii][select][cursor][open] failed to allocate btree: %s",
7912 ctx->errbuf);
7913 grn_ii_select_cursor_close(ctx, cursor);
7914 return NULL;
7915 }
7916 cursor->max_interval = optarg->max_interval;
7917 break;
7918 default :
7919 break;
7920 }
7921 qsort(cursor->tis, cursor->n_tis, sizeof(token_info *), token_compare);
7922 GRN_LOG(ctx, GRN_LOG_INFO,
7923 "[ii][select][cursor][open] n=%d <%.*s>",
7924 cursor->n_tis,
7925 string_len, string);
7926
7927 cursor->string = string;
7928 cursor->string_len = string_len;
7929
7930 cursor->done = GRN_FALSE;
7931
7932 cursor->have_unshifted_posting = GRN_FALSE;
7933
7934 return cursor;
7935}
7936
7937static grn_ii_select_cursor_posting *
7938grn_ii_select_cursor_next(grn_ctx *ctx,
7939 grn_ii_select_cursor *cursor)
7940{
7941 btr *bt = cursor->bt;
7942 token_info **tis = cursor->tis;
7943 token_info **tie = tis + cursor->n_tis;
7944 uint32_t n_tis = cursor->n_tis;
7945 int max_interval = cursor->max_interval;
7946 grn_operator mode = cursor->mode;
7947
7948 if (cursor->have_unshifted_posting) {
7949 cursor->have_unshifted_posting = GRN_FALSE;
7950 return &(cursor->unshifted_posting);
7951 }
7952
7953 if (cursor->done) {
7954 return NULL;
7955 }
7956
7957 for (;;) {
7958 grn_id rid;
7959 grn_id sid;
7960 grn_id next_rid;
7961 grn_id next_sid;
7962 token_info **tip;
7963
7964 rid = (*tis)->p->rid;
7965 sid = (*tis)->p->sid;
7966 for (tip = tis + 1, next_rid = rid, next_sid = sid + 1;
7967 tip < tie;
7968 tip++) {
7969 token_info *ti = *tip;
7970 if (token_info_skip(ctx, ti, rid, sid)) { return NULL; }
7971 if (ti->p->rid != rid || ti->p->sid != sid) {
7972 next_rid = ti->p->rid;
7973 next_sid = ti->p->sid;
7974 break;
7975 }
7976 }
7977
7978 if (tip == tie) {
7979 int start_pos = 0;
7980 int pos = 0;
7981 int end_pos = 0;
7982 int score = 0;
7983 int tf = 0;
7984 int tscore = 0;
7985
7986#define SKIP_OR_BREAK(pos) {\
7987 if (token_info_skip_pos(ctx, ti, rid, sid, pos)) { break; } \
7988 if (ti->p->rid != rid || ti->p->sid != sid) { \
7989 next_rid = ti->p->rid; \
7990 next_sid = ti->p->sid; \
7991 break; \
7992 } \
7993}
7994
7995#define RETURN_POSTING() do { \
7996 cursor->posting.rid = rid; \
7997 cursor->posting.sid = sid; \
7998 cursor->posting.start_pos = start_pos; \
7999 cursor->posting.end_pos = end_pos; \
8000 cursor->posting.tf = tf; \
8001 cursor->posting.weight = tscore; \
8002 if (token_info_skip_pos(ctx, *tis, rid, sid, pos) != GRN_SUCCESS) { \
8003 if (token_info_skip(ctx, *tis, next_rid, next_sid) != GRN_SUCCESS) { \
8004 cursor->done = GRN_TRUE; \
8005 } \
8006 } \
8007 return &(cursor->posting); \
8008} while (GRN_FALSE)
8009
8010 if (n_tis == 1) {
8011 start_pos = pos = end_pos = (*tis)->p->pos;
8012 pos++;
8013 tf = (*tis)->p->tf;
8014 tscore = (*tis)->p->weight + (*tis)->cursors->bins[0]->weight;
8015 RETURN_POSTING();
8016 } else if (mode == GRN_OP_NEAR) {
8017 bt_zap(bt);
8018 for (tip = tis; tip < tie; tip++) {
8019 token_info *ti = *tip;
8020 SKIP_OR_BREAK(pos);
8021 bt_push(bt, ti);
8022 }
8023 if (tip == tie) {
8024 for (;;) {
8025 token_info *ti;
8026 int min;
8027 int max;
8028
8029 ti = bt->min;
8030 min = ti->pos;
8031 max = bt->max->pos;
8032 if (min > max) {
8033 char ii_name[GRN_TABLE_MAX_KEY_SIZE];
8034 int ii_name_size;
8035 ii_name_size = grn_obj_name(ctx,
8036 (grn_obj *)(cursor->ii),
8037 ii_name,
8038 GRN_TABLE_MAX_KEY_SIZE);
8039 ERR(GRN_FILE_CORRUPT,
8040 "[ii][select][cursor][near] "
8041 "max position must be larger than min position: "
8042 "min:<%d> max:<%d> ii:<%.*s> string:<%.*s>",
8043 min, max,
8044 ii_name_size, ii_name,
8045 cursor->string_len,
8046 cursor->string);
8047 return NULL;
8048 }
8049 if ((max_interval < 0) || (max - min <= max_interval)) {
8050 /* TODO: Set start_pos, pos, end_pos, tf and tscore */
8051 RETURN_POSTING();
8052 if (ti->pos == max + 1) {
8053 break;
8054 }
8055 SKIP_OR_BREAK(max + 1);
8056 } else {
8057 if (ti->pos == max - max_interval) {
8058 break;
8059 }
8060 SKIP_OR_BREAK(max - max_interval);
8061 }
8062 bt_pop(bt);
8063 }
8064 }
8065 } else {
8066 int count = 0;
8067 for (tip = tis; ; tip++) {
8068 token_info *ti;
8069
8070 if (tip == tie) { tip = tis; }
8071 ti = *tip;
8072 SKIP_OR_BREAK(pos);
8073 if (ti->pos == pos) {
8074 score += ti->p->weight + ti->cursors->bins[0]->weight;
8075 count++;
8076 if (ti->p->pos > end_pos) {
8077 end_pos = ti->p->pos;
8078 }
8079 } else {
8080 score = ti->p->weight + ti->cursors->bins[0]->weight;
8081 count = 1;
8082 start_pos = pos = ti->pos;
8083 end_pos = ti->p->pos;
8084 }
8085 if (count == n_tis) {
8086 pos++;
8087 if (ti->p->pos > end_pos) {
8088 end_pos = ti->p->pos;
8089 }
8090 tf = 1;
8091 tscore += score;
8092 RETURN_POSTING();
8093 }
8094 }
8095 }
8096#undef SKIP_OR_BREAK
8097 }
8098 if (token_info_skip(ctx, *tis, next_rid, next_sid)) {
8099 return NULL;
8100 }
8101 }
8102}
8103
8104static void
8105grn_ii_select_cursor_unshift(grn_ctx *ctx,
8106 grn_ii_select_cursor *cursor,
8107 grn_ii_select_cursor_posting *posting)
8108{
8109 cursor->unshifted_posting = *posting;
8110 cursor->have_unshifted_posting = GRN_TRUE;
8111}
8112
8113static grn_rc
8114grn_ii_parse_regexp_query(grn_ctx *ctx,
8115 const char *log_tag,
8116 const char *string, unsigned int string_len,
8117 grn_obj *parsed_strings)
8118{
8119 grn_bool escaping = GRN_FALSE;
8120 int nth_char = 0;
8121 const char *current = string;
8122 const char *string_end = string + string_len;
8123 grn_obj buffer;
8124
8125 GRN_TEXT_INIT(&buffer, 0);
8126 while (current < string_end) {
8127 const char *target;
8128 int char_len;
8129
8130 char_len = grn_charlen(ctx, current, string_end);
8131 if (char_len == 0) {
8132 GRN_OBJ_FIN(ctx, &buffer);
8133 ERR(GRN_INVALID_ARGUMENT,
8134 "%s invalid encoding character: <%.*s|%#x|>",
8135 log_tag,
8136 (int)(current - string), string,
8137 *current);
8138 return ctx->rc;
8139 }
8140 target = current;
8141 current += char_len;
8142
8143 if (escaping) {
8144 escaping = GRN_FALSE;
8145 if (char_len == 1) {
8146 switch (*target) {
8147 case 'A' :
8148 if (nth_char == 0) {
8149 target = GRN_TOKENIZER_BEGIN_MARK_UTF8;
8150 char_len = GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN;
8151 }
8152 break;
8153 case 'z' :
8154 if (current == string_end) {
8155 target = GRN_TOKENIZER_END_MARK_UTF8;
8156 char_len = GRN_TOKENIZER_END_MARK_UTF8_LEN;
8157 }
8158 break;
8159 default :
8160 break;
8161 }
8162 }
8163 } else {
8164 if (char_len == 1) {
8165 if (*target == '\\') {
8166 escaping = GRN_TRUE;
8167 continue;
8168 } else if (*target == '.' &&
8169 grn_charlen(ctx, current, string_end) == 1 &&
8170 *current == '*') {
8171 if (GRN_TEXT_LEN(&buffer) > 0) {
8172 grn_vector_add_element(ctx,
8173 parsed_strings,
8174 GRN_TEXT_VALUE(&buffer),
8175 GRN_TEXT_LEN(&buffer),
8176 0,
8177 GRN_DB_TEXT);
8178 GRN_BULK_REWIND(&buffer);
8179 }
8180 current++;
8181 nth_char++;
8182 continue;
8183 }
8184 }
8185 }
8186
8187 GRN_TEXT_PUT(ctx, &buffer, target, char_len);
8188 nth_char++;
8189 }
8190 if (GRN_TEXT_LEN(&buffer) > 0) {
8191 grn_vector_add_element(ctx,
8192 parsed_strings,
8193 GRN_TEXT_VALUE(&buffer),
8194 GRN_TEXT_LEN(&buffer),
8195 0,
8196 GRN_DB_TEXT);
8197 }
8198 GRN_OBJ_FIN(ctx, &buffer);
8199
8200 return GRN_SUCCESS;
8201}
8202
8203static grn_rc
8204grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
8205 const char *string, unsigned int string_len,
8206 grn_hash *s, grn_operator op, grn_select_optarg *optarg)
8207{
8208 grn_rc rc;
8209 grn_obj parsed_strings;
8210 unsigned int n_parsed_strings;
8211
8212 GRN_TEXT_INIT(&parsed_strings, GRN_OBJ_VECTOR);
8213 rc = grn_ii_parse_regexp_query(ctx, "[ii][select][regexp]",
8214 string, string_len, &parsed_strings);
8215 if (rc != GRN_SUCCESS) {
8216 GRN_OBJ_FIN(ctx, &parsed_strings);
8217 return rc;
8218 }
8219
8220 if (optarg) {
8221 optarg->mode = GRN_OP_EXACT;
8222 }
8223
8224 n_parsed_strings = grn_vector_size(ctx, &parsed_strings);
8225 if (n_parsed_strings == 1) {
8226 const char *parsed_string;
8227 unsigned int parsed_string_len;
8228 parsed_string_len = grn_vector_get_element(ctx,
8229 &parsed_strings,
8230 0,
8231 &parsed_string,
8232 NULL,
8233 NULL);
8234 rc = grn_ii_select(ctx, ii,
8235 parsed_string,
8236 parsed_string_len,
8237 s, op, optarg);
8238 } else {
8239 int i;
8240 grn_ii_select_cursor **cursors;
8241 grn_bool have_error = GRN_FALSE;
8242
8243 cursors = GRN_CALLOC(sizeof(grn_ii_select_cursor *) * n_parsed_strings);
8244 for (i = 0; i < n_parsed_strings; i++) {
8245 const char *parsed_string;
8246 unsigned int parsed_string_len;
8247 parsed_string_len = grn_vector_get_element(ctx,
8248 &parsed_strings,
8249 i,
8250 &parsed_string,
8251 NULL,
8252 NULL);
8253 cursors[i] = grn_ii_select_cursor_open(ctx,
8254 ii,
8255 parsed_string,
8256 parsed_string_len,
8257 optarg);
8258 if (!cursors[i]) {
8259 have_error = GRN_TRUE;
8260 break;
8261 }
8262 }
8263
8264 while (!have_error) {
8265 grn_ii_select_cursor_posting *posting;
8266 uint32_t pos;
8267
8268 posting = grn_ii_select_cursor_next(ctx, cursors[0]);
8269 if (!posting) {
8270 break;
8271 }
8272
8273 pos = posting->end_pos;
8274 for (i = 1; i < n_parsed_strings; i++) {
8275 grn_ii_select_cursor_posting *posting_i;
8276
8277 for (;;) {
8278 posting_i = grn_ii_select_cursor_next(ctx, cursors[i]);
8279 if (!posting_i) {
8280 break;
8281 }
8282
8283 if (posting_i->rid == posting->rid &&
8284 posting_i->sid == posting->sid &&
8285 posting_i->start_pos > pos) {
8286 grn_ii_select_cursor_unshift(ctx, cursors[i], posting_i);
8287 break;
8288 }
8289 if (posting_i->rid > posting->rid) {
8290 grn_ii_select_cursor_unshift(ctx, cursors[i], posting_i);
8291 break;
8292 }
8293 }
8294
8295 if (!posting_i) {
8296 break;
8297 }
8298
8299 if (posting_i->rid != posting->rid || posting_i->sid != posting->sid) {
8300 break;
8301 }
8302
8303 pos = posting_i->end_pos;
8304 }
8305
8306 if (i == n_parsed_strings) {
8307 grn_rset_posinfo pi = {posting->rid, posting->sid, pos};
8308 double record_score = 1.0;
8309 res_add(ctx, s, &pi, record_score, op);
8310 }
8311 }
8312
8313 for (i = 0; i < n_parsed_strings; i++) {
8314 if (cursors[i]) {
8315 grn_ii_select_cursor_close(ctx, cursors[i]);
8316 }
8317 }
8318 GRN_FREE(cursors);
8319 }
8320 GRN_OBJ_FIN(ctx, &parsed_strings);
8321
8322 if (optarg) {
8323 optarg->mode = GRN_OP_REGEXP;
8324 }
8325
8326 return rc;
8327}
8328
8329#ifdef GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
8330static grn_bool
8331grn_ii_select_sequential_search_should_use(grn_ctx *ctx,
8332 grn_ii *ii,
8333 const char *raw_query,
8334 unsigned int raw_query_len,
8335 grn_hash *result,
8336 grn_operator op,
8337 grn_wv_mode wvm,
8338 grn_select_optarg *optarg,
8339 token_info **token_infos,
8340 uint32_t n_token_infos,
8341 double too_many_index_match_ratio)
8342{
8343 int n_sources;
8344
8345 if (too_many_index_match_ratio < 0.0) {
8346 return GRN_FALSE;
8347 }
8348
8349 if (op != GRN_OP_AND) {
8350 return GRN_FALSE;
8351 }
8352
8353 if (optarg->mode != GRN_OP_EXACT) {
8354 return GRN_FALSE;
8355 }
8356
8357 n_sources = ii->obj.source_size / sizeof(grn_id);
8358 if (n_sources == 0) {
8359 return GRN_FALSE;
8360 }
8361
8362 {
8363 uint32_t i;
8364 int n_existing_records;
8365
8366 n_existing_records = GRN_HASH_SIZE(result);
8367 for (i = 0; i < n_token_infos; i++) {
8368 token_info *info = token_infos[i];
8369 if (n_existing_records <= (info->size * too_many_index_match_ratio)) {
8370 return GRN_TRUE;
8371 }
8372 }
8373 return GRN_FALSE;
8374 }
8375}
8376
8377static void
8378grn_ii_select_sequential_search_body(grn_ctx *ctx,
8379 grn_ii *ii,
8380 grn_obj *normalizer,
8381 grn_encoding encoding,
8382 OnigRegex regex,
8383 grn_hash *result,
8384 grn_operator op,
8385 grn_wv_mode wvm,
8386 grn_select_optarg *optarg)
8387{
8388 int i, n_sources;
8389 grn_id *source_ids = ii->obj.source;
8390 grn_obj buffer;
8391
8392 GRN_TEXT_INIT(&buffer, 0);
8393 n_sources = ii->obj.source_size / sizeof(grn_id);
8394 for (i = 0; i < n_sources; i++) {
8395 grn_id source_id = source_ids[i];
8396 grn_obj *source;
8397 grn_obj *accessor;
8398
8399 source = grn_ctx_at(ctx, source_id);
8400 switch (source->header.type) {
8401 case GRN_TABLE_HASH_KEY :
8402 case GRN_TABLE_PAT_KEY :
8403 case GRN_TABLE_DAT_KEY :
8404 accessor = grn_obj_column(ctx,
8405 (grn_obj *)result,
8406 GRN_COLUMN_NAME_KEY,
8407 GRN_COLUMN_NAME_KEY_LEN);
8408 break;
8409 default :
8410 {
8411 char column_name[GRN_TABLE_MAX_KEY_SIZE];
8412 int column_name_size;
8413 column_name_size = grn_column_name(ctx, source,
8414 column_name,
8415 GRN_TABLE_MAX_KEY_SIZE);
8416 accessor = grn_obj_column(ctx, (grn_obj *)result, column_name,
8417 column_name_size);
8418 }
8419 break;
8420 }
8421
8422 {
8423 grn_hash_cursor *cursor;
8424 grn_id id;
8425 cursor = grn_hash_cursor_open(ctx, result, NULL, 0, NULL, 0, 0, -1, 0);
8426 while ((id = grn_hash_cursor_next(ctx, cursor)) != GRN_ID_NIL) {
8427 OnigPosition position;
8428 grn_obj *value;
8429 const char *normalized_value;
8430 unsigned int normalized_value_length;
8431
8432 GRN_BULK_REWIND(&buffer);
8433 grn_obj_get_value(ctx, accessor, id, &buffer);
8434 value = grn_string_open_(ctx,
8435 GRN_TEXT_VALUE(&buffer),
8436 GRN_TEXT_LEN(&buffer),
8437 normalizer, 0, encoding);
8438 grn_string_get_normalized(ctx, value,
8439 &normalized_value, &normalized_value_length,
8440 NULL);
8441 position = onig_search(regex,
8442 normalized_value,
8443 normalized_value + normalized_value_length,
8444 normalized_value,
8445 normalized_value + normalized_value_length,
8446 NULL,
8447 0);
8448 if (position != ONIG_MISMATCH) {
8449 grn_id *record_id;
8450 grn_rset_posinfo info;
8451 double score;
8452
8453 grn_hash_cursor_get_key(ctx, cursor, (void **)&record_id);
8454
8455 info.rid = *record_id;
8456 info.sid = i + 1;
8457 info.pos = 0;
8458 score = get_weight(ctx, result, info.rid, info.sid, wvm, optarg);
8459 res_add(ctx, result, &info, score, op);
8460 }
8461 grn_obj_unlink(ctx, value);
8462 }
8463 grn_hash_cursor_close(ctx, cursor);
8464 }
8465 grn_obj_unlink(ctx, accessor);
8466 }
8467 grn_obj_unlink(ctx, &buffer);
8468}
8469
8470static grn_bool
8471grn_ii_select_sequential_search(grn_ctx *ctx,
8472 grn_ii *ii,
8473 const char *raw_query,
8474 unsigned int raw_query_len,
8475 grn_hash *result,
8476 grn_operator op,
8477 grn_wv_mode wvm,
8478 grn_select_optarg *optarg,
8479 token_info **token_infos,
8480 uint32_t n_token_infos)
8481{
8482 grn_bool processed = GRN_TRUE;
8483
8484 {
8485 if (!grn_ii_select_sequential_search_should_use(ctx,
8486 ii,
8487 raw_query,
8488 raw_query_len,
8489 result,
8490 op,
8491 wvm,
8492 optarg,
8493 token_infos,
8494 n_token_infos,
8495 grn_ii_select_too_many_index_match_ratio)) {
8496 return GRN_FALSE;
8497 }
8498 }
8499
8500 {
8501 grn_encoding encoding;
8502 grn_obj *normalizer;
8503 int nflags = 0;
8504 grn_obj *query;
8505 const char *normalized_query;
8506 unsigned int normalized_query_length;
8507
8508 grn_table_get_info(ctx, ii->lexicon,
8509 NULL, &encoding, NULL, &normalizer, NULL);
8510 query = grn_string_open_(ctx, raw_query, raw_query_len,
8511 normalizer, nflags, encoding);
8512 grn_string_get_normalized(ctx, query,
8513 &normalized_query, &normalized_query_length,
8514 NULL);
8515 {
8516 OnigRegex regex;
8517 int onig_result;
8518 OnigErrorInfo error_info;
8519 onig_result = onig_new(&regex,
8520 normalized_query,
8521 normalized_query + normalized_query_length,
8522 ONIG_OPTION_NONE,
8523 ONIG_ENCODING_UTF8,
8524 ONIG_SYNTAX_ASIS,
8525 &error_info);
8526 if (onig_result == ONIG_NORMAL) {
8527 grn_ii_select_sequential_search_body(ctx, ii, normalizer, encoding,
8528 regex, result, op, wvm, optarg);
8529 onig_free(regex);
8530 } else {
8531 char message[ONIG_MAX_ERROR_MESSAGE_LEN];
8532 onig_error_code_to_str(message, onig_result, error_info);
8533 GRN_LOG(ctx, GRN_LOG_WARNING,
8534 "[ii][select][sequential] "
8535 "failed to create regular expression object: %s",
8536 message);
8537 processed = GRN_FALSE;
8538 }
8539 }
8540 grn_obj_unlink(ctx, query);
8541 }
8542
8543 return processed;
8544}
8545#endif
8546
8547grn_rc
8548grn_ii_select(grn_ctx *ctx, grn_ii *ii,
8549 const char *string, unsigned int string_len,
8550 grn_hash *s, grn_operator op, grn_select_optarg *optarg)
8551{
8552 btr *bt = NULL;
8553 grn_rc rc = GRN_SUCCESS;
8554 int rep, orp, weight, max_interval = 0;
8555 token_info *ti, **tis = NULL, **tip, **tie;
8556 uint32_t n = 0, rid, sid, nrid, nsid;
8557 grn_bool only_skip_token = GRN_FALSE;
8558 grn_operator mode = GRN_OP_EXACT;
8559 grn_wv_mode wvm = grn_wv_none;
8560 grn_obj *lexicon = ii->lexicon;
8561 grn_scorer_score_func *score_func = NULL;
8562 grn_scorer_matched_record record;
8563 grn_id previous_min = GRN_ID_NIL;
8564 grn_id current_min = GRN_ID_NIL;
8565 grn_bool set_min_enable_for_and_query = GRN_FALSE;
8566
8567 if (!lexicon || !ii || !s) { return GRN_INVALID_ARGUMENT; }
8568 if (optarg) {
8569 mode = optarg->mode;
8570 if (optarg->func) {
8571 wvm = grn_wv_dynamic;
8572 } else if (optarg->vector_size) {
8573 wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant;
8574 }
8575 if (optarg->match_info) {
8576 if (optarg->match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) {
8577 previous_min = optarg->match_info->min;
8578 set_min_enable_for_and_query = GRN_TRUE;
8579 }
8580 }
8581 }
8582 if (mode == GRN_OP_SIMILAR) {
8583 return grn_ii_similar_search(ctx, ii, string, string_len, s, op, optarg);
8584 }
8585 if (mode == GRN_OP_TERM_EXTRACT) {
8586 return grn_ii_term_extract(ctx, ii, string, string_len, s, op, optarg);
8587 }
8588 if (mode == GRN_OP_REGEXP) {
8589 return grn_ii_select_regexp(ctx, ii, string, string_len, s, op, optarg);
8590 }
8591 /* todo : support subrec
8592 rep = (s->record_unit == grn_rec_position || s->subrec_unit == grn_rec_position);
8593 orp = (s->record_unit == grn_rec_position || op == GRN_OP_OR);
8594 */
8595 rep = 0;
8596 orp = op == GRN_OP_OR;
8597 if (!string_len) { goto exit; }
8598 if (!(tis = GRN_MALLOC(sizeof(token_info *) * string_len * 2))) {
8599 return GRN_NO_MEMORY_AVAILABLE;
8600 }
8601 if (mode == GRN_OP_FUZZY) {
8602 if (token_info_build_fuzzy(ctx, lexicon, ii, string, string_len,
8603 tis, &n, &only_skip_token, previous_min,
8604 mode, &(optarg->fuzzy)) ||
8605 !n) {
8606 goto exit;
8607 }
8608 } else {
8609 if (token_info_build(ctx, lexicon, ii, string, string_len,
8610 tis, &n, &only_skip_token, previous_min, mode) ||
8611 !n) {
8612 goto exit;
8613 }
8614 }
8615 switch (mode) {
8616 case GRN_OP_NEAR2 :
8617 token_info_clear_offset(tis, n);
8618 mode = GRN_OP_NEAR;
8619 /* fallthru */
8620 case GRN_OP_NEAR :
8621 if (!(bt = bt_open(ctx, n))) { rc = GRN_NO_MEMORY_AVAILABLE; goto exit; }
8622 max_interval = optarg->max_interval;
8623 break;
8624 default :
8625 break;
8626 }
8627 qsort(tis, n, sizeof(token_info *), token_compare);
8628 tie = tis + n;
8629 /*
8630 for (tip = tis; tip < tie; tip++) {
8631 ti = *tip;
8632 grn_log("o=%d n=%d s=%d r=%d", ti->offset, ti->ntoken, ti->size, ti->rid);
8633 }
8634 */
8635 GRN_LOG(ctx, GRN_LOG_INFO, "n=%d (%.*s)", n, string_len, string);
8636 /* todo : array as result
8637 if (n == 1 && (*tis)->cursors->n_entries == 1 && op == GRN_OP_OR
8638 && !GRN_HASH_SIZE(s) && !s->garbages
8639 && s->record_unit == grn_rec_document && !s->max_n_subrecs
8640 && grn_ii_max_section(ii) == 1) {
8641 grn_ii_cursor *c = (*tis)->cursors->bins[0];
8642 if ((rc = grn_hash_array_init(s, (*tis)->size + 32768))) { goto exit; }
8643 do {
8644 grn_rset_recinfo *ri;
8645 grn_posting *p = c->post;
8646 if ((weight = get_weight(ctx, s, p->rid, p->sid, wvm, optarg))) {
8647 GRN_HASH_INT_ADD(s, p, ri);
8648 ri->score = (p->tf + p->score) * weight;
8649 ri->n_subrecs = 1;
8650 }
8651 } while (grn_ii_cursor_next(ctx, c));
8652 goto exit;
8653 }
8654 */
8655#ifdef GRN_II_SELECT_ENABLE_SEQUENTIAL_SEARCH
8656 if (grn_ii_select_sequential_search(ctx, ii, string, string_len,
8657 s, op, wvm, optarg, tis, n)) {
8658 goto exit;
8659 }
8660#endif
8661
8662 if (optarg && optarg->scorer) {
8663 grn_proc *scorer = (grn_proc *)(optarg->scorer);
8664 score_func = scorer->callbacks.scorer.score;
8665 record.table = grn_ctx_at(ctx, s->obj.header.domain);
8666 record.lexicon = lexicon;
8667 record.id = GRN_ID_NIL;
8668 GRN_RECORD_INIT(&(record.terms), GRN_OBJ_VECTOR, lexicon->header.domain);
8669 GRN_UINT32_INIT(&(record.term_weights), GRN_OBJ_VECTOR);
8670 record.total_term_weights = 0;
8671 record.n_documents = grn_table_size(ctx, record.table);
8672 record.n_occurrences = 0;
8673 record.n_candidates = 0;
8674 record.n_tokens = 0;
8675 record.weight = 0;
8676 record.args_expr = optarg->scorer_args_expr;
8677 record.args_expr_offset = optarg->scorer_args_expr_offset;
8678 }
8679
8680 for (;;) {
8681 rid = (*tis)->p->rid;
8682 sid = (*tis)->p->sid;
8683 for (tip = tis + 1, nrid = rid, nsid = sid + 1; tip < tie; tip++) {
8684 ti = *tip;
8685 if (token_info_skip(ctx, ti, rid, sid)) { goto exit; }
8686 if (ti->p->rid != rid || ti->p->sid != sid) {
8687 nrid = ti->p->rid;
8688 nsid = ti->p->sid;
8689 break;
8690 }
8691 }
8692 weight = get_weight(ctx, s, rid, sid, wvm, optarg);
8693 if (tip == tie && weight != 0) {
8694 grn_rset_posinfo pi = {rid, sid, 0};
8695 if (orp || grn_hash_get(ctx, s, &pi, s->key_size, NULL)) {
8696 int count = 0, noccur = 0, pos = 0, score = 0, tscore = 0, min, max;
8697
8698 if (score_func) {
8699 GRN_BULK_REWIND(&(record.terms));
8700 GRN_BULK_REWIND(&(record.term_weights));
8701 record.n_candidates = 0;
8702 record.n_tokens = 0;
8703 }
8704
8705#define SKIP_OR_BREAK(pos) {\
8706 if (token_info_skip_pos(ctx, ti, rid, sid, pos)) { break; } \
8707 if (ti->p->rid != rid || ti->p->sid != sid) { \
8708 nrid = ti->p->rid; \
8709 nsid = ti->p->sid; \
8710 break; \
8711 } \
8712}
8713 if (n == 1 && !rep) {
8714 noccur = (*tis)->p->tf;
8715 tscore = (*tis)->p->weight + (*tis)->cursors->bins[0]->weight;
8716 if (score_func) {
8717 GRN_RECORD_PUT(ctx, &(record.terms), (*tis)->cursors->bins[0]->id);
8718 GRN_UINT32_PUT(ctx, &(record.term_weights), tscore);
8719 record.n_occurrences = noccur;
8720 record.n_candidates = (*tis)->size;
8721 record.n_tokens = (*tis)->ntoken;
8722 }
8723 } else if (mode == GRN_OP_NEAR) {
8724 bt_zap(bt);
8725 for (tip = tis; tip < tie; tip++) {
8726 ti = *tip;
8727 SKIP_OR_BREAK(pos);
8728 bt_push(bt, ti);
8729 }
8730 if (tip == tie) {
8731 for (;;) {
8732 ti = bt->min; min = ti->pos; max = bt->max->pos;
8733 if (min > max) {
8734 char ii_name[GRN_TABLE_MAX_KEY_SIZE];
8735 int ii_name_size;
8736 ii_name_size = grn_obj_name(ctx, (grn_obj *)ii, ii_name,
8737 GRN_TABLE_MAX_KEY_SIZE);
8738 ERR(GRN_FILE_CORRUPT,
8739 "[ii][select][near] "
8740 "max position must be larger than min position: "
8741 "min:<%d> max:<%d> ii:<%.*s> string:<%.*s>",
8742 min, max,
8743 ii_name_size, ii_name,
8744 string_len, string);
8745 rc = ctx->rc;
8746 goto exit;
8747 }
8748 if ((max_interval < 0) || (max - min <= max_interval)) {
8749 if (rep) { pi.pos = min; res_add(ctx, s, &pi, weight, op); }
8750 noccur++;
8751 if (ti->pos == max + 1) {
8752 break;
8753 }
8754 SKIP_OR_BREAK(max + 1);
8755 } else {
8756 if (ti->pos == max - max_interval) {
8757 break;
8758 }
8759 SKIP_OR_BREAK(max - max_interval);
8760 }
8761 bt_pop(bt);
8762 }
8763 }
8764 } else {
8765 for (tip = tis; ; tip++) {
8766 if (tip == tie) { tip = tis; }
8767 ti = *tip;
8768 SKIP_OR_BREAK(pos);
8769 if (ti->pos == pos) {
8770 score += ti->p->weight + ti->cursors->bins[0]->weight; count++;
8771 } else {
8772 score = ti->p->weight + ti->cursors->bins[0]->weight; count = 1;
8773 pos = ti->pos;
8774 if (noccur == 0 && score_func) {
8775 GRN_BULK_REWIND(&(record.terms));
8776 GRN_BULK_REWIND(&(record.term_weights));
8777 record.n_candidates = 0;
8778 record.n_tokens = 0;
8779 }
8780 }
8781 if (noccur == 0 && score_func) {
8782 GRN_RECORD_PUT(ctx, &(record.terms), ti->cursors->bins[0]->id);
8783 GRN_UINT32_PUT(ctx, &(record.term_weights),
8784 ti->p->weight + ti->cursors->bins[0]->weight);
8785 record.n_candidates += ti->size;
8786 record.n_tokens += ti->ntoken;
8787 }
8788 if (count == n) {
8789 if (rep) {
8790 pi.pos = pos; res_add(ctx, s, &pi, (score + 1) * weight, op);
8791 }
8792 tscore += score;
8793 score = 0; count = 0; pos++;
8794 noccur++;
8795 }
8796 }
8797 }
8798 if (noccur && !rep) {
8799 double record_score;
8800 if (score_func) {
8801 record.id = rid;
8802 record.weight = weight;
8803 record.n_occurrences = noccur;
8804 record.total_term_weights = tscore;
8805 record_score = score_func(ctx, &record) * weight;
8806 } else {
8807 record_score = (noccur + tscore) * weight;
8808 }
8809 if (set_min_enable_for_and_query) {
8810 if (current_min == GRN_ID_NIL) {
8811 current_min = rid;
8812 }
8813 }
8814 res_add(ctx, s, &pi, record_score, op);
8815 }
8816#undef SKIP_OR_BREAK
8817 }
8818 }
8819 if (token_info_skip(ctx, *tis, nrid, nsid)) { goto exit; }
8820 }
8821exit :
8822 if (score_func) {
8823 GRN_OBJ_FIN(ctx, &(record.terms));
8824 GRN_OBJ_FIN(ctx, &(record.term_weights));
8825 }
8826
8827 if (set_min_enable_for_and_query) {
8828 if (current_min > previous_min) {
8829 optarg->match_info->min = current_min;
8830 }
8831 }
8832
8833 for (tip = tis; tip < tis + n; tip++) {
8834 if (*tip) { token_info_close(ctx, *tip); }
8835 }
8836 if (tis) { GRN_FREE(tis); }
8837 if (!only_skip_token) {
8838 grn_ii_resolve_sel_and(ctx, s, op);
8839 }
8840 // grn_hash_cursor_clear(r);
8841 bt_close(ctx, bt);
8842#ifdef DEBUG
8843 {
8844 uint32_t segno = GRN_II_MAX_LSEG, nnref = 0;
8845 grn_io_mapinfo *info = ii->seg->maps;
8846 for (; segno; segno--, info++) { if (info->nref) { nnref++; } }
8847 GRN_LOG(ctx, GRN_LOG_INFO, "nnref=%d", nnref);
8848 }
8849#endif /* DEBUG */
8850 return rc;
8851}
8852
8853static uint32_t
8854grn_ii_estimate_size_for_query_regexp(grn_ctx *ctx, grn_ii *ii,
8855 const char *query, unsigned int query_len,
8856 grn_search_optarg *optarg)
8857{
8858 grn_rc rc;
8859 grn_obj parsed_query;
8860 uint32_t size;
8861
8862 GRN_TEXT_INIT(&parsed_query, 0);
8863 rc = grn_ii_parse_regexp_query(ctx, "[ii][estimate-size][query][regexp]",
8864 query, query_len, &parsed_query);
8865 if (rc != GRN_SUCCESS) {
8866 GRN_OBJ_FIN(ctx, &parsed_query);
8867 return 0;
8868 }
8869
8870 if (optarg) {
8871 optarg->mode = GRN_OP_EXACT;
8872 }
8873
8874 size = grn_ii_estimate_size_for_query(ctx, ii,
8875 GRN_TEXT_VALUE(&parsed_query),
8876 GRN_TEXT_LEN(&parsed_query),
8877 optarg);
8878 GRN_OBJ_FIN(ctx, &parsed_query);
8879
8880 if (optarg) {
8881 optarg->mode = GRN_OP_REGEXP;
8882 }
8883
8884 return size;
8885}
8886
8887uint32_t
8888grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii,
8889 const char *query, unsigned int query_len,
8890 grn_search_optarg *optarg)
8891{
8892 grn_rc rc;
8893 grn_obj *lexicon = ii->lexicon;
8894 token_info **tis = NULL;
8895 uint32_t i;
8896 uint32_t n_tis = 0;
8897 grn_bool only_skip_token = GRN_FALSE;
8898 grn_operator mode = GRN_OP_EXACT;
8899 double estimated_size = 0;
8900 double normalized_ratio = 1.0;
8901 grn_id min = GRN_ID_NIL;
8902
8903 if (query_len == 0) {
8904 return 0;
8905 }
8906
8907 if (optarg) {
8908 switch (optarg->mode) {
8909 case GRN_OP_NEAR :
8910 case GRN_OP_NEAR2 :
8911 mode = optarg->mode;
8912 break;
8913 case GRN_OP_SIMILAR :
8914 mode = optarg->mode;
8915 break;
8916 case GRN_OP_REGEXP :
8917 mode = optarg->mode;
8918 break;
8919 case GRN_OP_FUZZY :
8920 mode = optarg->mode;
8921 default :
8922 break;
8923 }
8924 if (optarg->match_info.flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) {
8925 min = optarg->match_info.min;
8926 }
8927 }
8928
8929 if (mode == GRN_OP_REGEXP) {
8930 return grn_ii_estimate_size_for_query_regexp(ctx, ii, query, query_len,
8931 optarg);
8932 }
8933
8934 tis = GRN_MALLOC(sizeof(token_info *) * query_len * 2);
8935 if (!tis) {
8936 return 0;
8937 }
8938
8939 switch (mode) {
8940 case GRN_OP_FUZZY :
8941 rc = token_info_build_fuzzy(ctx, lexicon, ii, query, query_len,
8942 tis, &n_tis, &only_skip_token, min,
8943 mode, &(optarg->fuzzy));
8944 break;
8945 default :
8946 rc = token_info_build(ctx, lexicon, ii, query, query_len,
8947 tis, &n_tis, &only_skip_token, min, mode);
8948 break;
8949 }
8950
8951 if (rc != GRN_SUCCESS) {
8952 goto exit;
8953 }
8954
8955 for (i = 0; i < n_tis; i++) {
8956 token_info *ti = tis[i];
8957 double term_estimated_size;
8958 term_estimated_size = ((double)ti->size / ti->ntoken);
8959 if (i == 0) {
8960 estimated_size = term_estimated_size;
8961 } else {
8962 if (term_estimated_size < estimated_size) {
8963 estimated_size = term_estimated_size;
8964 }
8965 normalized_ratio *= grn_ii_estimate_size_for_query_reduce_ratio;
8966 }
8967 }
8968
8969 estimated_size *= normalized_ratio;
8970 if (estimated_size > 0.0 && estimated_size < 1.0) {
8971 estimated_size = 1.0;
8972 }
8973
8974exit :
8975 for (i = 0; i < n_tis; i++) {
8976 token_info *ti = tis[i];
8977 if (ti) {
8978 token_info_close(ctx, ti);
8979 }
8980 }
8981 if (tis) {
8982 GRN_FREE(tis);
8983 }
8984
8985 return estimated_size;
8986}
8987
8988uint32_t
8989grn_ii_estimate_size_for_lexicon_cursor(grn_ctx *ctx, grn_ii *ii,
8990 grn_table_cursor *lexicon_cursor)
8991{
8992 grn_id term_id;
8993 uint32_t estimated_size = 0;
8994
8995 while ((term_id = grn_table_cursor_next(ctx, lexicon_cursor)) != GRN_ID_NIL) {
8996 uint32_t term_estimated_size;
8997 term_estimated_size = grn_ii_estimate_size(ctx, ii, term_id);
8998 estimated_size += term_estimated_size;
8999 }
9000
9001 return estimated_size;
9002}
9003
9004grn_rc
9005grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len,
9006 grn_hash *s, grn_operator op, grn_search_optarg *optarg)
9007{
9008 ERRCLR(ctx);
9009 GRN_LOG(ctx, GRN_LOG_INFO, "grn_ii_sel > (%.*s)", string_len, string);
9010 {
9011 grn_select_optarg arg;
9012 if (!s) { return GRN_INVALID_ARGUMENT; }
9013 memset(&arg, 0, sizeof(grn_select_optarg));
9014 arg.mode = GRN_OP_EXACT;
9015 if (optarg) {
9016 switch (optarg->mode) {
9017 case GRN_OP_NEAR :
9018 case GRN_OP_NEAR2 :
9019 arg.mode = optarg->mode;
9020 arg.max_interval = optarg->max_interval;
9021 break;
9022 case GRN_OP_SIMILAR :
9023 arg.mode = optarg->mode;
9024 arg.similarity_threshold = optarg->similarity_threshold;
9025 break;
9026 case GRN_OP_REGEXP :
9027 arg.mode = optarg->mode;
9028 break;
9029 case GRN_OP_FUZZY :
9030 arg.mode = optarg->mode;
9031 arg.fuzzy = optarg->fuzzy;
9032 break;
9033 default :
9034 break;
9035 }
9036 if (optarg->vector_size != 0) {
9037 arg.weight_vector = optarg->weight_vector;
9038 arg.vector_size = optarg->vector_size;
9039 }
9040 arg.scorer = optarg->scorer;
9041 arg.scorer_args_expr = optarg->scorer_args_expr;
9042 arg.scorer_args_expr_offset = optarg->scorer_args_expr_offset;
9043 arg.match_info = &(optarg->match_info);
9044 }
9045 /* todo : support subrec
9046 grn_rset_init(ctx, s, grn_rec_document, 0, grn_rec_none, 0, 0);
9047 */
9048 if (grn_ii_select(ctx, ii, string, string_len, s, op, &arg)) {
9049 GRN_LOG(ctx, GRN_LOG_ERROR, "grn_ii_select on grn_ii_sel(1) failed !");
9050 return ctx->rc;
9051 }
9052 GRN_LOG(ctx, GRN_LOG_INFO, "exact: %d", GRN_HASH_SIZE(s));
9053 if (op == GRN_OP_OR) {
9054 grn_id min = GRN_ID_NIL;
9055 if ((int64_t)GRN_HASH_SIZE(s) <= ctx->impl->match_escalation_threshold) {
9056 arg.mode = GRN_OP_UNSPLIT;
9057 if (arg.match_info) {
9058 if (arg.match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) {
9059 min = arg.match_info->min;
9060 arg.match_info->min = GRN_ID_NIL;
9061 }
9062 }
9063 if (grn_ii_select(ctx, ii, string, string_len, s, op, &arg)) {
9064 GRN_LOG(ctx, GRN_LOG_ERROR,
9065 "grn_ii_select on grn_ii_sel(2) failed !");
9066 return ctx->rc;
9067 }
9068 GRN_LOG(ctx, GRN_LOG_INFO, "unsplit: %d", GRN_HASH_SIZE(s));
9069 if (arg.match_info) {
9070 if (arg.match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) {
9071 if (min > GRN_ID_NIL && min < arg.match_info->min) {
9072 arg.match_info->min = min;
9073 }
9074 }
9075 }
9076 }
9077 if ((int64_t)GRN_HASH_SIZE(s) <= ctx->impl->match_escalation_threshold) {
9078 arg.mode = GRN_OP_PARTIAL;
9079 if (arg.match_info) {
9080 if (arg.match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) {
9081 min = arg.match_info->min;
9082 arg.match_info->min = GRN_ID_NIL;
9083 }
9084 }
9085 if (grn_ii_select(ctx, ii, string, string_len, s, op, &arg)) {
9086 GRN_LOG(ctx, GRN_LOG_ERROR,
9087 "grn_ii_select on grn_ii_sel(3) failed !");
9088 return ctx->rc;
9089 }
9090 GRN_LOG(ctx, GRN_LOG_INFO, "partial: %d", GRN_HASH_SIZE(s));
9091 if (arg.match_info) {
9092 if (arg.match_info->flags & GRN_MATCH_INFO_GET_MIN_RECORD_ID) {
9093 if (min > GRN_ID_NIL && min < arg.match_info->min) {
9094 arg.match_info->min = min;
9095 }
9096 }
9097 }
9098 }
9099 }
9100 GRN_LOG(ctx, GRN_LOG_INFO, "hits=%d", GRN_HASH_SIZE(s));
9101 return GRN_SUCCESS;
9102 }
9103}
9104
9105grn_rc
9106grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator op)
9107{
9108 int rep = 0;
9109 grn_ii_cursor *c;
9110 grn_posting *pos;
9111 if ((c = grn_ii_cursor_open(ctx, ii, id, GRN_ID_NIL, GRN_ID_MAX,
9112 rep ? ii->n_elements : ii->n_elements - 1, 0))) {
9113 while ((pos = grn_ii_cursor_next(ctx, c))) {
9114 res_add(ctx, s, (grn_rset_posinfo *) pos, (1 + pos->weight), op);
9115 }
9116 grn_ii_cursor_close(ctx, c);
9117 }
9118 return ctx->rc;
9119}
9120
9121void
9122grn_ii_resolve_sel_and(grn_ctx *ctx, grn_hash *s, grn_operator op)
9123{
9124 if (op == GRN_OP_AND
9125 && !(ctx->flags & GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND)) {
9126 grn_id eid;
9127 grn_rset_recinfo *ri;
9128 grn_hash_cursor *c = grn_hash_cursor_open(ctx, s, NULL, 0, NULL, 0,
9129 0, -1, 0);
9130 if (c) {
9131 while ((eid = grn_hash_cursor_next(ctx, c))) {
9132 grn_hash_cursor_get_value(ctx, c, (void **) &ri);
9133 if ((ri->n_subrecs & GRN_RSET_UTIL_BIT)) {
9134 ri->n_subrecs &= ~GRN_RSET_UTIL_BIT;
9135 } else {
9136 grn_hash_delete_by_id(ctx, s, eid, NULL);
9137 }
9138 }
9139 grn_hash_cursor_close(ctx, c);
9140 }
9141 }
9142}
9143
9144void
9145grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf)
9146{
9147 grn_obj key_buf;
9148 char key[GRN_TABLE_MAX_KEY_SIZE];
9149 int key_size;
9150 int i = 0;
9151 grn_ii_cursor_next_options options = {
9152 .include_garbage = GRN_TRUE
9153 };
9154
9155 GRN_TEXT_PUTS(ctx, buf, " #<");
9156 key_size = grn_table_get_key(ctx, c->ii->lexicon, c->id,
9157 key, GRN_TABLE_MAX_KEY_SIZE);
9158 GRN_OBJ_INIT(&key_buf, GRN_BULK, 0, c->ii->lexicon->header.domain);
9159 GRN_TEXT_SET(ctx, &key_buf, key, key_size);
9160 grn_inspect(ctx, buf, &key_buf);
9161 GRN_OBJ_FIN(ctx, &key_buf);
9162
9163 GRN_TEXT_PUTS(ctx, buf, "\n elements:[\n ");
9164 while (grn_ii_cursor_next_internal(ctx, c, &options)) {
9165 grn_posting *pos = c->post;
9166 if (i > 0) {
9167 GRN_TEXT_PUTS(ctx, buf, ",\n ");
9168 }
9169 i++;
9170 GRN_TEXT_PUTS(ctx, buf, "{status:");
9171 if (pos->tf && pos->sid) {
9172 GRN_TEXT_PUTS(ctx, buf, "available");
9173 } else {
9174 GRN_TEXT_PUTS(ctx, buf, "garbage");
9175 }
9176 GRN_TEXT_PUTS(ctx, buf, ", rid:");
9177 grn_text_lltoa(ctx, buf, pos->rid);
9178 GRN_TEXT_PUTS(ctx, buf, ", sid:");
9179 grn_text_lltoa(ctx, buf, pos->sid);
9180 GRN_TEXT_PUTS(ctx, buf, ", pos:");
9181 grn_text_lltoa(ctx, buf, pos->pos);
9182 GRN_TEXT_PUTS(ctx, buf, ", tf:");
9183 grn_text_lltoa(ctx, buf, pos->tf);
9184 GRN_TEXT_PUTS(ctx, buf, ", weight:");
9185 grn_text_lltoa(ctx, buf, pos->weight);
9186 GRN_TEXT_PUTS(ctx, buf, ", rest:");
9187 grn_text_lltoa(ctx, buf, pos->rest);
9188 GRN_TEXT_PUTS(ctx, buf, "}");
9189 }
9190 GRN_TEXT_PUTS(ctx, buf, "\n ]\n >");
9191}
9192
9193void
9194grn_ii_inspect_values(grn_ctx *ctx, grn_ii *ii, grn_obj *buf)
9195{
9196 grn_table_cursor *tc;
9197 GRN_TEXT_PUTS(ctx, buf, "[");
9198 if ((tc = grn_table_cursor_open(ctx, ii->lexicon, NULL, 0, NULL, 0, 0, -1,
9199 GRN_CURSOR_ASCENDING))) {
9200 int i = 0;
9201 grn_id tid;
9202 grn_ii_cursor *c;
9203 while ((tid = grn_table_cursor_next(ctx, tc))) {
9204 if (i > 0) {
9205 GRN_TEXT_PUTS(ctx, buf, ",");
9206 }
9207 i++;
9208 GRN_TEXT_PUTS(ctx, buf, "\n");
9209 if ((c = grn_ii_cursor_open(ctx, ii, tid, GRN_ID_NIL, GRN_ID_MAX,
9210 ii->n_elements,
9211 GRN_OBJ_WITH_POSITION|GRN_OBJ_WITH_SECTION))) {
9212 grn_ii_cursor_inspect(ctx, c, buf);
9213 grn_ii_cursor_close(ctx, c);
9214 }
9215 }
9216 grn_table_cursor_close(ctx, tc);
9217 }
9218 GRN_TEXT_PUTS(ctx, buf, "]");
9219}
9220
9221/********************** buffered index builder ***********************/
9222
9223const grn_id II_BUFFER_TYPE_MASK = 0xc0000000;
9224#define II_BUFFER_TYPE_RID 0x80000000
9225#define II_BUFFER_TYPE_WEIGHT 0x40000000
9226#define II_BUFFER_TYPE(id) (((id) & II_BUFFER_TYPE_MASK))
9227#define II_BUFFER_PACK(value, type) ((value) | (type))
9228#define II_BUFFER_UNPACK(id, type) ((id) & ~(type))
9229#define II_BUFFER_ORDER GRN_CURSOR_BY_KEY
9230const uint16_t II_BUFFER_NTERMS_PER_BUFFER = 16380;
9231const uint32_t II_BUFFER_PACKED_BUF_SIZE = 0x4000000;
9232const char *TMPFILE_PATH = "grn_ii_buffer_tmp";
9233const uint32_t II_BUFFER_NCOUNTERS_MARGIN = 0x100000;
9234const size_t II_BUFFER_BLOCK_SIZE = 0x1000000;
9235const uint32_t II_BUFFER_BLOCK_READ_UNIT_SIZE = 0x200000;
9236
/*
 * ii_buffer_value is one value waiting to be tokenized.  The tokenizer reads
 * len bytes from p and uses sid and weight for the resulting postings.
 */
typedef struct {
  unsigned int sid;    /* Section ID */
  unsigned int weight; /* Weight */
  const char *p;       /* Value address */
  uint32_t len;        /* Value length */
  char *buf;           /* Buffer address */
  uint32_t cap;        /* Buffer size */
} ii_buffer_value;
9245
/*
 * ii_buffer_counter is associated with a combination of a block and a term.
 * Counters are stored in an array indexed by (term ID in the temporary
 * lexicon) - 1.
 */
typedef struct {
  uint32_t nrecs;  /* Number of records or sections */
  uint32_t nposts; /* Number of occurrences */

  /* Information of the last value */
  grn_id last_rid;      /* Record ID */
  uint32_t last_sid;    /* Section ID */
  uint32_t last_tf;     /* Term frequency */
  uint32_t last_weight; /* Total weight */
  uint32_t last_pos;    /* Token position */

  /* Meaning of offset_* is different before/after encoding. */
  /* Before encoding: size in encoded sequence */
  /* After encoding: Offset in encoded sequence */
  uint32_t offset_rid;    /* Record ID */
  uint32_t offset_sid;    /* Section ID */
  uint32_t offset_tf;     /* Term frequency */
  uint32_t offset_weight; /* Weight */
  uint32_t offset_pos;    /* Token position */
} ii_buffer_counter;
9267
/*
 * ii_buffer_block holds the information about one block written to the
 * temporary file.  Only head, tail, nextsize, buffer, buffersize and rest
 * are set by the code that creates and flushes a block; the remaining fields
 * are presumably used while blocks are read back and merged -- TODO confirm.
 */
typedef struct {
  off64_t head;        /* File position where the block starts */
  off64_t tail;        /* File position just after the block */
  uint32_t nextsize;   /* Size of the first unit of the block */
  uint8_t *buffer;     /* NULL for a new block */
  uint32_t buffersize; /* 0 for a new block */
  uint8_t *bufcur;
  uint32_t rest;       /* 0 for a new block */
  grn_id tid;
  uint32_t nrecs;
  uint32_t nposts;
  grn_id *recs;
  uint32_t *tfs;
  uint32_t *posts;
} ii_buffer_block;
9283
/*
 * _grn_ii_buffer is the state of the buffered index builder.
 *
 * Values are tokenized into the current block (block_buf, counters and
 * tmp_lexicon).  A full block is encoded and appended to the temporary file
 * (tmpfd) and described by an entry of blocks.  The members in the
 * "merging" part are not touched by the tokenizing and flushing code.
 */
struct _grn_ii_buffer {
  grn_obj *lexicon;            /* Global lexicon */
  grn_obj *tmp_lexicon;        /* Temporary lexicon for each block */
  ii_buffer_block *blocks;     /* Blocks */
  uint32_t nblocks;            /* Number of blocks */
  int tmpfd;                   /* Descriptor of temporary file */
  char tmpfpath[PATH_MAX];     /* Path of temporary file */
  uint64_t update_buffer_size;

  // stuff for parsing
  off64_t filepos;             /* Write position of temporary file */
  grn_id *block_buf;           /* Buffer for the current block */
  size_t block_buf_size;       /* Size of block_buf */
  size_t block_pos;            /* Write position of block_buf */
  ii_buffer_counter *counters; /* Status of terms */
  uint32_t ncounters;          /* Number of counters */
  size_t total_size;           /* Sum of nrecs + nposts of all encoded terms */
  size_t curr_size;
  ii_buffer_value *values;     /* Values in block */
  unsigned int nvalues;        /* Number of values in block */
  unsigned int max_nvalues;    /* Size of values */
  grn_id last_rid;

  // stuff for merging
  grn_ii *ii;                  /* Target index; its header flags select the encoded fields */
  uint32_t lseg;
  uint32_t dseg;
  buffer *term_buffer;
  datavec data_vectors[MAX_N_ELEMENTS + 1];
  uint8_t *packed_buf;
  size_t packed_buf_size;
  size_t packed_len;
  size_t total_chunk_size;
};
9318
9319/* block_new returns a new ii_buffer_block to store block information. */
9320static ii_buffer_block *
9321block_new(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
9322{
9323 ii_buffer_block *block;
9324 if (!(ii_buffer->nblocks & 0x3ff)) {
9325 ii_buffer_block *blocks;
9326 if (!(blocks = GRN_REALLOC(ii_buffer->blocks,
9327 (ii_buffer->nblocks + 0x400) *
9328 sizeof(ii_buffer_block)))) {
9329 return NULL;
9330 }
9331 ii_buffer->blocks = blocks;
9332 }
9333 block = &ii_buffer->blocks[ii_buffer->nblocks];
9334 block->head = ii_buffer->filepos;
9335 block->rest = 0;
9336 block->buffer = NULL;
9337 block->buffersize = 0;
9338 return block;
9339}
9340
9341/* allocate_outbuf allocates memory to flush a block. */
9342static uint8_t *
9343allocate_outbuf(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
9344{
9345 size_t bufsize = 0, bufsize_ = 0;
9346 uint32_t flags = ii_buffer->ii->header->flags;
9347 ii_buffer_counter *counter = ii_buffer->counters;
9348 grn_id tid, tid_max = grn_table_size(ctx, ii_buffer->tmp_lexicon);
9349 for (tid = 1; tid <= tid_max; counter++, tid++) {
9350 counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
9351 counter->last_rid = 0;
9352 counter->last_tf = 0;
9353 bufsize += 5;
9354 bufsize += GRN_B_ENC_SIZE(counter->nrecs);
9355 bufsize += GRN_B_ENC_SIZE(counter->nposts);
9356 bufsize += counter->offset_rid;
9357 if ((flags & GRN_OBJ_WITH_SECTION)) {
9358 bufsize += counter->offset_sid;
9359 }
9360 bufsize += counter->offset_tf;
9361 if ((flags & GRN_OBJ_WITH_WEIGHT)) {
9362 bufsize += counter->offset_weight;
9363 }
9364 if ((flags & GRN_OBJ_WITH_POSITION)) {
9365 bufsize += counter->offset_pos;
9366 }
9367 if (bufsize_ + II_BUFFER_BLOCK_READ_UNIT_SIZE < bufsize) {
9368 bufsize += sizeof(uint32_t);
9369 bufsize_ = bufsize;
9370 }
9371 }
9372 GRN_LOG(ctx, GRN_LOG_INFO, "flushing:%d bufsize:%" GRN_FMT_SIZE,
9373 ii_buffer->nblocks, bufsize);
9374 return (uint8_t *)GRN_MALLOC(bufsize);
9375}
9376
9377/*
9378 * The temporary file format is roughly as follows:
9379 *
9380 * File = Block...
9381 * Block = Unit...
9382 * Unit = TermChunk (key order)
9383 * NextUnitSize (The first unit size is kept on memory)
9384 * Chunk = Term...
9385 * Term = ID (gtid)
9386 * NumRecordsOrSections (nrecs), NumOccurrences (nposts)
9387 * RecordID... (rid, diff)
9388 * [SectionID... (sid, diff)]
9389 * TermFrequency... (tf, diff)
9390 * [Weight... (weight, diff)]
9391 * [Position... (pos, diff)]
9392 */
9393
9394/*
9395 * encode_terms encodes terms in ii_buffer->tmp_lexicon and returns the
9396 * expected temporary file size.
9397 */
9398static size_t
9399encode_terms(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
9400 uint8_t *outbuf, ii_buffer_block *block)
9401{
9402 grn_id tid;
9403 uint8_t *outbufp = outbuf;
9404 uint8_t *outbufp_ = outbuf;
9405 grn_table_cursor *tc;
9406 /* The first size is written into block->nextsize. */
9407 uint8_t *pnext = (uint8_t *)&block->nextsize;
9408 uint32_t flags = ii_buffer->ii->header->flags;
9409 tc = grn_table_cursor_open(ctx, ii_buffer->tmp_lexicon,
9410 NULL, 0, NULL, 0, 0, -1, II_BUFFER_ORDER);
9411 while ((tid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
9412 char key[GRN_TABLE_MAX_KEY_SIZE];
9413 int key_size = grn_table_get_key(ctx, ii_buffer->tmp_lexicon, tid,
9414 key, GRN_TABLE_MAX_KEY_SIZE);
9415 /* gtid is a global term ID, not in a temporary lexicon. */
9416 grn_id gtid = grn_table_add(ctx, ii_buffer->lexicon, key, key_size, NULL);
9417 ii_buffer_counter *counter = &ii_buffer->counters[tid - 1];
9418 if (counter->nrecs) {
9419 uint32_t offset_rid = counter->offset_rid;
9420 uint32_t offset_sid = counter->offset_sid;
9421 uint32_t offset_tf = counter->offset_tf;
9422 uint32_t offset_weight = counter->offset_weight;
9423 uint32_t offset_pos = counter->offset_pos;
9424 GRN_B_ENC(gtid, outbufp);
9425 GRN_B_ENC(counter->nrecs, outbufp);
9426 GRN_B_ENC(counter->nposts, outbufp);
9427 ii_buffer->total_size += counter->nrecs + counter->nposts;
9428 counter->offset_rid = outbufp - outbuf;
9429 outbufp += offset_rid;
9430 if ((flags & GRN_OBJ_WITH_SECTION)) {
9431 counter->offset_sid = outbufp - outbuf;
9432 outbufp += offset_sid;
9433 }
9434 counter->offset_tf = outbufp - outbuf;
9435 outbufp += offset_tf;
9436 if ((flags & GRN_OBJ_WITH_WEIGHT)) {
9437 counter->offset_weight = outbufp - outbuf;
9438 outbufp += offset_weight;
9439 }
9440 if ((flags & GRN_OBJ_WITH_POSITION)) {
9441 counter->offset_pos = outbufp - outbuf;
9442 outbufp += offset_pos;
9443 }
9444 }
9445 if (outbufp_ + II_BUFFER_BLOCK_READ_UNIT_SIZE < outbufp) {
9446 uint32_t size = outbufp - outbufp_ + sizeof(uint32_t);
9447 grn_memcpy(pnext, &size, sizeof(uint32_t));
9448 pnext = outbufp;
9449 outbufp += sizeof(uint32_t);
9450 outbufp_ = outbufp;
9451 }
9452 }
9453 grn_table_cursor_close(ctx, tc);
9454 if (outbufp_ < outbufp) {
9455 uint32_t size = outbufp - outbufp_;
9456 grn_memcpy(pnext, &size, sizeof(uint32_t));
9457 }
9458 return outbufp - outbuf;
9459}
9460
9461/* encode_postings encodes data in ii_buffer->block_buf. */
9462static void
9463encode_postings(grn_ctx *ctx, grn_ii_buffer *ii_buffer, uint8_t *outbuf)
9464{
9465 grn_id rid = 0;
9466 unsigned int sid = 1;
9467 unsigned int weight = 0;
9468 uint32_t pos = 0;
9469 uint32_t rest;
9470 grn_id *bp = ii_buffer->block_buf;
9471 uint32_t flags = ii_buffer->ii->header->flags;
9472 for (rest = ii_buffer->block_pos; rest; bp++, rest--) {
9473 grn_id id = *bp;
9474 switch (II_BUFFER_TYPE(id)) {
9475 case II_BUFFER_TYPE_RID :
9476 rid = II_BUFFER_UNPACK(id, II_BUFFER_TYPE_RID);
9477 if ((flags & GRN_OBJ_WITH_SECTION) && rest) {
9478 sid = *++bp;
9479 rest--;
9480 }
9481 weight = 0;
9482 pos = 0;
9483 break;
9484 case II_BUFFER_TYPE_WEIGHT :
9485 weight = II_BUFFER_UNPACK(id, II_BUFFER_TYPE_WEIGHT);
9486 break;
9487 default :
9488 {
9489 ii_buffer_counter *counter = &ii_buffer->counters[id - 1];
9490 if (counter->last_rid == rid && counter->last_sid == sid) {
9491 counter->last_tf++;
9492 counter->last_weight += weight;
9493 } else {
9494 if (counter->last_tf) {
9495 uint8_t *p = outbuf + counter->offset_tf;
9496 GRN_B_ENC(counter->last_tf - 1, p);
9497 counter->offset_tf = p - outbuf;
9498 if (flags & GRN_OBJ_WITH_WEIGHT) {
9499 p = outbuf + counter->offset_weight;
9500 GRN_B_ENC(counter->last_weight, p);
9501 counter->offset_weight = p - outbuf;
9502 }
9503 }
9504 {
9505 uint8_t *p = outbuf + counter->offset_rid;
9506 GRN_B_ENC(rid - counter->last_rid, p);
9507 counter->offset_rid = p - outbuf;
9508 }
9509 if (flags & GRN_OBJ_WITH_SECTION) {
9510 uint8_t *p = outbuf + counter->offset_sid;
9511 if (counter->last_rid != rid) {
9512 GRN_B_ENC(sid - 1, p);
9513 } else {
9514 GRN_B_ENC(sid - counter->last_sid - 1, p);
9515 }
9516 counter->offset_sid = p - outbuf;
9517 }
9518 counter->last_rid = rid;
9519 counter->last_sid = sid;
9520 counter->last_tf = 1;
9521 counter->last_weight = weight;
9522 counter->last_pos = 0;
9523 }
9524 if ((flags & GRN_OBJ_WITH_POSITION) && rest) {
9525 uint8_t *p = outbuf + counter->offset_pos;
9526 pos = *++bp;
9527 rest--;
9528 GRN_B_ENC(pos - counter->last_pos, p);
9529 counter->offset_pos = p - outbuf;
9530 counter->last_pos = pos;
9531 }
9532 }
9533 break;
9534 }
9535 }
9536}
9537
9538/* encode_last_tf encodes last_tf and last_weight in counters. */
9539static void
9540encode_last_tf(grn_ctx *ctx, grn_ii_buffer *ii_buffer, uint8_t *outbuf)
9541{
9542 ii_buffer_counter *counter = ii_buffer->counters;
9543 grn_id tid, tid_max = grn_table_size(ctx, ii_buffer->tmp_lexicon);
9544 for (tid = 1; tid <= tid_max; counter++, tid++) {
9545 uint8_t *p = outbuf + counter->offset_tf;
9546 GRN_B_ENC(counter->last_tf - 1, p);
9547 }
9548 if ((ii_buffer->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
9549 for (tid = 1; tid <= tid_max; counter++, tid++) {
9550 uint8_t *p = outbuf + counter->offset_weight;
9551 GRN_B_ENC(counter->last_weight, p);
9552 }
9553 }
9554}
9555
9556/*
9557 * grn_ii_buffer_flush flushes the current block (ii_buffer->block_buf,
9558 * counters and tmp_lexicon) to a temporary file (ii_buffer->tmpfd).
9559 * Also, block information is stored into ii_buffer->blocks.
9560 */
9561static void
9562grn_ii_buffer_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
9563{
9564 size_t encsize;
9565 uint8_t *outbuf;
9566 ii_buffer_block *block;
9567 GRN_LOG(ctx, GRN_LOG_DEBUG, "flushing:%d npostings:%" GRN_FMT_SIZE,
9568 ii_buffer->nblocks, ii_buffer->block_pos);
9569 if (!(block = block_new(ctx, ii_buffer))) { return; }
9570 if (!(outbuf = allocate_outbuf(ctx, ii_buffer))) { return; }
9571 encsize = encode_terms(ctx, ii_buffer, outbuf, block);
9572 encode_postings(ctx, ii_buffer, outbuf);
9573 encode_last_tf(ctx, ii_buffer, outbuf);
9574 {
9575 ssize_t r = grn_write(ii_buffer->tmpfd, outbuf, encsize);
9576 if (r != encsize) {
9577 ERR(GRN_INPUT_OUTPUT_ERROR,
9578 "write returned %" GRN_FMT_LLD " != %" GRN_FMT_LLU,
9579 (long long int)r, (unsigned long long int)encsize);
9580 GRN_FREE(outbuf);
9581 return;
9582 }
9583 ii_buffer->filepos += r;
9584 block->tail = ii_buffer->filepos;
9585 }
9586 GRN_FREE(outbuf);
9587 memset(ii_buffer->counters, 0,
9588 grn_table_size(ctx, ii_buffer->tmp_lexicon) *
9589 sizeof(ii_buffer_counter));
9590 grn_obj_close(ctx, ii_buffer->tmp_lexicon);
9591 GRN_LOG(ctx, GRN_LOG_DEBUG, "flushed: %d encsize:%" GRN_FMT_SIZE,
9592 ii_buffer->nblocks, encsize);
9593 ii_buffer->tmp_lexicon = NULL;
9594 ii_buffer->nblocks++;
9595 ii_buffer->block_pos = 0;
9596}
9597
9598const uint32_t PAT_CACHE_SIZE = 1<<20;
9599
9600/*
9601 * get_tmp_lexicon returns a temporary lexicon.
9602 *
9603 * Note that a lexicon is created for each block and ii_buffer->tmp_lexicon is
9604 * closed in grn_ii_buffer_flush.
9605 */
9606static grn_obj *
9607get_tmp_lexicon(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
9608{
9609 grn_obj *tmp_lexicon = ii_buffer->tmp_lexicon;
9610 if (!tmp_lexicon) {
9611 grn_obj *domain = grn_ctx_at(ctx, ii_buffer->lexicon->header.domain);
9612 grn_obj *range = grn_ctx_at(ctx, DB_OBJ(ii_buffer->lexicon)->range);
9613 grn_obj *tokenizer;
9614 grn_obj *normalizer;
9615 grn_obj *token_filters;
9616 grn_table_flags flags;
9617 grn_table_get_info(ctx, ii_buffer->lexicon, &flags, NULL,
9618 &tokenizer, &normalizer, &token_filters);
9619 flags &= ~GRN_OBJ_PERSISTENT;
9620 tmp_lexicon = grn_table_create(ctx, NULL, 0, NULL, flags, domain, range);
9621 if (tmp_lexicon) {
9622 ii_buffer->tmp_lexicon = tmp_lexicon;
9623 grn_obj_set_info(ctx, tmp_lexicon,
9624 GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
9625 grn_obj_set_info(ctx, tmp_lexicon,
9626 GRN_INFO_NORMALIZER, normalizer);
9627 grn_obj_set_info(ctx, tmp_lexicon,
9628 GRN_INFO_TOKEN_FILTERS, token_filters);
9629 if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) {
9630 grn_pat_cache_enable(ctx, (grn_pat *)tmp_lexicon, PAT_CACHE_SIZE);
9631 }
9632 }
9633 }
9634 return tmp_lexicon;
9635}
9636
9637/* get_buffer_counter returns a counter associated with tid. */
9638static ii_buffer_counter *
9639get_buffer_counter(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
9640 grn_obj *tmp_lexicon, grn_id tid)
9641{
9642 if (tid > ii_buffer->ncounters) {
9643 ii_buffer_counter *counters;
9644 uint32_t ncounters =
9645 grn_table_size(ctx, tmp_lexicon) + II_BUFFER_NCOUNTERS_MARGIN;
9646 counters = GRN_REALLOC(ii_buffer->counters,
9647 ncounters * sizeof(ii_buffer_counter));
9648 if (!counters) { return NULL; }
9649 memset(&counters[ii_buffer->ncounters], 0,
9650 (ncounters - ii_buffer->ncounters) * sizeof(ii_buffer_counter));
9651 ii_buffer->ncounters = ncounters;
9652 ii_buffer->counters = counters;
9653 }
9654 return &ii_buffer->counters[tid - 1];
9655}
9656
9657/*
9658 * grn_ii_buffer_tokenize_value tokenizes a value.
9659 *
9660 * The result is written into the current block (ii_buffer->tmp_lexicon,
9661 * ii_buffer->block_buf, ii_buffer->counters, etc.).
9662 */
9663static void
9664grn_ii_buffer_tokenize_value(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
9665 grn_id rid, const ii_buffer_value *value)
9666{
9667 grn_obj *tmp_lexicon;
9668 if ((tmp_lexicon = get_tmp_lexicon(ctx, ii_buffer))) {
9669 unsigned int token_flags = 0;
9670 grn_token_cursor *token_cursor;
9671 grn_id *buffer = ii_buffer->block_buf;
9672 uint32_t block_pos = ii_buffer->block_pos;
9673 uint32_t ii_flags = ii_buffer->ii->header->flags;
9674 buffer[block_pos++] = II_BUFFER_PACK(rid, II_BUFFER_TYPE_RID);
9675 if (ii_flags & GRN_OBJ_WITH_SECTION) {
9676 buffer[block_pos++] = value->sid;
9677 }
9678 if (value->weight) {
9679 buffer[block_pos++] = II_BUFFER_PACK(value->weight,
9680 II_BUFFER_TYPE_WEIGHT);
9681 }
9682 if ((token_cursor = grn_token_cursor_open(ctx, tmp_lexicon,
9683 value->p, value->len,
9684 GRN_TOKEN_ADD, token_flags))) {
9685 while (!token_cursor->status) {
9686 grn_id tid;
9687 if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
9688 ii_buffer_counter *counter;
9689 counter = get_buffer_counter(ctx, ii_buffer, tmp_lexicon, tid);
9690 if (!counter) { return; }
9691 buffer[block_pos++] = tid;
9692 if (ii_flags & GRN_OBJ_WITH_POSITION) {
9693 buffer[block_pos++] = token_cursor->pos;
9694 }
9695 if (counter->last_rid != rid) {
9696 counter->offset_rid += GRN_B_ENC_SIZE(rid - counter->last_rid);
9697 counter->last_rid = rid;
9698 counter->offset_sid += GRN_B_ENC_SIZE(value->sid - 1);
9699 counter->last_sid = value->sid;
9700 if (counter->last_tf) {
9701 counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
9702 counter->last_tf = 0;
9703 counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight);
9704 counter->last_weight = 0;
9705 }
9706 counter->last_pos = 0;
9707 counter->nrecs++;
9708 } else if (counter->last_sid != value->sid) {
9709 counter->offset_rid += GRN_B_ENC_SIZE(0);
9710 counter->offset_sid +=
9711 GRN_B_ENC_SIZE(value->sid - counter->last_sid - 1);
9712 counter->last_sid = value->sid;
9713 if (counter->last_tf) {
9714 counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
9715 counter->last_tf = 0;
9716 counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight);
9717 counter->last_weight = 0;
9718 }
9719 counter->last_pos = 0;
9720 counter->nrecs++;
9721 }
9722 counter->offset_pos +=
9723 GRN_B_ENC_SIZE(token_cursor->pos - counter->last_pos);
9724 counter->last_pos = token_cursor->pos;
9725 counter->last_tf++;
9726 counter->last_weight += value->weight;
9727 counter->nposts++;
9728 }
9729 }
9730 grn_token_cursor_close(ctx, token_cursor);
9731 }
9732 ii_buffer->block_pos = block_pos;
9733 }
9734}
9735
9736/*
9737 * grn_ii_buffer_tokenize tokenizes ii_buffer->values.
9738 *
9739 * grn_ii_buffer_tokenize estimates the size of tokenized values.
9740 * If the remaining space of the current block is not enough to store the new
9741 * tokenized values, the current block is flushed.
9742 * Then, grn_ii_buffer_tokenize tokenizes values.
9743 */
9744static void
9745grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid)
9746{
9747 unsigned int i;
9748 uint32_t est_len = 0;
9749 for (i = 0; i < ii_buffer->nvalues; i++) {
9750 est_len += ii_buffer->values[i].len * 2 + 2;
9751 }
9752 if (ii_buffer->block_buf_size < ii_buffer->block_pos + est_len) {
9753 grn_ii_buffer_flush(ctx, ii_buffer);
9754 }
9755 if (ii_buffer->block_buf_size < est_len) {
9756 grn_id *block_buf = (grn_id *)GRN_REALLOC(ii_buffer->block_buf,
9757 est_len * sizeof(grn_id));
9758 if (block_buf) {
9759 ii_buffer->block_buf = block_buf;
9760 ii_buffer->block_buf_size = est_len;
9761 }
9762 }
9763
9764 for (i = 0; i < ii_buffer->nvalues; i++) {
9765 const ii_buffer_value *value = &ii_buffer->values[i];
9766 if (value->len) {
9767 uint32_t est_len = value->len * 2 + 2;
9768 if (ii_buffer->block_buf_size >= ii_buffer->block_pos + est_len) {
9769 grn_ii_buffer_tokenize_value(ctx, ii_buffer, rid, value);
9770 }
9771 }
9772 }
9773 ii_buffer->nvalues = 0;
9774}
9775
9776/* grn_ii_buffer_fetch fetches the next term. */
9777static void
9778grn_ii_buffer_fetch(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
9779 ii_buffer_block *block)
9780{
9781 if (!block->rest) {
9782 /* Read the next unit. */
9783 if (block->head < block->tail) {
9784 size_t bytesize = block->nextsize;
9785 if (block->buffersize < block->nextsize) {
9786 void *r = GRN_REALLOC(block->buffer, bytesize);
9787 if (r) {
9788 block->buffer = (uint8_t *)r;
9789 block->buffersize = block->nextsize;
9790 } else {
9791 GRN_LOG(ctx, GRN_LOG_WARNING, "realloc: %" GRN_FMT_LLU,
9792 (unsigned long long int)bytesize);
9793 return;
9794 }
9795 }
9796 {
9797 off64_t seeked_position;
9798 seeked_position = grn_lseek(ii_buffer->tmpfd, block->head, SEEK_SET);
9799 if (seeked_position != block->head) {
9800 ERRNO_ERR("failed to "
9801 "grn_lseek(%" GRN_FMT_OFF64_T ") -> %" GRN_FMT_OFF64_T,
9802 block->head,
9803 seeked_position);
9804 return;
9805 }
9806 }
9807 {
9808 size_t read_bytesize;
9809 read_bytesize = grn_read(ii_buffer->tmpfd, block->buffer, bytesize);
9810 if (read_bytesize != bytesize) {
9811 SERR("failed to grn_read(%" GRN_FMT_SIZE ") -> %" GRN_FMT_SIZE,
9812 bytesize, read_bytesize);
9813 return;
9814 }
9815 }
9816 block->head += bytesize;
9817 block->bufcur = block->buffer;
9818 if (block->head >= block->tail) {
9819 if (block->head > block->tail) {
9820 GRN_LOG(ctx, GRN_LOG_WARNING,
9821 "fetch error: %" GRN_FMT_INT64D " > %" GRN_FMT_INT64D,
9822 block->head, block->tail);
9823 }
9824 block->rest = block->nextsize;
9825 block->nextsize = 0;
9826 } else {
9827 block->rest = block->nextsize - sizeof(uint32_t);
9828 grn_memcpy(&block->nextsize,
9829 &block->buffer[block->rest], sizeof(uint32_t));
9830 }
9831 }
9832 }
9833 if (block->rest) {
9834 uint8_t *p = block->bufcur;
9835 GRN_B_DEC(block->tid, p);
9836 GRN_B_DEC(block->nrecs, p);
9837 GRN_B_DEC(block->nposts, p);
9838 block->rest -= (p - block->bufcur);
9839 block->bufcur = p;
9840 } else {
9841 block->tid = 0;
9842 }
9843}
9844
9845/* grn_ii_buffer_chunk_flush flushes the current buffer for packed postings. */
9846static void
9847grn_ii_buffer_chunk_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
9848{
9849 grn_io_win io_win;
9850 uint32_t chunk_number;
9851 chunk_new(ctx, ii_buffer->ii, &chunk_number, ii_buffer->packed_len);
9852 GRN_LOG(ctx, GRN_LOG_INFO, "chunk:%d, packed_len:%" GRN_FMT_SIZE,
9853 chunk_number, ii_buffer->packed_len);
9854 fake_map(ctx, ii_buffer->ii->chunk, &io_win, ii_buffer->packed_buf,
9855 chunk_number, ii_buffer->packed_len);
9856 grn_io_win_unmap(&io_win);
9857 ii_buffer->term_buffer->header.chunk = chunk_number;
9858 ii_buffer->term_buffer->header.chunk_size = ii_buffer->packed_len;
9859 ii_buffer->term_buffer->header.buffer_free =
9860 S_SEGMENT - sizeof(buffer_header) -
9861 ii_buffer->term_buffer->header.nterms * sizeof(buffer_term);
9862 ii_buffer->term_buffer->header.nterms_void = 0;
9863 buffer_segment_update(ii_buffer->ii, ii_buffer->lseg, ii_buffer->dseg);
9864 ii_buffer->ii->header->total_chunk_size += ii_buffer->packed_len;
9865 ii_buffer->total_chunk_size += ii_buffer->packed_len;
9866 GRN_LOG(ctx, GRN_LOG_DEBUG,
9867 "nterms=%d chunk=%d total=%" GRN_FMT_INT64U "KB",
9868 ii_buffer->term_buffer->header.nterms,
9869 ii_buffer->term_buffer->header.chunk_size,
9870 ii_buffer->ii->header->total_chunk_size >> 10);
9871 ii_buffer->term_buffer = NULL;
9872 ii_buffer->packed_buf = NULL;
9873 ii_buffer->packed_len = 0;
9874 ii_buffer->packed_buf_size = 0;
9875 ii_buffer->curr_size = 0;
9876}
9877
9878/*
9879 * merge_hit_blocks merges hit blocks into ii_buffer->data_vectors.
9880 * merge_hit_blocks returns the estimated maximum size in bytes.
9881 */
9882static size_t
9883merge_hit_blocks(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
9884 ii_buffer_block *hits[], int nhits)
9885{
9886 uint64_t nrecs = 0;
9887 uint64_t nposts = 0;
9888 size_t max_size;
9889 uint64_t flags = ii_buffer->ii->header->flags;
9890 int i;
9891 for (i = 0; i < nhits; i++) {
9892 ii_buffer_block *block = hits[i];
9893 nrecs += block->nrecs;
9894 nposts += block->nposts;
9895 }
9896 ii_buffer->curr_size += nrecs + nposts;
9897 max_size = nrecs * (ii_buffer->ii->n_elements);
9898 if (flags & GRN_OBJ_WITH_POSITION) { max_size += nposts - nrecs; }
9899 datavec_reset(ctx, ii_buffer->data_vectors,
9900 ii_buffer->ii->n_elements, nrecs, max_size);
9901 {
9902 int i;
9903 uint32_t lr = 0; /* Last rid */
9904 uint64_t spos = 0;
9905 uint32_t *ridp, *sidp = NULL, *tfp, *weightp = NULL, *posp = NULL;
9906 {
9907 /* Get write positions in datavec. */
9908 int j = 0;
9909 ridp = ii_buffer->data_vectors[j++].data;
9910 if (flags & GRN_OBJ_WITH_SECTION) {
9911 sidp = ii_buffer->data_vectors[j++].data;
9912 }
9913 tfp = ii_buffer->data_vectors[j++].data;
9914 if (flags & GRN_OBJ_WITH_WEIGHT) {
9915 weightp = ii_buffer->data_vectors[j++].data;
9916 }
9917 if (flags & GRN_OBJ_WITH_POSITION) {
9918 posp = ii_buffer->data_vectors[j++].data;
9919 }
9920 }
9921 for (i = 0; i < nhits; i++) {
9922 /* Read postings from hit blocks and join the postings into datavec. */
9923 ii_buffer_block *block = hits[i];
9924 uint8_t *p = block->bufcur;
9925 uint32_t n = block->nrecs;
9926 if (n) {
9927 GRN_B_DEC(*ridp, p);
9928 *ridp -= lr;
9929 lr += *ridp++;
9930 while (--n) {
9931 GRN_B_DEC(*ridp, p);
9932 lr += *ridp++;
9933 }
9934 }
9935 if ((flags & GRN_OBJ_WITH_SECTION)) {
9936 for (n = block->nrecs; n; n--) {
9937 GRN_B_DEC(*sidp++, p);
9938 }
9939 }
9940 for (n = block->nrecs; n; n--) {
9941 GRN_B_DEC(*tfp++, p);
9942 }
9943 if ((flags & GRN_OBJ_WITH_WEIGHT)) {
9944 for (n = block->nrecs; n; n--) {
9945 GRN_B_DEC(*weightp++, p);
9946 }
9947 }
9948 if ((flags & GRN_OBJ_WITH_POSITION)) {
9949 for (n = block->nposts; n; n--) {
9950 GRN_B_DEC(*posp, p);
9951 spos += *posp++;
9952 }
9953 }
9954 block->rest -= (p - block->bufcur);
9955 block->bufcur = p;
9956 grn_ii_buffer_fetch(ctx, ii_buffer, block);
9957 }
9958 {
9959 /* Set size and flags of datavec. */
9960 int j = 0;
9961 uint32_t f_s = (nrecs < 3) ? 0 : USE_P_ENC;
9962 uint32_t f_d = ((nrecs < 16) || (nrecs <= (lr >> 8))) ? 0 : USE_P_ENC;
9963 ii_buffer->data_vectors[j].data_size = nrecs;
9964 ii_buffer->data_vectors[j++].flags = f_d;
9965 if ((flags & GRN_OBJ_WITH_SECTION)) {
9966 ii_buffer->data_vectors[j].data_size = nrecs;
9967 ii_buffer->data_vectors[j++].flags = f_s;
9968 }
9969 ii_buffer->data_vectors[j].data_size = nrecs;
9970 ii_buffer->data_vectors[j++].flags = f_s;
9971 if ((flags & GRN_OBJ_WITH_WEIGHT)) {
9972 ii_buffer->data_vectors[j].data_size = nrecs;
9973 ii_buffer->data_vectors[j++].flags = f_s;
9974 }
9975 if ((flags & GRN_OBJ_WITH_POSITION)) {
9976 uint32_t f_p = (((nposts < 32) ||
9977 (nposts <= (spos >> 13))) ? 0 : USE_P_ENC);
9978 ii_buffer->data_vectors[j].data_size = nposts;
9979 ii_buffer->data_vectors[j++].flags = f_p|ODD;
9980 }
9981 }
9982 }
9983 return (max_size + ii_buffer->ii->n_elements) * 4;
9984}
9985
9986static buffer *
9987get_term_buffer(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
9988{
9989 if (!ii_buffer->term_buffer) {
9990 uint32_t lseg;
9991 void *term_buffer;
9992 for (lseg = 0; lseg < GRN_II_MAX_LSEG; lseg++) {
9993 if (ii_buffer->ii->header->binfo[lseg] == GRN_II_PSEG_NOT_ASSIGNED) { break; }
9994 }
9995 if (lseg == GRN_II_MAX_LSEG) {
9996 DEFINE_NAME(ii_buffer->ii);
9997 MERR("[ii][buffer][term-buffer] couldn't find a free buffer: "
9998 "<%.*s>",
9999 name_size, name);
10000 return NULL;
10001 }
10002 ii_buffer->lseg = lseg;
10003 ii_buffer->dseg = segment_get(ctx, ii_buffer->ii);
10004 GRN_IO_SEG_REF(ii_buffer->ii->seg, ii_buffer->dseg, term_buffer);
10005 ii_buffer->term_buffer = (buffer *)term_buffer;
10006 }
10007 return ii_buffer->term_buffer;
10008}
10009
10010/*
10011 * try_in_place_packing tries to pack a posting in an array element.
10012 *
10013 * The requirements are as follows:
10014 * - nposts == 1
10015 * - nhits == 1 && nrecs == 1 && tf == 0
10016 * - weight == 0
10017 * - !(flags & GRN_OBJ_WITH_SECTION) || (rid < 0x100000 && sid < 0x800)
10018 */
10019static grn_bool
10020try_in_place_packing(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
10021 grn_id tid, ii_buffer_block *hits[], int nhits)
10022{
10023 if (nhits == 1 && hits[0]->nrecs == 1 && hits[0]->nposts == 1) {
10024 grn_id rid;
10025 uint32_t sid = 1, tf, pos = 0, weight = 0;
10026 ii_buffer_block *block = hits[0];
10027 uint8_t *p = block->bufcur;
10028 uint32_t flags = ii_buffer->ii->header->flags;
10029 GRN_B_DEC(rid, p);
10030 if (flags & GRN_OBJ_WITH_SECTION) {
10031 GRN_B_DEC(sid, p);
10032 sid++;
10033 }
10034 GRN_B_DEC(tf, p);
10035 if (tf != 0) { GRN_LOG(ctx, GRN_LOG_WARNING, "tf=%d", tf); }
10036 if (flags & GRN_OBJ_WITH_WEIGHT) { GRN_B_DEC(weight, p); }
10037 if (flags & GRN_OBJ_WITH_POSITION) { GRN_B_DEC(pos, p); }
10038 if (!weight) {
10039 if (flags & GRN_OBJ_WITH_SECTION) {
10040 if (rid < 0x100000 && sid < 0x800) {
10041 uint32_t *a = array_get(ctx, ii_buffer->ii, tid);
10042 a[0] = (rid << 12) + (sid << 1) + 1;
10043 a[1] = pos;
10044 array_unref(ii_buffer->ii, tid);
10045 } else {
10046 return GRN_FALSE;
10047 }
10048 } else {
10049 uint32_t *a = array_get(ctx, ii_buffer->ii, tid);
10050 a[0] = (rid << 1) + 1;
10051 a[1] = pos;
10052 array_unref(ii_buffer->ii, tid);
10053 }
10054 block->rest -= (p - block->bufcur);
10055 block->bufcur = p;
10056 grn_ii_buffer_fetch(ctx, ii_buffer, block);
10057 return GRN_TRUE;
10058 }
10059 }
10060 return GRN_FALSE;
10061}
10062
10063/* grn_ii_buffer_merge merges hit blocks and pack it. */
10064static void
10065grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
10066 grn_id tid, ii_buffer_block *hits[], int nhits)
10067{
10068 if (!try_in_place_packing(ctx, ii_buffer, tid, hits, nhits)) {
10069 /* Merge hit blocks and reserve a buffer for packed data. */
10070 size_t max_size = merge_hit_blocks(ctx, ii_buffer, hits, nhits);
10071 if (ii_buffer->packed_buf &&
10072 ii_buffer->packed_buf_size < ii_buffer->packed_len + max_size) {
10073 grn_ii_buffer_chunk_flush(ctx, ii_buffer);
10074 }
10075 if (!ii_buffer->packed_buf) {
10076 size_t buf_size = (max_size > II_BUFFER_PACKED_BUF_SIZE)
10077 ? max_size : II_BUFFER_PACKED_BUF_SIZE;
10078 if ((ii_buffer->packed_buf = GRN_MALLOC(buf_size))) {
10079 ii_buffer->packed_buf_size = buf_size;
10080 }
10081 }
10082 {
10083 /* Pack postings into the current buffer. */
10084 uint16_t nterm;
10085 size_t packed_len;
10086 buffer_term *bt;
10087 uint32_t *a;
10088 buffer *term_buffer;
10089
10090 a = array_get(ctx, ii_buffer->ii, tid);
10091 if (!a) {
10092 DEFINE_NAME(ii_buffer->ii);
10093 MERR("[ii][buffer][merge] failed to allocate an array: "
10094 "<%.*s>: "
10095 "<%u>",
10096 name_size, name,
10097 tid);
10098 return;
10099 }
10100 term_buffer = get_term_buffer(ctx, ii_buffer);
10101 if (!term_buffer) {
10102 DEFINE_NAME(ii_buffer->ii);
10103 MERR("[ii][buffer][merge] failed to allocate a term buffer: "
10104 "<%.*s>: "
10105 "<%u>",
10106 name_size, name,
10107 tid);
10108 return;
10109 }
10110 nterm = term_buffer->header.nterms++;
10111 bt = &term_buffer->terms[nterm];
10112 a[0] = SEG2POS(ii_buffer->lseg,
10113 (sizeof(buffer_header) + sizeof(buffer_term) * nterm));
10114 packed_len = grn_p_encv(ctx, ii_buffer->data_vectors,
10115 ii_buffer->ii->n_elements,
10116 ii_buffer->packed_buf +
10117 ii_buffer->packed_len);
10118 a[1] = ii_buffer->data_vectors[0].data_size;
10119 bt->tid = tid;
10120 bt->size_in_buffer = 0;
10121 bt->pos_in_buffer = 0;
10122 bt->size_in_chunk = packed_len;
10123 bt->pos_in_chunk = ii_buffer->packed_len;
10124 ii_buffer->packed_len += packed_len;
10125 if (((ii_buffer->curr_size * ii_buffer->update_buffer_size) +
10126 (ii_buffer->total_size * term_buffer->header.nterms * 16)) >=
10127 (ii_buffer->total_size * II_BUFFER_NTERMS_PER_BUFFER * 16)) {
10128 grn_ii_buffer_chunk_flush(ctx, ii_buffer);
10129 }
10130 array_unref(ii_buffer->ii, tid);
10131 }
10132 }
10133}
10134
10135grn_ii_buffer *
10136grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii,
10137 long long unsigned int update_buffer_size)
10138{
10139 if (ii && ii->lexicon) {
10140 grn_ii_buffer *ii_buffer = GRN_MALLOCN(grn_ii_buffer, 1);
10141 if (ii_buffer) {
10142 ii_buffer->ii = ii;
10143 ii_buffer->lexicon = ii->lexicon;
10144 ii_buffer->tmp_lexicon = NULL;
10145 ii_buffer->nblocks = 0;
10146 ii_buffer->blocks = NULL;
10147 ii_buffer->ncounters = II_BUFFER_NCOUNTERS_MARGIN;
10148 ii_buffer->block_pos = 0;
10149 ii_buffer->filepos = 0;
10150 ii_buffer->curr_size = 0;
10151 ii_buffer->total_size = 0;
10152 ii_buffer->update_buffer_size = update_buffer_size;
10153 ii_buffer->counters = GRN_CALLOC(ii_buffer->ncounters *
10154 sizeof(ii_buffer_counter));
10155 ii_buffer->term_buffer = NULL;
10156 ii_buffer->packed_buf = NULL;
10157 ii_buffer->packed_len = 0;
10158 ii_buffer->packed_buf_size = 0;
10159 ii_buffer->total_chunk_size = 0;
10160 ii_buffer->values = NULL;
10161 ii_buffer->nvalues = 0;
10162 ii_buffer->max_nvalues = 0;
10163 ii_buffer->last_rid = 0;
10164 if (ii_buffer->counters) {
10165 ii_buffer->block_buf = GRN_MALLOCN(grn_id, II_BUFFER_BLOCK_SIZE);
10166 if (ii_buffer->block_buf) {
10167 grn_snprintf(ii_buffer->tmpfpath, PATH_MAX, PATH_MAX,
10168 "%sXXXXXX", grn_io_path(ii->seg));
10169 ii_buffer->block_buf_size = II_BUFFER_BLOCK_SIZE;
10170 ii_buffer->tmpfd = grn_mkstemp(ii_buffer->tmpfpath);
10171 if (ii_buffer->tmpfd != -1) {
10172 grn_table_flags flags;
10173 grn_table_get_info(ctx, ii->lexicon, &flags, NULL, NULL, NULL,
10174 NULL);
10175 if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) {
10176 grn_pat_cache_enable(ctx, (grn_pat *)ii->lexicon,
10177 PAT_CACHE_SIZE);
10178 }
10179 return ii_buffer;
10180 } else {
10181 SERR("failed grn_mkstemp(%s)",
10182 ii_buffer->tmpfpath);
10183 }
10184 GRN_FREE(ii_buffer->block_buf);
10185 }
10186 GRN_FREE(ii_buffer->counters);
10187 }
10188 GRN_FREE(ii_buffer);
10189 }
10190 } else {
10191 ERR(GRN_INVALID_ARGUMENT, "ii or ii->lexicon is NULL");
10192 }
10193 return NULL;
10194}
10195
10196static void
10197ii_buffer_value_init(grn_ctx *ctx, ii_buffer_value *value)
10198{
10199 value->sid = 0;
10200 value->weight = 0;
10201 value->p = NULL;
10202 value->len = 0;
10203 value->buf = NULL;
10204 value->cap = 0;
10205}
10206
10207static void
10208ii_buffer_value_fin(grn_ctx *ctx, ii_buffer_value *value)
10209{
10210 if (value->buf) {
10211 GRN_FREE(value->buf);
10212 }
10213}
10214
10215/*
10216 * ii_buffer_values_append appends a value to ii_buffer.
10217 * This function deep-copies the value if need_copy == GRN_TRUE.
10218 */
10219static void
10220ii_buffer_values_append(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
10221 unsigned int sid, unsigned weight,
10222 const char *p, uint32_t len, grn_bool need_copy)
10223{
10224 if (ii_buffer->nvalues == ii_buffer->max_nvalues) {
10225 unsigned int i;
10226 unsigned int new_max_nvalues = ii_buffer->max_nvalues * 2;
10227 unsigned int new_size;
10228 ii_buffer_value *new_values;
10229 if (new_max_nvalues == 0) {
10230 new_max_nvalues = 1;
10231 }
10232 new_size = new_max_nvalues * sizeof(ii_buffer_value);
10233 new_values = (ii_buffer_value *)GRN_REALLOC(ii_buffer->values, new_size);
10234 if (!new_values) {
10235 return;
10236 }
10237 for (i = ii_buffer->max_nvalues; i < new_max_nvalues; i++) {
10238 ii_buffer_value_init(ctx, &new_values[i]);
10239 }
10240 ii_buffer->values = new_values;
10241 ii_buffer->max_nvalues = new_max_nvalues;
10242 }
10243
10244 {
10245 ii_buffer_value *value = &ii_buffer->values[ii_buffer->nvalues];
10246 if (need_copy) {
10247 if (len > value->cap) {
10248 char *new_buf = (char *)GRN_REALLOC(value->buf, len);
10249 if (!new_buf) {
10250 return;
10251 }
10252 value->buf = new_buf;
10253 value->cap = len;
10254 }
10255 grn_memcpy(value->buf, p, len);
10256 p = value->buf;
10257 }
10258 value->sid = sid;
10259 value->weight = weight;
10260 value->p = p;
10261 value->len = len;
10262 ii_buffer->nvalues++;
10263 }
10264}
10265
10266grn_rc
10267grn_ii_buffer_append(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
10268 grn_id rid, unsigned int sid, grn_obj *value)
10269{
10270 if (rid != ii_buffer->last_rid) {
10271 if (ii_buffer->last_rid) {
10272 grn_ii_buffer_tokenize(ctx, ii_buffer, ii_buffer->last_rid);
10273 }
10274 ii_buffer->last_rid = rid;
10275 }
10276 ii_buffer_values_append(ctx, ii_buffer, sid, 0,
10277 GRN_TEXT_VALUE(value), GRN_TEXT_LEN(value),
10278 GRN_TRUE);
10279 return ctx->rc;
10280}
10281
10282/*
10283 * grn_ii_buffer_commit completes tokenization and builds an inverted index
10284 * from data in a temporary file.
10285 */
10286grn_rc
10287grn_ii_buffer_commit(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
10288{
10289 /* Tokenize the remaining values and free resources. */
10290 if (ii_buffer->last_rid && ii_buffer->nvalues) {
10291 grn_ii_buffer_tokenize(ctx, ii_buffer, ii_buffer->last_rid);
10292 }
10293 if (ii_buffer->block_pos) {
10294 grn_ii_buffer_flush(ctx, ii_buffer);
10295 }
10296 if (ii_buffer->tmpfd != -1) {
10297 grn_close(ii_buffer->tmpfd);
10298 }
10299 if (ii_buffer->block_buf) {
10300 GRN_FREE(ii_buffer->block_buf);
10301 ii_buffer->block_buf = NULL;
10302 }
10303 if (ii_buffer->counters) {
10304 GRN_FREE(ii_buffer->counters);
10305 ii_buffer->counters = NULL;
10306 }
10307
10308 if (ii_buffer->update_buffer_size &&
10309 ii_buffer->update_buffer_size < 20) {
10310 if (ii_buffer->update_buffer_size < 10) {
10311 ii_buffer->update_buffer_size =
10312 ii_buffer->total_size >> (10 - ii_buffer->update_buffer_size);
10313 } else {
10314 ii_buffer->update_buffer_size =
10315 ii_buffer->total_size << (ii_buffer->update_buffer_size - 10);
10316 }
10317 }
10318
10319 GRN_LOG(ctx, GRN_LOG_DEBUG,
10320 "nblocks=%d, update_buffer_size=%" GRN_FMT_INT64U,
10321 ii_buffer->nblocks, ii_buffer->update_buffer_size);
10322
10323 datavec_init(ctx, ii_buffer->data_vectors, ii_buffer->ii->n_elements, 0, 0);
10324 grn_open(ii_buffer->tmpfd,
10325 ii_buffer->tmpfpath,
10326 O_RDONLY | GRN_OPEN_FLAG_BINARY);
10327 if (ii_buffer->tmpfd == -1) {
10328 ERRNO_ERR("failed to open path: <%s>", ii_buffer->tmpfpath);
10329 return ctx->rc;
10330 }
10331 {
10332 /* Fetch the first term of each block. */
10333 uint32_t i;
10334 for (i = 0; i < ii_buffer->nblocks; i++) {
10335 grn_ii_buffer_fetch(ctx, ii_buffer, &ii_buffer->blocks[i]);
10336 }
10337 }
10338 {
10339 ii_buffer_block **hits;
10340 if ((hits = GRN_MALLOCN(ii_buffer_block *, ii_buffer->nblocks))) {
10341 grn_id tid;
10342 grn_table_cursor *tc;
10343 tc = grn_table_cursor_open(ctx, ii_buffer->lexicon,
10344 NULL, 0, NULL, 0, 0, -1, II_BUFFER_ORDER);
10345 if (tc) {
10346 while ((tid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
10347 /*
10348 * Find blocks which contain the current term.
10349 * Then, merge the postings.
10350 */
10351 int nrests = 0;
10352 int nhits = 0;
10353 uint32_t i;
10354 for (i = 0; i < ii_buffer->nblocks; i++) {
10355 if (ii_buffer->blocks[i].tid == tid) {
10356 hits[nhits++] = &ii_buffer->blocks[i];
10357 }
10358 if (ii_buffer->blocks[i].tid) { nrests++; }
10359 }
10360 if (nhits) { grn_ii_buffer_merge(ctx, ii_buffer, tid, hits, nhits); }
10361 if (!nrests) { break; }
10362 }
10363 if (ii_buffer->packed_len) {
10364 grn_ii_buffer_chunk_flush(ctx, ii_buffer);
10365 }
10366 grn_table_cursor_close(ctx, tc);
10367 }
10368 GRN_FREE(hits);
10369 }
10370 }
10371 datavec_fin(ctx, ii_buffer->data_vectors);
10372 GRN_LOG(ctx, GRN_LOG_DEBUG,
10373 "tmpfile_size:%" GRN_FMT_INT64D " > total_chunk_size:%" GRN_FMT_SIZE,
10374 ii_buffer->filepos, ii_buffer->total_chunk_size);
10375 grn_close(ii_buffer->tmpfd);
10376 if (grn_unlink(ii_buffer->tmpfpath) == 0) {
10377 GRN_LOG(ctx, GRN_LOG_INFO,
10378 "[ii][buffer][commit] removed temporary path: <%s>",
10379 ii_buffer->tmpfpath);
10380 } else {
10381 ERRNO_ERR("[ii][buffer][commit] failed to remove temporary path: <%s>",
10382 ii_buffer->tmpfpath);
10383 }
10384 ii_buffer->tmpfd = -1;
10385 return ctx->rc;
10386}
10387
10388grn_rc
10389grn_ii_buffer_close(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
10390{
10391 uint32_t i;
10392 grn_table_flags flags;
10393 grn_table_get_info(ctx, ii_buffer->ii->lexicon, &flags, NULL, NULL, NULL,
10394 NULL);
10395 if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) {
10396 grn_pat_cache_disable(ctx, (grn_pat *)ii_buffer->ii->lexicon);
10397 }
10398 if (ii_buffer->tmp_lexicon) {
10399 grn_obj_close(ctx, ii_buffer->tmp_lexicon);
10400 }
10401 if (ii_buffer->tmpfd != -1) {
10402 grn_close(ii_buffer->tmpfd);
10403 if (grn_unlink(ii_buffer->tmpfpath) == 0) {
10404 GRN_LOG(ctx, GRN_LOG_INFO,
10405 "[ii][buffer][close] removed temporary path: <%s>",
10406 ii_buffer->tmpfpath);
10407 } else {
10408 ERRNO_ERR("[ii][buffer][close] failed to remove temporary path: <%s>",
10409 ii_buffer->tmpfpath);
10410 }
10411 }
10412 if (ii_buffer->block_buf) {
10413 GRN_FREE(ii_buffer->block_buf);
10414 }
10415 if (ii_buffer->counters) {
10416 GRN_FREE(ii_buffer->counters);
10417 }
10418 if (ii_buffer->blocks) {
10419 for (i = 0; i < ii_buffer->nblocks; i++) {
10420 if (ii_buffer->blocks[i].buffer) {
10421 GRN_FREE(ii_buffer->blocks[i].buffer);
10422 }
10423 }
10424 GRN_FREE(ii_buffer->blocks);
10425 }
10426 if (ii_buffer->values) {
10427 for (i = 0; i < ii_buffer->max_nvalues; i++) {
10428 ii_buffer_value_fin(ctx, &ii_buffer->values[i]);
10429 }
10430 GRN_FREE(ii_buffer->values);
10431 }
10432 GRN_FREE(ii_buffer);
10433 return ctx->rc;
10434}
10435
10436/*
10437 * grn_ii_buffer_parse tokenizes values to be indexed.
10438 *
10439 * For each record of the target table, grn_ii_buffer_parse makes a list of
10440 * target values and calls grn_ii_buffer_tokenize. To make a list of target
10441 * values, ii_buffer_values_append is called for each value. Note that
10442 * ii_buffer_values_append is called for each element for a vector.
10443 */
10444static void
10445grn_ii_buffer_parse(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
10446 grn_obj *target, int ncols, grn_obj **cols)
10447{
10448 grn_table_cursor *tc;
10449 grn_obj *vobjs;
10450 if ((vobjs = GRN_MALLOCN(grn_obj, ncols))) {
10451 int i;
10452 for (i = 0; i < ncols; i++) {
10453 GRN_TEXT_INIT(&vobjs[i], 0);
10454 }
10455 if ((tc = grn_table_cursor_open(ctx, target,
10456 NULL, 0, NULL, 0, 0, -1,
10457 GRN_CURSOR_BY_ID))) {
10458 grn_id rid;
10459 while ((rid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
10460 unsigned int j;
10461 int sid;
10462 grn_obj **col;
10463 for (sid = 1, col = cols; sid <= ncols; sid++, col++) {
10464 grn_obj *rv = &vobjs[sid - 1];
10465 grn_obj_reinit_for(ctx, rv, *col);
10466 if (GRN_OBJ_TABLEP(*col)) {
10467 grn_table_get_key2(ctx, *col, rid, rv);
10468 } else {
10469 grn_obj_get_value(ctx, *col, rid, rv);
10470 }
10471 switch (rv->header.type) {
10472 case GRN_BULK :
10473 ii_buffer_values_append(ctx, ii_buffer, sid, 0,
10474 GRN_TEXT_VALUE(rv), GRN_TEXT_LEN(rv),
10475 GRN_FALSE);
10476 break;
10477 case GRN_UVECTOR :
10478 {
10479 unsigned int size;
10480 unsigned int elem_size;
10481 size = grn_uvector_size(ctx, rv);
10482 elem_size = grn_uvector_element_size(ctx, rv);
10483 for (j = 0; j < size; j++) {
10484 ii_buffer_values_append(ctx, ii_buffer, sid, 0,
10485 GRN_BULK_HEAD(rv) + (elem_size * j),
10486 elem_size, GRN_FALSE);
10487 }
10488 }
10489 break;
10490 case GRN_VECTOR :
10491 if (rv->u.v.body) {
10492 int j;
10493 int n_sections = rv->u.v.n_sections;
10494 grn_section *sections = rv->u.v.sections;
10495 const char *head = GRN_BULK_HEAD(rv->u.v.body);
10496 for (j = 0; j < n_sections; j++) {
10497 grn_section *section = sections + j;
10498 if (section->length == 0) {
10499 continue;
10500 }
10501 ii_buffer_values_append(ctx, ii_buffer, sid, section->weight,
10502 head + section->offset,
10503 section->length, GRN_FALSE);
10504 }
10505 }
10506 break;
10507 default :
10508 ERR(GRN_INVALID_ARGUMENT,
10509 "[index] invalid object assigned as value");
10510 break;
10511 }
10512 }
10513 grn_ii_buffer_tokenize(ctx, ii_buffer, rid);
10514 }
10515 grn_table_cursor_close(ctx, tc);
10516 }
10517 for (i = 0; i < ncols; i++) {
10518 GRN_OBJ_FIN(ctx, &vobjs[i]);
10519 }
10520 GRN_FREE(vobjs);
10521 }
10522}
10523
10524grn_rc
10525grn_ii_build(grn_ctx *ctx, grn_ii *ii, uint64_t sparsity)
10526{
10527 grn_ii_buffer *ii_buffer;
10528
10529 {
10530 /* Do nothing if there are no targets. */
10531 grn_obj *data_table = grn_ctx_at(ctx, DB_OBJ(ii)->range);
10532 if (!data_table) {
10533 return ctx->rc;
10534 }
10535 if (grn_table_size(ctx, data_table) == 0) {
10536 return ctx->rc;
10537 }
10538 }
10539
10540 ii_buffer = grn_ii_buffer_open(ctx, ii, sparsity);
10541 if (ii_buffer) {
10542 grn_id *source = (grn_id *)ii->obj.source;
10543 if (ii->obj.source_size && ii->obj.source) {
10544 int ncols = ii->obj.source_size / sizeof(grn_id);
10545 grn_obj **cols = GRN_MALLOCN(grn_obj *, ncols);
10546 if (cols) {
10547 int i;
10548 for (i = 0; i < ncols; i++) {
10549 if (!(cols[i] = grn_ctx_at(ctx, source[i]))) { break; }
10550 }
10551 if (i == ncols) { /* All the source columns are available. */
10552 grn_obj *target = cols[0];
10553 if (!GRN_OBJ_TABLEP(target)) {
10554 target = grn_ctx_at(ctx, target->header.domain);
10555 }
10556 if (target) {
10557 grn_ii_buffer_parse(ctx, ii_buffer, target, ncols, cols);
10558 grn_ii_buffer_commit(ctx, ii_buffer);
10559 } else {
10560 ERR(GRN_INVALID_ARGUMENT, "failed to resolve the target");
10561 }
10562 } else {
10563 ERR(GRN_INVALID_ARGUMENT, "failed to resolve a column (%d)", i);
10564 }
10565 GRN_FREE(cols);
10566 }
10567 } else {
10568 ERR(GRN_INVALID_ARGUMENT, "ii->obj.source is void");
10569 }
10570 grn_ii_buffer_close(ctx, ii_buffer);
10571 }
10572 return ctx->rc;
10573}
10574
10575/*
10576 * ==========================================================================
10577 * The following part provides constants, structures and functions for static
10578 * indexing.
10579 * ==========================================================================
10580 */
10581
/* A quarter of S_CHUNK. */
#define GRN_II_BUILDER_BUFFER_CHUNK_SIZE      (S_CHUNK >> 2)

/*
 * Limits of grn_ii_builder_options. Out-of-range options are replaced with
 * the nearest limit by grn_ii_builder_options_fix.
 */

/* lexicon_cache_size (upper limit only) */
#define GRN_II_BUILDER_MAX_LEXICON_CACHE_SIZE (1 << 24)

/* block_threshold */
#define GRN_II_BUILDER_MIN_BLOCK_THRESHOLD    1
#define GRN_II_BUILDER_MAX_BLOCK_THRESHOLD    (1 << 28)

/* file_buf_size */
#define GRN_II_BUILDER_MIN_FILE_BUF_SIZE      (1 << 12)
#define GRN_II_BUILDER_MAX_FILE_BUF_SIZE      (1 << 30)

/* block_buf_size */
#define GRN_II_BUILDER_MIN_BLOCK_BUF_SIZE     (1 << 12)
#define GRN_II_BUILDER_MAX_BLOCK_BUF_SIZE     (1 << 30)

/* chunk_threshold */
#define GRN_II_BUILDER_MIN_CHUNK_THRESHOLD    1
#define GRN_II_BUILDER_MAX_CHUNK_THRESHOLD    (1 << 28)

/*
 * buffer_max_n_terms: the upper limit is the number of buffer_term entries
 * that fit in a segment after buffer_header.
 */
#define GRN_II_BUILDER_MIN_BUFFER_MAX_N_TERMS 1
#define GRN_II_BUILDER_MAX_BUFFER_MAX_N_TERMS \
  ((S_SEGMENT - sizeof(buffer_header)) / sizeof(buffer_term))
10601
/* Tunable parameters of the static index builder. */
struct grn_ii_builder_options {
  /* Cache size of temporary lexicon */
  uint32_t lexicon_cache_size;
  /* A block is flushed if builder->n reaches this value. */
  uint32_t block_threshold;
  /* Buffer size for buffered output */
  uint32_t file_buf_size;
  /* Buffer size for buffered input */
  uint32_t block_buf_size;
  /* A chunk is flushed if chunk->n reaches this value. */
  uint32_t chunk_threshold;
  /* Maximum number of terms in each buffer */
  uint32_t buffer_max_n_terms;
};
10612
10613static const grn_ii_builder_options grn_ii_builder_default_options = {
10614 0x80000, /* lexicon_cache_size */
10615 0x4000000, /* block_threshold */
10616 0x10000, /* file_buf_size */
10617 0x10000, /* block_buf_size */
10618 0x1000, /* chunk_threshold */
10619 0x3000, /* buffer_max_n_terms */
10620};
10621
10622/* grn_ii_builder_options_init fills options with the default options. */
10623void
10624grn_ii_builder_options_init(grn_ii_builder_options *options)
10625{
10626 *options = grn_ii_builder_default_options;
10627}
10628
10629/* grn_ii_builder_options_fix fixes out-of-range options. */
10630static void
10631grn_ii_builder_options_fix(grn_ii_builder_options *options)
10632{
10633 if (options->lexicon_cache_size > GRN_II_BUILDER_MAX_LEXICON_CACHE_SIZE) {
10634 options->lexicon_cache_size = GRN_II_BUILDER_MAX_LEXICON_CACHE_SIZE;
10635 }
10636
10637 if (options->block_threshold < GRN_II_BUILDER_MIN_BLOCK_THRESHOLD) {
10638 options->block_threshold = GRN_II_BUILDER_MIN_BLOCK_THRESHOLD;
10639 }
10640 if (options->block_threshold > GRN_II_BUILDER_MAX_BLOCK_THRESHOLD) {
10641 options->block_threshold = GRN_II_BUILDER_MAX_BLOCK_THRESHOLD;
10642 }
10643
10644 if (options->file_buf_size < GRN_II_BUILDER_MIN_FILE_BUF_SIZE) {
10645 options->file_buf_size = GRN_II_BUILDER_MIN_FILE_BUF_SIZE;
10646 }
10647 if (options->file_buf_size > GRN_II_BUILDER_MAX_FILE_BUF_SIZE) {
10648 options->file_buf_size = GRN_II_BUILDER_MAX_FILE_BUF_SIZE;
10649 }
10650
10651 if (options->block_buf_size < GRN_II_BUILDER_MIN_BLOCK_BUF_SIZE) {
10652 options->block_buf_size = GRN_II_BUILDER_MIN_BLOCK_BUF_SIZE;
10653 }
10654 if (options->block_buf_size > GRN_II_BUILDER_MAX_BLOCK_BUF_SIZE) {
10655 options->block_buf_size = GRN_II_BUILDER_MAX_BLOCK_BUF_SIZE;
10656 }
10657
10658 if (options->chunk_threshold < GRN_II_BUILDER_MIN_CHUNK_THRESHOLD) {
10659 options->chunk_threshold = GRN_II_BUILDER_MIN_CHUNK_THRESHOLD;
10660 }
10661 if (options->chunk_threshold > GRN_II_BUILDER_MAX_CHUNK_THRESHOLD) {
10662 options->chunk_threshold = GRN_II_BUILDER_MAX_CHUNK_THRESHOLD;
10663 }
10664
10665 if (options->buffer_max_n_terms < GRN_II_BUILDER_MIN_BUFFER_MAX_N_TERMS) {
10666 options->buffer_max_n_terms = GRN_II_BUILDER_MIN_BUFFER_MAX_N_TERMS;
10667 }
10668 if (options->buffer_max_n_terms > GRN_II_BUILDER_MAX_BUFFER_MAX_N_TERMS) {
10669 options->buffer_max_n_terms = GRN_II_BUILDER_MAX_BUFFER_MAX_N_TERMS;
10670 }
10671}
10672
10673#define GRN_II_BUILDER_TERM_INPLACE_SIZE\
10674 (sizeof(grn_ii_builder_term) - (uintptr_t)&((grn_ii_builder_term *)0)->dummy)
10675
10676typedef struct {
10677 grn_id rid; /* Last record ID */
10678 uint32_t sid; /* Last section ID */
10679 /* Last position (GRN_OBJ_WITH_POSITION) or frequency. */
10680 uint32_t pos_or_freq;
10681 uint32_t offset; /* Buffer write offset */
10682 uint32_t size; /* Buffer size */
10683 uint32_t dummy; /* Padding */
10684 uint8_t *buf; /* Buffer (to be freed) */
10685} grn_ii_builder_term;
10686
10687/* grn_ii_builder_term_is_inplace returns whether a term buffer is inplace. */
10688inline static grn_bool
10689grn_ii_builder_term_is_inplace(grn_ii_builder_term *term)
10690{
10691 return term->size == GRN_II_BUILDER_TERM_INPLACE_SIZE;
10692}
10693
10694/* grn_ii_builder_term_get_buf returns a term buffer. */
10695inline static uint8_t *
10696grn_ii_builder_term_get_buf(grn_ii_builder_term *term)
10697{
10698 if (grn_ii_builder_term_is_inplace(term)) {
10699 return (uint8_t *)&term->dummy;
10700 } else {
10701 return term->buf;
10702 }
10703}
10704
10705/*
10706 * grn_ii_builder_term_init initializes a term. Note that an initialized term
10707 * must be finalized by grn_ii_builder_term_fin.
10708 */
10709static void
10710grn_ii_builder_term_init(grn_ctx *ctx, grn_ii_builder_term *term)
10711{
10712 term->rid = GRN_ID_NIL;
10713 term->sid = 0;
10714 term->pos_or_freq = 0;
10715 term->offset = 0;
10716 term->size = GRN_II_BUILDER_TERM_INPLACE_SIZE;
10717}
10718
10719/* grn_ii_builder_term_fin finalizes a term. */
10720static void
10721grn_ii_builder_term_fin(grn_ctx *ctx, grn_ii_builder_term *term)
10722{
10723 if (!grn_ii_builder_term_is_inplace(term)) {
10724 GRN_FREE(term->buf);
10725 }
10726}
10727
10728/* grn_ii_builder_term_reinit reinitializes a term. */
10729static void
10730grn_ii_builder_term_reinit(grn_ctx *ctx, grn_ii_builder_term *term)
10731{
10732 grn_ii_builder_term_fin(ctx, term);
10733 grn_ii_builder_term_init(ctx, term);
10734}
10735
10736/* grn_ii_builder_term_extend extends a term buffer. */
10737static grn_rc
10738grn_ii_builder_term_extend(grn_ctx *ctx, grn_ii_builder_term *term)
10739{
10740 uint8_t *buf;
10741 uint32_t size = term->size * 2;
10742 if (grn_ii_builder_term_is_inplace(term)) {
10743 buf = (uint8_t *)GRN_MALLOC(size);
10744 if (!buf) {
10745 ERR(GRN_NO_MEMORY_AVAILABLE,
10746 "failed to allocate memory for term buffer: size = %u", size);
10747 return ctx->rc;
10748 }
10749 grn_memcpy(buf, &term->dummy, term->offset);
10750 } else {
10751 buf = (uint8_t *)GRN_REALLOC(term->buf, size);
10752 if (!buf) {
10753 ERR(GRN_NO_MEMORY_AVAILABLE,
10754 "failed to reallocate memory for term buffer: size = %u", size);
10755 return ctx->rc;
10756 }
10757 }
10758 term->buf = buf;
10759 term->size = size;
10760 return GRN_SUCCESS;
10761}
10762
10763/* grn_ii_builder_term_append appends an integer to a term buffer. */
10764inline static grn_rc
10765grn_ii_builder_term_append(grn_ctx *ctx, grn_ii_builder_term *term,
10766 uint64_t value)
10767{
10768 uint8_t *p;
10769 if (value < (uint64_t)1 << 5) {
10770 if (term->offset + 1 > term->size) {
10771 grn_rc rc = grn_ii_builder_term_extend(ctx, term);
10772 if (rc != GRN_SUCCESS) {
10773 return rc;
10774 }
10775 }
10776 p = grn_ii_builder_term_get_buf(term) + term->offset;
10777 p[0] = (uint8_t)value;
10778 term->offset++;
10779 return GRN_SUCCESS;
10780 } else if (value < (uint64_t)1 << 13) {
10781 if (term->offset + 2 > term->size) {
10782 grn_rc rc = grn_ii_builder_term_extend(ctx, term);
10783 if (rc != GRN_SUCCESS) {
10784 return rc;
10785 }
10786 }
10787 p = grn_ii_builder_term_get_buf(term) + term->offset;
10788 p[0] = (uint8_t)((value & 0x1f) | (1 << 5));
10789 p[1] = (uint8_t)(value >> 5);
10790 term->offset += 2;
10791 return GRN_SUCCESS;
10792 } else {
10793 uint8_t i, n;
10794 if (value < (uint64_t)1 << 21) {
10795 n = 3;
10796 } else if (value < (uint64_t)1 << 29) {
10797 n = 4;
10798 } else if (value < (uint64_t)1 << 37) {
10799 n = 5;
10800 } else if (value < (uint64_t)1 << 45) {
10801 n = 6;
10802 } else if (value < (uint64_t)1 << 53) {
10803 n = 7;
10804 } else {
10805 n = 8;
10806 }
10807 if (term->offset + n > term->size) {
10808 grn_rc rc = grn_ii_builder_term_extend(ctx, term);
10809 if (rc != GRN_SUCCESS) {
10810 return rc;
10811 }
10812 }
10813 p = grn_ii_builder_term_get_buf(term) + term->offset;
10814 p[0] = (uint8_t)(value & 0x1f) | ((n - 1) << 5);
10815 value >>= 5;
10816 for (i = 1; i < n; i++) {
10817 p[i] = (uint8_t)value;
10818 value >>= 8;
10819 }
10820 term->offset += n;
10821 return GRN_SUCCESS;
10822 }
10823}
10824
/* grn_ii_builder_block is the read state of one block. */
typedef struct {
  /* File offset */
  uint64_t offset;
  /* Remaining size */
  uint32_t rest;
  /* Buffer (to be freed) */
  uint8_t *buf;
  /* Current pointer */
  uint8_t *cur;
  /* End pointer */
  uint8_t *end;
  /* Term ID */
  uint32_t tid;
} grn_ii_builder_block;
10833
10834/*
10835 * grn_ii_builder_block_init initializes a block. Note that an initialized
10836 * block must be finalized by grn_ii_builder_block_fin.
10837 */
10838static void
10839grn_ii_builder_block_init(grn_ctx *ctx, grn_ii_builder_block *block)
10840{
10841 block->offset = 0;
10842 block->rest = 0;
10843 block->buf = NULL;
10844 block->cur = NULL;
10845 block->end = NULL;
10846 block->tid = GRN_ID_NIL;
10847}
10848
10849/* grn_ii_builder_block_fin finalizes a block. */
10850static void
10851grn_ii_builder_block_fin(grn_ctx *ctx, grn_ii_builder_block *block)
10852{
10853 if (block->buf) {
10854 GRN_FREE(block->buf);
10855 }
10856}
10857
10858/*
10859 * grn_ii_builder_block_next reads the next integer. Note that this function
10860 * returns GRN_END_OF_DATA if it reaches the end of a block.
10861 */
10862inline static grn_rc
10863grn_ii_builder_block_next(grn_ctx *ctx, grn_ii_builder_block *block,
10864 uint64_t *value)
10865{
10866 uint8_t n;
10867 if (block->cur == block->end) {
10868 return GRN_END_OF_DATA;
10869 }
10870 n = (*block->cur >> 5) + 1;
10871 if (n > block->end - block->cur) {
10872 return GRN_END_OF_DATA;
10873 }
10874 *value = 0;
10875 switch (n) {
10876 case 8 :
10877 *value |= (uint64_t)block->cur[7] << 53;
10878 case 7 :
10879 *value |= (uint64_t)block->cur[6] << 45;
10880 case 6 :
10881 *value |= (uint64_t)block->cur[5] << 37;
10882 case 5 :
10883 *value |= (uint64_t)block->cur[4] << 29;
10884 case 4 :
10885 *value |= (uint64_t)block->cur[3] << 21;
10886 case 3 :
10887 *value |= (uint64_t)block->cur[2] << 13;
10888 case 2 :
10889 *value |= (uint64_t)block->cur[1] << 5;
10890 case 1 :
10891 *value |= block->cur[0] & 0x1f;
10892 break;
10893 }
10894 block->cur += n;
10895 return GRN_SUCCESS;
10896}
10897
10898typedef struct {
10899 grn_ii *ii; /* Inverted index */
10900 uint32_t buf_id; /* Buffer ID */
10901 uint32_t buf_seg_id; /* Buffer segment ID */
10902 buffer *buf; /* Buffer (to be unreferenced) */
10903 uint32_t chunk_id; /* Chunk ID */
10904 uint32_t chunk_seg_id; /* Chunk segment ID */
10905 uint8_t *chunk; /* Chunk (to be unreferenced) */
10906 uint32_t chunk_offset; /* Chunk write position */
10907 uint32_t chunk_size; /* Chunk size */
10908} grn_ii_builder_buffer;
10909
10910/*
10911 * grn_ii_builder_buffer_init initializes a buffer. Note that a buffer must be
10912 * finalized by grn_ii_builder_buffer_fin.
10913 */
10914static void
10915grn_ii_builder_buffer_init(grn_ctx *ctx, grn_ii_builder_buffer *buf,
10916 grn_ii *ii)
10917{
10918 buf->ii = ii;
10919 buf->buf_id = 0;
10920 buf->buf_seg_id = 0;
10921 buf->buf = NULL;
10922 buf->chunk_id = 0;
10923 buf->chunk_seg_id = 0;
10924 buf->chunk = NULL;
10925 buf->chunk_offset = 0;
10926 buf->chunk_size = 0;
10927}
10928
10929/* grn_ii_builder_buffer_fin finalizes a buffer. */
10930static void
10931grn_ii_builder_buffer_fin(grn_ctx *ctx, grn_ii_builder_buffer *buf)
10932{
10933 if (buf->buf) {
10934 GRN_IO_SEG_UNREF(buf->ii->seg, buf->buf_seg_id);
10935 }
10936 if (buf->chunk) {
10937 GRN_IO_SEG_UNREF(buf->ii->chunk, buf->chunk_seg_id);
10938 }
10939}
10940
10941/* grn_ii_builder_buffer_is_assigned returns whether a buffer is assigned. */
10942static grn_bool
10943grn_ii_builder_buffer_is_assigned(grn_ctx *ctx, grn_ii_builder_buffer *buf)
10944{
10945 return buf->buf != NULL;
10946}
10947
10948/* grn_ii_builder_buffer_assign assigns a buffer. */
10949static grn_rc
10950grn_ii_builder_buffer_assign(grn_ctx *ctx, grn_ii_builder_buffer *buf,
10951 size_t min_chunk_size)
10952{
10953 void *seg;
10954 size_t chunk_size;
10955 grn_rc rc;
10956
10957 /* Create a buffer. */
10958 buf->buf_id = GRN_II_PSEG_NOT_ASSIGNED;
10959 rc = buffer_segment_new(ctx, buf->ii, &buf->buf_id);
10960 if (rc != GRN_SUCCESS) {
10961 if (ctx->rc != GRN_SUCCESS) {
10962 ERR(rc, "failed to allocate segment for buffer");
10963 }
10964 return rc;
10965 }
10966 buf->buf_seg_id = buf->ii->header->binfo[buf->buf_id];
10967 GRN_IO_SEG_REF(buf->ii->seg, buf->buf_seg_id, seg);
10968 if (!seg) {
10969 if (ctx->rc == GRN_SUCCESS) {
10970 ERR(GRN_UNKNOWN_ERROR,
10971 "failed access buffer segment: buf_id = %u, seg_id = %u",
10972 buf->buf_id, buf->buf_seg_id);
10973 }
10974 return ctx->rc;
10975 }
10976 buf->buf = (buffer *)seg;
10977
10978 /* Create a chunk. */
10979 chunk_size = GRN_II_BUILDER_BUFFER_CHUNK_SIZE;
10980 while (chunk_size < min_chunk_size) {
10981 chunk_size *= 2;
10982 }
10983 rc = chunk_new(ctx, buf->ii, &buf->chunk_id, chunk_size);
10984 if (rc != GRN_SUCCESS) {
10985 return rc;
10986 }
10987 buf->chunk_seg_id = buf->chunk_id >> GRN_II_N_CHUNK_VARIATION;
10988 GRN_IO_SEG_REF(buf->ii->chunk, buf->chunk_seg_id, seg);
10989 if (!seg) {
10990 if (ctx->rc == GRN_SUCCESS) {
10991 ERR(GRN_UNKNOWN_ERROR,
10992 "failed access chunk segment: chunk_id = %u, seg_id = %u",
10993 buf->chunk_id, buf->chunk_seg_id);
10994 }
10995 return ctx->rc;
10996 }
10997 buf->chunk = (uint8_t *)seg;
10998 buf->chunk += (buf->chunk_id & ((1 << GRN_II_N_CHUNK_VARIATION) - 1)) <<
10999 GRN_II_W_LEAST_CHUNK;
11000 buf->chunk_offset = 0;
11001 buf->chunk_size = chunk_size;
11002
11003 buf->buf->header.chunk = buf->chunk_id;
11004 buf->buf->header.chunk_size = chunk_size;
11005 buf->buf->header.buffer_free = S_SEGMENT - sizeof(buffer_header);
11006 buf->buf->header.nterms = 0;
11007 buf->buf->header.nterms_void = 0;
11008 buf->ii->header->total_chunk_size += chunk_size;
11009 return GRN_SUCCESS;
11010}
11011
11012/* grn_ii_builder_buffer_flush flushes a buffer. */
11013static grn_rc
11014grn_ii_builder_buffer_flush(grn_ctx *ctx, grn_ii_builder_buffer *buf)
11015{
11016 grn_ii *ii;
11017
11018 buf->buf->header.buffer_free = S_SEGMENT - sizeof(buffer_header) -
11019 buf->buf->header.nterms * sizeof(buffer_term);
11020 GRN_LOG(ctx, GRN_LOG_DEBUG,
11021 "n_terms = %u, chunk_offset = %u, chunk_size = %u, total = %"
11022 GRN_FMT_INT64U "KB",
11023 buf->buf->header.nterms,
11024 buf->chunk_offset,
11025 buf->buf->header.chunk_size,
11026 buf->ii->header->total_chunk_size >> 10);
11027
11028 ii = buf->ii;
11029 grn_ii_builder_buffer_fin(ctx, buf);
11030 grn_ii_builder_buffer_init(ctx, buf, ii);
11031 return GRN_SUCCESS;
11032}
11033
11034typedef struct {
11035 grn_id tid; /* Term ID */
11036 uint32_t n; /* Number of integers in buffers */
11037 grn_id rid; /* Record ID */
11038 uint32_t rid_gap; /* Record ID gap */
11039 uint64_t pos_sum; /* Sum of position gaps */
11040
11041 uint32_t offset; /* Write offset */
11042 uint32_t size; /* Buffer size */
11043 grn_id *rid_buf; /* Buffer for record IDs (to be freed) */
11044 uint32_t *sid_buf; /* Buffer for section IDs (to be freed) */
11045 uint32_t *freq_buf; /* Buffer for frequencies (to be freed) */
11046 uint32_t *weight_buf; /* Buffer for weights (to be freed) */
11047
11048 uint32_t pos_offset; /* Write offset of pos_buf */
11049 uint32_t pos_size; /* Buffer size of pos_buf */
11050 uint32_t *pos_buf; /* Buffer for positions (to be freed) */
11051
11052 size_t enc_offset; /* Write offset of enc_buf */
11053 size_t enc_size; /* Buffer size of enc_buf */
11054 uint8_t *enc_buf; /* Buffer for encoded data (to be freed) */
11055} grn_ii_builder_chunk;
11056
11057/*
11058 * grn_ii_builder_chunk_init initializes a chunk. Note that an initialized
11059 * chunk must be finalized by grn_ii_builder_chunk_fin.
11060 */
11061static void
11062grn_ii_builder_chunk_init(grn_ctx *ctx, grn_ii_builder_chunk *chunk)
11063{
11064 chunk->tid = GRN_ID_NIL;
11065 chunk->n = 0;
11066 chunk->rid = GRN_ID_NIL;
11067 chunk->rid_gap = 0;
11068 chunk->pos_sum = 0;
11069
11070 chunk->offset = 0;
11071 chunk->size = 0;
11072 chunk->rid_buf = NULL;
11073 chunk->sid_buf = NULL;
11074 chunk->freq_buf = NULL;
11075 chunk->weight_buf = NULL;
11076
11077 chunk->pos_offset = 0;
11078 chunk->pos_size = 0;
11079 chunk->pos_buf = NULL;
11080
11081 chunk->enc_offset = 0;
11082 chunk->enc_size = 0;
11083 chunk->enc_buf = NULL;
11084}
11085
11086/* grn_ii_builder_chunk_fin finalizes a chunk. */
11087static void
11088grn_ii_builder_chunk_fin(grn_ctx *ctx, grn_ii_builder_chunk *chunk)
11089{
11090 if (chunk->enc_buf) {
11091 GRN_FREE(chunk->enc_buf);
11092 }
11093 if (chunk->pos_buf) {
11094 GRN_FREE(chunk->pos_buf);
11095 }
11096 if (chunk->weight_buf) {
11097 GRN_FREE(chunk->weight_buf);
11098 }
11099 if (chunk->freq_buf) {
11100 GRN_FREE(chunk->freq_buf);
11101 }
11102 if (chunk->sid_buf) {
11103 GRN_FREE(chunk->sid_buf);
11104 }
11105 if (chunk->rid_buf) {
11106 GRN_FREE(chunk->rid_buf);
11107 }
11108}
11109
11110/*
11111 * grn_ii_builder_chunk_clear clears stats except rid and buffers except
11112 * enc_buf.
11113 */
11114static void
11115grn_ii_builder_chunk_clear(grn_ctx *ctx, grn_ii_builder_chunk *chunk)
11116{
11117 chunk->n = 0;
11118 chunk->rid_gap = 0;
11119 chunk->pos_sum = 0;
11120 chunk->offset = 0;
11121 chunk->pos_offset = 0;
11122}
11123
11124/*
11125 * grn_ii_builder_chunk_extend_bufs extends buffers except pos_buf and enc_buf.
11126 */
11127static grn_rc
11128grn_ii_builder_chunk_extend_bufs(grn_ctx *ctx, grn_ii_builder_chunk *chunk,
11129 uint32_t ii_flags)
11130{
11131 uint32_t *buf, size = chunk->size ? chunk->size * 2 : 1;
11132 size_t n_bytes = size * sizeof(uint32_t);
11133
11134 buf = (uint32_t *)GRN_REALLOC(chunk->rid_buf, n_bytes);
11135 if (!buf) {
11136 ERR(GRN_NO_MEMORY_AVAILABLE,
11137 "failed to allocate memory for record IDs: n_bytes = %" GRN_FMT_SIZE,
11138 n_bytes);
11139 return ctx->rc;
11140 }
11141 chunk->rid_buf = buf;
11142
11143 if (ii_flags & GRN_OBJ_WITH_SECTION) {
11144 buf = (uint32_t *)GRN_REALLOC(chunk->sid_buf, n_bytes);
11145 if (!buf) {
11146 ERR(GRN_NO_MEMORY_AVAILABLE,
11147 "failed to allocate memory for section IDs:"
11148 " n_bytes = %" GRN_FMT_SIZE,
11149 n_bytes);
11150 return ctx->rc;
11151 }
11152 chunk->sid_buf = buf;
11153 }
11154
11155 buf = (uint32_t *)GRN_REALLOC(chunk->freq_buf, n_bytes);
11156 if (!buf) {
11157 ERR(GRN_NO_MEMORY_AVAILABLE,
11158 "failed to allocate memory for frequencies: n_bytes = %" GRN_FMT_SIZE,
11159 n_bytes);
11160 return ctx->rc;
11161 }
11162 chunk->freq_buf = buf;
11163
11164 if (ii_flags & GRN_OBJ_WITH_WEIGHT) {
11165 buf = (uint32_t *)GRN_REALLOC(chunk->weight_buf, n_bytes);
11166 if (!buf) {
11167 ERR(GRN_NO_MEMORY_AVAILABLE,
11168 "failed to allocate memory for weights: n_bytes = %" GRN_FMT_SIZE,
11169 n_bytes);
11170 return ctx->rc;
11171 }
11172 chunk->weight_buf = buf;
11173 }
11174
11175 chunk->size = size;
11176 return GRN_SUCCESS;
11177}
11178
11179/* grn_ii_builder_chunk_extend_pos_buf extends pos_buf. */
11180static grn_rc
11181grn_ii_builder_chunk_extend_pos_buf(grn_ctx *ctx, grn_ii_builder_chunk *chunk)
11182{
11183 uint32_t *buf, size = chunk->pos_size ? chunk->pos_size * 2 : 1;
11184 size_t n_bytes = size * sizeof(uint32_t);
11185 buf = (uint32_t *)GRN_REALLOC(chunk->pos_buf, n_bytes);
11186 if (!buf) {
11187 ERR(GRN_NO_MEMORY_AVAILABLE,
11188 "failed to allocate memory for positions: n_bytes = %" GRN_FMT_SIZE,
11189 n_bytes);
11190 return ctx->rc;
11191 }
11192 chunk->pos_buf = buf;
11193 chunk->pos_size = size;
11194 return GRN_SUCCESS;
11195}
11196
11197/*
11198 * grn_ii_builder_chunk_reserve_enc_buf estimates a size that is enough to
11199 * store encoded data and allocates memory to enc_buf.
11200 */
11201static grn_rc
11202grn_ii_builder_chunk_reserve_enc_buf(grn_ctx *ctx, grn_ii_builder_chunk *chunk,
11203 uint32_t n_cinfos)
11204{
11205 size_t rich_size = (chunk->n + 4) * sizeof(uint32_t) +
11206 n_cinfos * sizeof(chunk_info);
11207 if (chunk->enc_size < rich_size) {
11208 size_t size = chunk->enc_size ? chunk->enc_size * 2 : 1;
11209 uint8_t *buf;
11210 while (size < rich_size) {
11211 size *= 2;
11212 }
11213 buf = GRN_REALLOC(chunk->enc_buf, size);
11214 if (!buf) {
11215 ERR(GRN_NO_MEMORY_AVAILABLE,
11216 "failed to allocate memory for encoding: size = %" GRN_FMT_SIZE,
11217 size);
11218 return ctx->rc;
11219 }
11220 chunk->enc_buf = buf;
11221 chunk->enc_size = size;
11222 }
11223 chunk->enc_offset = 0;
11224 return GRN_SUCCESS;
11225}
11226
11227/* grn_ii_builder_chunk_encode encodes a chunk buffer. */
11228static void
11229grn_ii_builder_chunk_encode_buf(grn_ctx *ctx, grn_ii_builder_chunk *chunk,
11230 uint32_t *values, uint32_t n_values,
11231 grn_bool use_p_enc)
11232{
11233 uint8_t *p = chunk->enc_buf + chunk->enc_offset;
11234 uint32_t i;
11235 if (use_p_enc) {
11236 uint8_t freq[33];
11237 uint32_t buf[UNIT_SIZE];
11238 while (n_values >= UNIT_SIZE) {
11239 memset(freq, 0, 33);
11240 for (i = 0; i < UNIT_SIZE; i++) {
11241 buf[i] = values[i];
11242 if (buf[i]) {
11243 uint32_t w;
11244 GRN_BIT_SCAN_REV(buf[i], w);
11245 freq[w + 1]++;
11246 } else {
11247 freq[0]++;
11248 }
11249 }
11250 p = pack(buf, UNIT_SIZE, freq, p);
11251 values += UNIT_SIZE;
11252 n_values -= UNIT_SIZE;
11253 }
11254 if (n_values) {
11255 memset(freq, 0, 33);
11256 for (i = 0; i < n_values; i++) {
11257 buf[i] = values[i];
11258 if (buf[i]) {
11259 uint32_t w;
11260 GRN_BIT_SCAN_REV(buf[i], w);
11261 freq[w + 1]++;
11262 } else {
11263 freq[0]++;
11264 }
11265 }
11266 p = pack(buf, n_values, freq, p);
11267 }
11268 } else {
11269 for (i = 0; i < n_values; i++) {
11270 GRN_B_ENC(values[i], p);
11271 }
11272 }
11273 chunk->enc_offset = p - chunk->enc_buf;
11274}
11275
11276/* grn_ii_builder_chunk_encode encodes a chunk. */
11277static grn_rc
11278grn_ii_builder_chunk_encode(grn_ctx *ctx, grn_ii_builder_chunk *chunk,
11279 chunk_info *cinfos, uint32_t n_cinfos)
11280{
11281 grn_rc rc;
11282 uint8_t *p;
11283 uint8_t shift = 0, use_p_enc_flags = 0;
11284 uint8_t rid_use_p_enc, rest_use_p_enc, pos_use_p_enc = 0;
11285
11286 /* Choose an encoding. */
11287 rid_use_p_enc = chunk->offset >= 16 && chunk->offset > (chunk->rid >> 8);
11288 use_p_enc_flags |= rid_use_p_enc << shift++;
11289 rest_use_p_enc = chunk->offset >= 3;
11290 if (chunk->sid_buf) {
11291 use_p_enc_flags |= rest_use_p_enc << shift++;
11292 }
11293 use_p_enc_flags |= rest_use_p_enc << shift++;
11294 if (chunk->weight_buf) {
11295 use_p_enc_flags |= rest_use_p_enc << shift++;
11296 }
11297 if (chunk->pos_buf) {
11298 pos_use_p_enc = chunk->pos_offset >= 32 &&
11299 chunk->pos_offset > (chunk->pos_sum >> 13);
11300 use_p_enc_flags |= pos_use_p_enc << shift++;
11301 }
11302
11303 rc = grn_ii_builder_chunk_reserve_enc_buf(ctx, chunk, n_cinfos);
11304 if (rc != GRN_SUCCESS) {
11305 return rc;
11306 }
11307
11308 /* Encode a header. */
11309 p = chunk->enc_buf;
11310 if (n_cinfos) {
11311 uint32_t i;
11312 GRN_B_ENC(n_cinfos, p);
11313 for (i = 0; i < n_cinfos; i++) {
11314 GRN_B_ENC(cinfos[i].segno, p);
11315 GRN_B_ENC(cinfos[i].size, p);
11316 GRN_B_ENC(cinfos[i].dgap, p);
11317 }
11318 }
11319 if (use_p_enc_flags) {
11320 GRN_B_ENC(use_p_enc_flags << 1, p);
11321 GRN_B_ENC(chunk->offset, p);
11322 if (chunk->pos_buf) {
11323 GRN_B_ENC(chunk->pos_offset - chunk->offset, p);
11324 }
11325 } else {
11326 GRN_B_ENC((chunk->offset << 1) | 1, p);
11327 }
11328 chunk->enc_offset = p - chunk->enc_buf;
11329
11330 /* Encode a body. */
11331 grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->rid_buf, chunk->offset,
11332 rid_use_p_enc);
11333 if (chunk->sid_buf) {
11334 grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->sid_buf, chunk->offset,
11335 rest_use_p_enc);
11336 }
11337 grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->freq_buf, chunk->offset,
11338 rest_use_p_enc);
11339 if (chunk->weight_buf) {
11340 grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->weight_buf,
11341 chunk->offset, rest_use_p_enc);
11342 }
11343 if (chunk->pos_buf) {
11344 grn_ii_builder_chunk_encode_buf(ctx, chunk, chunk->pos_buf,
11345 chunk->pos_offset, pos_use_p_enc);
11346 }
11347
11348 return GRN_SUCCESS;
11349}
11350
11351typedef struct {
11352 grn_ii *ii; /* Building inverted index */
11353 grn_ii_builder_options options; /* Options */
11354
11355 grn_obj *src_table; /* Source table */
11356 grn_obj **srcs; /* Source columns (to be freed) */
11357 uint32_t n_srcs; /* Number of source columns */
11358 uint8_t sid_bits; /* Number of bits for section ID */
11359 uint64_t sid_mask; /* Mask bits for section ID */
11360
11361 grn_obj *lexicon; /* Block lexicon (to be closed) */
11362 grn_obj *tokenizer; /* Lexicon's tokenizer */
11363 grn_obj *normalizer; /* Lexicon's normalzier */
11364
11365 uint32_t n; /* Number of integers appended to the current block */
11366 grn_id rid; /* Record ID */
11367 uint32_t sid; /* Section ID */
11368 uint32_t pos; /* Position */
11369
11370 grn_ii_builder_term *terms; /* Terms (to be freed) */
11371 uint32_t n_terms; /* Number of distinct terms */
11372 uint32_t max_n_terms; /* Maximum number of distinct terms */
11373 uint32_t terms_size; /* Buffer size of terms */
11374
11375 /* A temporary file to save blocks. */
11376 char path[PATH_MAX]; /* File path */
11377 int fd; /* File descriptor (to be closed) */
11378 uint8_t *file_buf; /* File buffer for buffered output (to be freed) */
11379 uint32_t file_buf_offset; /* File buffer write offset */
11380
11381 grn_ii_builder_block *blocks; /* Blocks (to be freed) */
11382 uint32_t n_blocks; /* Number of blocks */
11383 uint32_t blocks_size; /* Buffer size of blocks */
11384
11385 grn_ii_builder_buffer buf; /* Buffer (to be finalized) */
11386 grn_ii_builder_chunk chunk; /* Chunk (to be finalized) */
11387
11388 uint32_t df; /* Document frequency (number of sections) */
11389 chunk_info *cinfos; /* Chunk headers (to be freed) */
11390 uint32_t n_cinfos; /* Number of chunks */
11391 uint32_t cinfos_size; /* Size of cinfos */
11392} grn_ii_builder;
11393
11394/*
11395 * grn_ii_builder_init initializes a builder. Note that an initialized builder
11396 * must be finalized by grn_ii_builder_fin.
11397 */
11398static grn_rc
11399grn_ii_builder_init(grn_ctx *ctx, grn_ii_builder *builder,
11400 grn_ii *ii, const grn_ii_builder_options *options)
11401{
11402 builder->ii = ii;
11403 builder->options = *options;
11404 if (grn_ii_builder_block_threshold_force > 0) {
11405 builder->options.block_threshold = grn_ii_builder_block_threshold_force;
11406 }
11407 grn_ii_builder_options_fix(&builder->options);
11408
11409 builder->src_table = NULL;
11410 builder->srcs = NULL;
11411 builder->n_srcs = 0;
11412 builder->sid_bits = 0;
11413 builder->sid_mask = 0;
11414
11415 builder->lexicon = NULL;
11416 builder->tokenizer = NULL;
11417 builder->normalizer = NULL;
11418
11419 builder->n = 0;
11420 builder->rid = GRN_ID_NIL;
11421 builder->sid = 0;
11422 builder->pos = 0;
11423
11424 builder->terms = NULL;
11425 builder->n_terms = 0;
11426 builder->max_n_terms = 0;
11427 builder->terms_size = 0;
11428
11429 builder->path[0] = '\0';
11430 builder->fd = -1;
11431 builder->file_buf = NULL;
11432 builder->file_buf_offset = 0;
11433
11434 builder->blocks = NULL;
11435 builder->n_blocks = 0;
11436 builder->blocks_size = 0;
11437
11438 grn_ii_builder_buffer_init(ctx, &builder->buf, ii);
11439 grn_ii_builder_chunk_init(ctx, &builder->chunk);
11440
11441 builder->df = 0;
11442 builder->cinfos = NULL;
11443 builder->n_cinfos = 0;
11444 builder->cinfos_size = 0;
11445
11446 return GRN_SUCCESS;
11447}
11448
11449/* grn_ii_builder_fin_terms finalizes terms. */
11450static void
11451grn_ii_builder_fin_terms(grn_ctx *ctx, grn_ii_builder *builder)
11452{
11453 if (builder->terms) {
11454 uint32_t i;
11455 for (i = 0; i < builder->max_n_terms; i++) {
11456 grn_ii_builder_term_fin(ctx, &builder->terms[i]);
11457 }
11458 GRN_FREE(builder->terms);
11459
11460 /* To avoid double finalization. */
11461 builder->terms = NULL;
11462 }
11463}
11464
11465/* grn_ii_builder_fin finalizes a builder. */
11466static grn_rc
11467grn_ii_builder_fin(grn_ctx *ctx, grn_ii_builder *builder)
11468{
11469 if (builder->cinfos) {
11470 GRN_FREE(builder->cinfos);
11471 }
11472 grn_ii_builder_chunk_fin(ctx, &builder->chunk);
11473 grn_ii_builder_buffer_fin(ctx, &builder->buf);
11474 if (builder->blocks) {
11475 uint32_t i;
11476 for (i = 0; i < builder->n_blocks; i++) {
11477 grn_ii_builder_block_fin(ctx, &builder->blocks[i]);
11478 }
11479 GRN_FREE(builder->blocks);
11480 }
11481 if (builder->file_buf) {
11482 GRN_FREE(builder->file_buf);
11483 }
11484 if (builder->fd != -1) {
11485 grn_close(builder->fd);
11486 if (grn_unlink(builder->path) == 0) {
11487 GRN_LOG(ctx, GRN_LOG_INFO,
11488 "[ii][builder][fin] removed path: <%s>",
11489 builder->path);
11490 } else {
11491 ERRNO_ERR("[ii][builder][fin] failed to remove path: <%s>",
11492 builder->path);
11493 }
11494 }
11495 grn_ii_builder_fin_terms(ctx, builder);
11496 if (builder->lexicon) {
11497 grn_obj_close(ctx, builder->lexicon);
11498 }
11499 if (builder->srcs) {
11500 GRN_FREE(builder->srcs);
11501 }
11502 return GRN_SUCCESS;
11503}
11504
11505/*
11506 * grn_ii_builder_open creates a builder. Note that a builder must be closed by
11507 * grn_ii_builder_close.
11508 */
11509static grn_rc
11510grn_ii_builder_open(grn_ctx *ctx, grn_ii *ii,
11511 const grn_ii_builder_options *options,
11512 grn_ii_builder **builder)
11513{
11514 grn_rc rc;
11515 grn_ii_builder *new_builder = GRN_MALLOCN(grn_ii_builder, 1);
11516 if (!new_builder) {
11517 return GRN_NO_MEMORY_AVAILABLE;
11518 }
11519 if (!options) {
11520 options = &grn_ii_builder_default_options;
11521 }
11522 rc = grn_ii_builder_init(ctx, new_builder, ii, options);
11523 if (rc != GRN_SUCCESS) {
11524 GRN_FREE(new_builder);
11525 return rc;
11526 }
11527 *builder = new_builder;
11528 return GRN_SUCCESS;
11529}
11530
11531/* grn_ii_builder_close closes a builder. */
11532static grn_rc
11533grn_ii_builder_close(grn_ctx *ctx, grn_ii_builder *builder)
11534{
11535 grn_rc rc;
11536 if (!builder) {
11537 ERR(GRN_INVALID_ARGUMENT, "builder is null");
11538 return ctx->rc;
11539 }
11540 rc = grn_ii_builder_fin(ctx, builder);
11541 GRN_FREE(builder);
11542 return rc;
11543}
11544
11545/* grn_ii_builder_create_lexicon creates a block lexicon. */
11546static grn_rc
11547grn_ii_builder_create_lexicon(grn_ctx *ctx, grn_ii_builder *builder)
11548{
11549 grn_table_flags flags;
11550 grn_obj *domain = grn_ctx_at(ctx, builder->ii->lexicon->header.domain);
11551 grn_obj *range = grn_ctx_at(ctx, DB_OBJ(builder->ii->lexicon)->range);
11552 grn_obj *tokenizer, *normalizer, *token_filters;
11553 grn_rc rc = grn_table_get_info(ctx, builder->ii->lexicon, &flags, NULL,
11554 &tokenizer, &normalizer, &token_filters);
11555 if (rc != GRN_SUCCESS) {
11556 return rc;
11557 }
11558 flags &= ~GRN_OBJ_PERSISTENT;
11559 builder->lexicon = grn_table_create(ctx, NULL, 0, NULL,
11560 flags, domain, range);
11561 if (!builder->lexicon) {
11562 if (ctx->rc == GRN_SUCCESS) {
11563 ERR(GRN_UNKNOWN_ERROR, "[index] failed to create a block lexicon");
11564 }
11565 return ctx->rc;
11566 }
11567 builder->tokenizer = tokenizer;
11568 builder->normalizer = normalizer;
11569 rc = grn_obj_set_info(ctx, builder->lexicon,
11570 GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
11571 if (rc == GRN_SUCCESS) {
11572 rc = grn_obj_set_info(ctx, builder->lexicon,
11573 GRN_INFO_NORMALIZER, normalizer);
11574 if (rc == GRN_SUCCESS) {
11575 rc = grn_obj_set_info(ctx, builder->lexicon,
11576 GRN_INFO_TOKEN_FILTERS, token_filters);
11577 }
11578 }
11579 if (rc != GRN_SUCCESS) {
11580 return rc;
11581 }
11582 if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) {
11583 if (builder->options.lexicon_cache_size) {
11584 rc = grn_pat_cache_enable(ctx, (grn_pat *)builder->lexicon,
11585 builder->options.lexicon_cache_size);
11586 if (rc != GRN_SUCCESS) {
11587 return rc;
11588 }
11589 }
11590 }
11591 return GRN_SUCCESS;
11592}
11593
11594/*
11595 * grn_ii_builder_extend_terms extends a buffer for terms in order to make
11596 * terms[n_terms - 1] available.
11597 */
11598static grn_rc
11599grn_ii_builder_extend_terms(grn_ctx *ctx, grn_ii_builder *builder,
11600 uint32_t n_terms)
11601{
11602 if (n_terms <= builder->n_terms) {
11603 return GRN_SUCCESS;
11604 }
11605
11606 if (n_terms > builder->max_n_terms) {
11607 uint32_t i;
11608 if (n_terms > builder->terms_size) {
11609 /* Resize builder->terms for new terms. */
11610 size_t n_bytes;
11611 uint32_t terms_size = builder->terms_size ? builder->terms_size * 2 : 1;
11612 grn_ii_builder_term *terms;
11613 while (terms_size < n_terms) {
11614 terms_size *= 2;
11615 }
11616 n_bytes = terms_size * sizeof(grn_ii_builder_term);
11617 terms = (grn_ii_builder_term *)GRN_REALLOC(builder->terms, n_bytes);
11618 if (!terms) {
11619 ERR(GRN_NO_MEMORY_AVAILABLE,
11620 "failed to allocate memory for terms: n_bytes = %" GRN_FMT_SIZE,
11621 n_bytes);
11622 return ctx->rc;
11623 }
11624 builder->terms = terms;
11625 builder->terms_size = terms_size;
11626 }
11627 /* Initialize new terms. */
11628 for (i = builder->max_n_terms; i < n_terms; i++) {
11629 grn_ii_builder_term_init(ctx, &builder->terms[i]);
11630 }
11631 builder->max_n_terms = n_terms;
11632 }
11633
11634 builder->n += n_terms - builder->n_terms;
11635 builder->n_terms = n_terms;
11636 return GRN_SUCCESS;
11637}
11638
11639/* grn_ii_builder_get_term gets a term associated with tid. */
11640inline static grn_rc
11641grn_ii_builder_get_term(grn_ctx *ctx, grn_ii_builder *builder, grn_id tid,
11642 grn_ii_builder_term **term)
11643{
11644 uint32_t n_terms = tid;
11645 if (n_terms > builder->n_terms) {
11646 grn_rc rc = grn_ii_builder_extend_terms(ctx, builder, n_terms);
11647 if (rc != GRN_SUCCESS) {
11648 return rc;
11649 }
11650 }
11651 *term = &builder->terms[tid - 1];
11652 return GRN_SUCCESS;
11653}
11654
11655/* grn_ii_builder_flush_file_buf flushes buffered data as a block. */
11656static grn_rc
11657grn_ii_builder_flush_file_buf(grn_ctx *ctx, grn_ii_builder *builder)
11658{
11659 if (builder->file_buf_offset) {
11660 ssize_t size = grn_write(builder->fd, builder->file_buf,
11661 builder->file_buf_offset);
11662 if ((uint64_t)size != builder->file_buf_offset) {
11663 SERR("failed to write data: expected = %u, actual = %" GRN_FMT_INT64D,
11664 builder->file_buf_offset, (int64_t)size);
11665 }
11666 builder->file_buf_offset = 0;
11667 }
11668 return GRN_SUCCESS;
11669}
11670
11671/* grn_ii_builder_flush_term flushes a term and clears it */
11672static grn_rc
11673grn_ii_builder_flush_term(grn_ctx *ctx, grn_ii_builder *builder,
11674 grn_ii_builder_term *term)
11675{
11676 grn_rc rc;
11677 uint8_t *term_buf;
11678
11679 /* Append sentinels. */
11680 if (term->rid != GRN_ID_NIL) {
11681 if (builder->ii->header->flags & GRN_OBJ_WITH_POSITION) {
11682 rc = grn_ii_builder_term_append(ctx, term, 0);
11683 } else {
11684 rc = grn_ii_builder_term_append(ctx, term, term->pos_or_freq);
11685 }
11686 if (rc != GRN_SUCCESS) {
11687 return rc;
11688 }
11689 }
11690 rc = grn_ii_builder_term_append(ctx, term, 0);
11691 if (rc != GRN_SUCCESS) {
11692 return rc;
11693 }
11694
11695 {
11696 /* Put the global term ID. */
11697 int key_size;
11698 char key[GRN_TABLE_MAX_KEY_SIZE];
11699 uint8_t *p;
11700 uint32_t rest, value;
11701 grn_rc rc;
11702 grn_id local_tid = term - builder->terms + 1, global_tid;
11703 key_size = grn_table_get_key(ctx, builder->lexicon, local_tid,
11704 key, GRN_TABLE_MAX_KEY_SIZE);
11705 if (!key_size) {
11706 if (ctx->rc == GRN_SUCCESS) {
11707 ERR(GRN_UNKNOWN_ERROR, "failed to get key: tid = %u", local_tid);
11708 }
11709 return ctx->rc;
11710 }
11711 global_tid = grn_table_add(ctx, builder->ii->lexicon, key, key_size, NULL);
11712 if (global_tid == GRN_ID_NIL) {
11713 if (ctx->rc == GRN_SUCCESS) {
11714 ERR(GRN_UNKNOWN_ERROR,
11715 "failed to get global term ID: tid = %u, key = \"%.*s\"",
11716 local_tid, key_size, key);
11717 }
11718 return ctx->rc;
11719 }
11720
11721 rest = builder->options.file_buf_size - builder->file_buf_offset;
11722 if (rest < 10) {
11723 rc = grn_ii_builder_flush_file_buf(ctx, builder);
11724 if (rc != GRN_SUCCESS) {
11725 return rc;
11726 }
11727 }
11728 value = global_tid;
11729 p = builder->file_buf + builder->file_buf_offset;
11730 if (value < 1U << 5) {
11731 p[0] = (uint8_t)value;
11732 builder->file_buf_offset++;
11733 } else if (value < 1U << 13) {
11734 p[0] = (uint8_t)((value & 0x1f) | (1 << 5));
11735 p[1] = (uint8_t)(value >> 5);
11736 builder->file_buf_offset += 2;
11737 } else {
11738 uint8_t i, n;
11739 if (value < 1U << 21) {
11740 n = 3;
11741 } else if (value < 1U << 29) {
11742 n = 4;
11743 } else {
11744 n = 5;
11745 }
11746 p[0] = (uint8_t)(value & 0x1f) | ((n - 1) << 5);
11747 value >>= 5;
11748 for (i = 1; i < n; i++) {
11749 p[i] = (uint8_t)value;
11750 value >>= 8;
11751 }
11752 builder->file_buf_offset += n;
11753 }
11754 }
11755
11756 /* Flush a term buffer. */
11757 term_buf = grn_ii_builder_term_get_buf(term);
11758 if (term->offset > builder->options.file_buf_size) {
11759 ssize_t size;
11760 rc = grn_ii_builder_flush_file_buf(ctx, builder);
11761 if (rc != GRN_SUCCESS) {
11762 return rc;
11763 }
11764 size = grn_write(builder->fd, term_buf, term->offset);
11765 if ((uint64_t)size != term->offset) {
11766 SERR("failed to write data: expected = %u, actual = %" GRN_FMT_INT64D,
11767 term->offset, (int64_t)size);
11768 }
11769 } else {
11770 uint32_t rest = builder->options.file_buf_size - builder->file_buf_offset;
11771 if (term->offset <= rest) {
11772 grn_memcpy(builder->file_buf + builder->file_buf_offset,
11773 term_buf, term->offset);
11774 builder->file_buf_offset += term->offset;
11775 } else {
11776 grn_memcpy(builder->file_buf + builder->file_buf_offset,
11777 term_buf, rest);
11778 builder->file_buf_offset += rest;
11779 rc = grn_ii_builder_flush_file_buf(ctx, builder);
11780 if (rc != GRN_SUCCESS) {
11781 return rc;
11782 }
11783 builder->file_buf_offset = term->offset - rest;
11784 grn_memcpy(builder->file_buf, term_buf + rest, builder->file_buf_offset);
11785 }
11786 }
11787 grn_ii_builder_term_reinit(ctx, term);
11788 return GRN_SUCCESS;
11789}
11790
11791/*
11792 * grn_ii_builder_create_file creates a temporary file and allocates memory for
11793 * buffered output.
11794 */
11795static grn_rc
11796grn_ii_builder_create_file(grn_ctx *ctx, grn_ii_builder *builder)
11797{
11798 grn_snprintf(builder->path, PATH_MAX, PATH_MAX,
11799 "%sXXXXXX", grn_io_path(builder->ii->seg));
11800 builder->fd = grn_mkstemp(builder->path);
11801 if (builder->fd == -1) {
11802 SERR("failed to create a temporary file: path = \"%s\"",
11803 builder->path);
11804 return ctx->rc;
11805 }
11806 builder->file_buf = (uint8_t *)GRN_MALLOC(builder->options.file_buf_size);
11807 if (!builder->file_buf) {
11808 ERR(GRN_NO_MEMORY_AVAILABLE,
11809 "failed to allocate memory for buffered output: size = %u",
11810 builder->options.file_buf_size);
11811 return ctx->rc;
11812 }
11813 return GRN_SUCCESS;
11814}
11815
11816/* grn_ii_builder_register_block registers a block. */
11817static grn_rc
11818grn_ii_builder_register_block(grn_ctx *ctx, grn_ii_builder *builder)
11819{
11820 grn_ii_builder_block *block;
11821 uint64_t file_offset = grn_lseek(builder->fd, 0, SEEK_CUR);
11822 if (file_offset == (uint64_t)-1) {
11823 SERR("failed to get file offset");
11824 return ctx->rc;
11825 }
11826 if (builder->n_blocks >= builder->blocks_size) {
11827 size_t n_bytes;
11828 uint32_t blocks_size = 1;
11829 grn_ii_builder_block *blocks;
11830 while (blocks_size <= builder->n_blocks) {
11831 blocks_size *= 2;
11832 }
11833 n_bytes = blocks_size * sizeof(grn_ii_builder_block);
11834 blocks = (grn_ii_builder_block *)GRN_REALLOC(builder->blocks, n_bytes);
11835 if (!blocks) {
11836 ERR(GRN_NO_MEMORY_AVAILABLE,
11837 "failed to allocate memory for block: n_bytes = %" GRN_FMT_SIZE,
11838 n_bytes);
11839 return ctx->rc;
11840 }
11841 builder->blocks = blocks;
11842 builder->blocks_size = blocks_size;
11843 }
11844 block = &builder->blocks[builder->n_blocks];
11845 grn_ii_builder_block_init(ctx, block);
11846 if (!builder->n_blocks) {
11847 block->offset = 0;
11848 } else {
11849 grn_ii_builder_block *prev_block = &builder->blocks[builder->n_blocks - 1];
11850 block->offset = prev_block->offset + prev_block->rest;
11851 }
11852 block->rest = (uint32_t)(file_offset - block->offset);
11853 builder->n_blocks++;
11854 return GRN_SUCCESS;
11855}
11856
11857/* grn_ii_builder_flush_block flushes a block to a temporary file. */
11858static grn_rc
11859grn_ii_builder_flush_block(grn_ctx *ctx, grn_ii_builder *builder)
11860{
11861 grn_rc rc;
11862 grn_table_cursor *cursor;
11863
11864 if (!builder->n) {
11865 /* Do nothing if there are no output data. */
11866 return GRN_SUCCESS;
11867 }
11868 if (builder->fd == -1) {
11869 rc = grn_ii_builder_create_file(ctx, builder);
11870 if (rc != GRN_SUCCESS) {
11871 return rc;
11872 }
11873 }
11874
11875 /* Flush terms into a temporary file. */
11876 cursor = grn_table_cursor_open(ctx, builder->lexicon,
11877 NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_KEY);
11878 for (;;) {
11879 grn_id tid = grn_table_cursor_next(ctx, cursor);
11880 if (tid == GRN_ID_NIL) {
11881 break;
11882 }
11883 rc = grn_ii_builder_flush_term(ctx, builder, &builder->terms[tid - 1]);
11884 if (rc != GRN_SUCCESS) {
11885 return rc;
11886 }
11887 }
11888 grn_table_cursor_close(ctx, cursor);
11889 rc = grn_ii_builder_flush_file_buf(ctx, builder);
11890 if (rc != GRN_SUCCESS) {
11891 return rc;
11892 }
11893
11894 /* Register a block and clear the current data. */
11895 rc = grn_ii_builder_register_block(ctx, builder);
11896 if (rc != GRN_SUCCESS) {
11897 return rc;
11898 }
11899 rc = grn_table_truncate(ctx, builder->lexicon);
11900 if (rc != GRN_SUCCESS) {
11901 return rc;
11902 }
11903 builder->rid = GRN_ID_NIL;
11904 builder->n_terms = 0;
11905 builder->n = 0;
11906 return GRN_SUCCESS;
11907}
11908
11909/* grn_ii_builder_append_token appends a token. */
11910static grn_rc
11911grn_ii_builder_append_token(grn_ctx *ctx, grn_ii_builder *builder,
11912 grn_id rid, uint32_t sid, uint32_t weight,
11913 grn_id tid, uint32_t pos)
11914{
11915 grn_rc rc;
11916 uint32_t ii_flags = builder->ii->header->flags;
11917 grn_ii_builder_term *term;
11918 rc = grn_ii_builder_get_term(ctx, builder, tid, &term);
11919 if (rc != GRN_SUCCESS) {
11920 return rc;
11921 }
11922 if (rid != term->rid || sid != term->sid) {
11923 uint64_t rsid;
11924 if (term->rid != GRN_ID_NIL) {
11925 if (ii_flags & GRN_OBJ_WITH_POSITION) {
11926 /* Append the end of positions. */
11927 rc = grn_ii_builder_term_append(ctx, term, 0);
11928 if (rc != GRN_SUCCESS) {
11929 return rc;
11930 }
11931 builder->n++;
11932 } else {
11933 /* Append a frequency if positions are not available. */
11934 rc = grn_ii_builder_term_append(ctx, term, term->pos_or_freq);
11935 if (rc != GRN_SUCCESS) {
11936 return rc;
11937 }
11938 builder->n++;
11939 }
11940 }
11941 rsid = ((uint64_t)(rid - term->rid) << builder->sid_bits) | (sid - 1);
11942 rc = grn_ii_builder_term_append(ctx, term, rsid);
11943 if (rc != GRN_SUCCESS) {
11944 return rc;
11945 }
11946 builder->n++;
11947 if (ii_flags & GRN_OBJ_WITH_WEIGHT) {
11948 rc = grn_ii_builder_term_append(ctx, term, weight);
11949 if (rc != GRN_SUCCESS) {
11950 return rc;
11951 }
11952 builder->n++;
11953 }
11954 term->rid = rid;
11955 term->sid = sid;
11956 term->pos_or_freq = 0;
11957 }
11958 if (ii_flags & GRN_OBJ_WITH_POSITION) {
11959 rc = grn_ii_builder_term_append(ctx, term, pos - term->pos_or_freq);
11960 if (rc != GRN_SUCCESS) {
11961 return rc;
11962 }
11963 builder->n++;
11964 term->pos_or_freq = pos;
11965 } else {
11966 term->pos_or_freq++;
11967 }
11968 return GRN_SUCCESS;
11969}
11970
11971/*
11972 * grn_ii_builder_append_value appends a value. Note that values must be
11973 * appended in ascending rid and sid order.
11974 */
11975static grn_rc
11976grn_ii_builder_append_value(grn_ctx *ctx, grn_ii_builder *builder,
11977 grn_id rid, uint32_t sid, uint32_t weight,
11978 const char *value, uint32_t value_size)
11979{
11980 uint32_t pos = 0;
11981 grn_token_cursor *cursor;
11982 if (rid != builder->rid) {
11983 builder->rid = rid;
11984 builder->sid = sid;
11985 builder->pos = 1;
11986 } else if (sid != builder->sid) {
11987 builder->sid = sid;
11988 builder->pos = 1;
11989 } else {
11990 /* Insert a space between values. */
11991 builder->pos++;
11992 }
11993 if (value_size) {
11994 if (!builder->tokenizer && !builder->normalizer) {
11995 grn_id tid;
11996 switch (builder->lexicon->header.type) {
11997 case GRN_TABLE_PAT_KEY :
11998 tid = grn_pat_add(ctx, (grn_pat *)builder->lexicon,
11999 value, value_size, NULL, NULL);
12000 break;
12001 case GRN_TABLE_DAT_KEY :
12002 tid = grn_dat_add(ctx, (grn_dat *)builder->lexicon,
12003 value, value_size, NULL, NULL);
12004 break;
12005 case GRN_TABLE_HASH_KEY :
12006 tid = grn_hash_add(ctx, (grn_hash *)builder->lexicon,
12007 value, value_size, NULL, NULL);
12008 break;
12009 case GRN_TABLE_NO_KEY :
12010 tid = *(grn_id *)value;
12011 break;
12012 default :
12013 tid = GRN_ID_NIL;
12014 break;
12015 }
12016 if (tid != GRN_ID_NIL) {
12017 grn_rc rc;
12018 pos = builder->pos;
12019 rc = grn_ii_builder_append_token(ctx, builder, rid, sid,
12020 weight, tid, pos);
12021 if (rc != GRN_SUCCESS) {
12022 return rc;
12023 }
12024 }
12025 } else {
12026 cursor = grn_token_cursor_open(ctx, builder->lexicon, value, value_size,
12027 GRN_TOKEN_ADD, 0);
12028 if (!cursor) {
12029 if (ctx->rc == GRN_SUCCESS) {
12030 ERR(GRN_UNKNOWN_ERROR,
12031 "grn_token_cursor_open failed: value = <%.*s>",
12032 value_size, value);
12033 }
12034 return ctx->rc;
12035 }
12036 while (cursor->status == GRN_TOKEN_CURSOR_DOING) {
12037 grn_id tid = grn_token_cursor_next(ctx, cursor);
12038 if (tid != GRN_ID_NIL) {
12039 grn_rc rc;
12040 pos = builder->pos + cursor->pos;
12041 rc = grn_ii_builder_append_token(ctx, builder, rid, sid,
12042 weight, tid, pos);
12043 if (rc != GRN_SUCCESS) {
12044 break;
12045 }
12046 }
12047 }
12048 grn_token_cursor_close(ctx, cursor);
12049 }
12050 }
12051 builder->pos = pos + 1;
12052 return ctx->rc;
12053}
12054
12055/* grn_ii_builder_append_obj appends a BULK, UVECTOR or VECTOR object. */
12056static grn_rc
12057grn_ii_builder_append_obj(grn_ctx *ctx, grn_ii_builder *builder,
12058 grn_id rid, uint32_t sid, grn_obj *obj)
12059{
12060 switch (obj->header.type) {
12061 case GRN_BULK :
12062 return grn_ii_builder_append_value(ctx, builder, rid, sid, 0,
12063 GRN_TEXT_VALUE(obj), GRN_TEXT_LEN(obj));
12064 case GRN_UVECTOR :
12065 {
12066 const char *p = GRN_BULK_HEAD(obj);
12067 uint32_t i, n_values = grn_uvector_size(ctx, obj);
12068 uint32_t value_size = grn_uvector_element_size(ctx, obj);
12069 for (i = 0; i < n_values; i++) {
12070 grn_rc rc = grn_ii_builder_append_value(ctx, builder, rid, sid, 0,
12071 p, value_size);
12072 if (rc != GRN_SUCCESS) {
12073 return rc;
12074 }
12075 p += value_size;
12076 }
12077 }
12078 return GRN_SUCCESS;
12079 case GRN_VECTOR :
12080 if (obj->u.v.body) {
12081 /*
12082 * Note that the following sections and n_sections don't correspond to
12083 * source columns.
12084 */
12085 int i, n_secs = obj->u.v.n_sections;
12086 grn_section *secs = obj->u.v.sections;
12087 const char *head = GRN_BULK_HEAD(obj->u.v.body);
12088 for (i = 0; i < n_secs; i++) {
12089 grn_rc rc;
12090 grn_section *sec = &secs[i];
12091 if (sec->length == 0) {
12092 continue;
12093 }
12094 if (builder->tokenizer) {
12095 sid = i + 1;
12096 }
12097 rc = grn_ii_builder_append_value(ctx, builder, rid, sid, sec->weight,
12098 head + sec->offset, sec->length);
12099 if (rc != GRN_SUCCESS) {
12100 return rc;
12101 }
12102 }
12103 }
12104 return GRN_SUCCESS;
12105 default :
12106 ERR(GRN_INVALID_ARGUMENT, "[index] invalid object assigned as value");
12107 return ctx->rc;
12108 }
12109}
12110
12111/*
12112 * grn_ii_builder_append_srcs reads values from source columns and appends the
12113 * values.
12114 */
12115static grn_rc
12116grn_ii_builder_append_srcs(grn_ctx *ctx, grn_ii_builder *builder)
12117{
12118 size_t i;
12119 grn_rc rc = GRN_SUCCESS;
12120 grn_obj *objs;
12121 grn_table_cursor *cursor;
12122
12123 /* Allocate memory for objects to store source values. */
12124 objs = GRN_MALLOCN(grn_obj, builder->n_srcs);
12125 if (!objs) {
12126 ERR(GRN_NO_MEMORY_AVAILABLE,
12127 "failed to allocate memory for objs: n_srcs = %u", builder->n_srcs);
12128 return ctx->rc;
12129 }
12130
12131 /* Create a cursor to get records in the ID order. */
12132 cursor = grn_table_cursor_open(ctx, builder->src_table, NULL, 0, NULL, 0,
12133 0, -1, GRN_CURSOR_BY_ID);
12134 if (!cursor) {
12135 if (ctx->rc == GRN_SUCCESS) {
12136 ERR(GRN_OBJECT_CORRUPT, "[index] failed to open table cursor");
12137 }
12138 GRN_FREE(objs);
12139 return ctx->rc;
12140 }
12141
12142 /* Read source values and append it. */
12143 for (i = 0; i < builder->n_srcs; i++) {
12144 GRN_TEXT_INIT(&objs[i], 0);
12145 }
12146 while (rc == GRN_SUCCESS) {
12147 grn_id rid = grn_table_cursor_next(ctx, cursor);
12148 if (rid == GRN_ID_NIL) {
12149 break;
12150 }
12151 for (i = 0; i < builder->n_srcs; i++) {
12152 grn_obj *obj = &objs[i];
12153 grn_obj *src = builder->srcs[i];
12154 rc = grn_obj_reinit_for(ctx, obj, src);
12155 if (rc == GRN_SUCCESS) {
12156 if (GRN_OBJ_TABLEP(src)) {
12157 int len = grn_table_get_key2(ctx, src, rid, obj);
12158 if (len <= 0) {
12159 if (ctx->rc == GRN_SUCCESS) {
12160 ERR(GRN_UNKNOWN_ERROR, "failed to get key: rid = %u, len = %d",
12161 rid, len);
12162 }
12163 rc = ctx->rc;
12164 }
12165 } else {
12166 if (!grn_obj_get_value(ctx, src, rid, obj)) {
12167 if (ctx->rc == GRN_SUCCESS) {
12168 ERR(GRN_UNKNOWN_ERROR, "failed to get value: rid = %u", rid);
12169 }
12170 rc = ctx->rc;
12171 }
12172 }
12173 if (rc == GRN_SUCCESS) {
12174 uint32_t sid = (uint32_t)(i + 1);
12175 rc = grn_ii_builder_append_obj(ctx, builder, rid, sid, obj);
12176 }
12177 }
12178 }
12179 if (rc == GRN_SUCCESS && builder->n >= builder->options.block_threshold) {
12180 rc = grn_ii_builder_flush_block(ctx, builder);
12181 }
12182 }
12183 if (rc == GRN_SUCCESS) {
12184 rc = grn_ii_builder_flush_block(ctx, builder);
12185 }
12186 for (i = 0; i < builder->n_srcs; i++) {
12187 GRN_OBJ_FIN(ctx, &objs[i]);
12188 }
12189 grn_table_cursor_close(ctx, cursor);
12190 GRN_FREE(objs);
12191 return rc;
12192}
12193
12194/* grn_ii_builder_set_src_table sets a source table. */
12195static grn_rc
12196grn_ii_builder_set_src_table(grn_ctx *ctx, grn_ii_builder *builder)
12197{
12198 builder->src_table = grn_ctx_at(ctx, DB_OBJ(builder->ii)->range);
12199 if (!builder->src_table) {
12200 if (ctx->rc == GRN_SUCCESS) {
12201 ERR(GRN_INVALID_ARGUMENT, "source table is null: range = %d",
12202 DB_OBJ(builder->ii)->range);
12203 }
12204 return ctx->rc;
12205 }
12206 return GRN_SUCCESS;
12207}
12208
12209/* grn_ii_builder_set_sid_bits calculates sid_bits and sid_mask. */
12210static grn_rc
12211grn_ii_builder_set_sid_bits(grn_ctx *ctx, grn_ii_builder *builder)
12212{
12213 /* Calculate the number of bits required to represent a section ID. */
12214 if (builder->n_srcs == 1 && builder->tokenizer &&
12215 (builder->srcs[0]->header.flags & GRN_OBJ_COLUMN_VECTOR) != 0) {
12216 /* If the source column is a vector column and the index has a tokenizer, */
12217 /* the maximum sid equals to the maximum number of elements. */
12218 size_t max_elems = 0;
12219 grn_table_cursor *cursor;
12220 grn_obj obj;
12221 cursor = grn_table_cursor_open(ctx, builder->src_table, NULL, 0, NULL, 0,
12222 0, -1, GRN_CURSOR_BY_ID);
12223 if (!cursor) {
12224 if (ctx->rc == GRN_SUCCESS) {
12225 ERR(GRN_OBJECT_CORRUPT, "[index] failed to open table cursor");
12226 }
12227 return ctx->rc;
12228 }
12229 GRN_TEXT_INIT(&obj, 0);
12230 for (;;) {
12231 grn_id rid = grn_table_cursor_next(ctx, cursor);
12232 if (rid == GRN_ID_NIL) {
12233 break;
12234 }
12235 if (!grn_obj_get_value(ctx, builder->srcs[0], rid, &obj)) {
12236 continue;
12237 }
12238 if (obj.u.v.n_sections > max_elems) {
12239 max_elems = obj.u.v.n_sections;
12240 }
12241 }
12242 GRN_OBJ_FIN(ctx, &obj);
12243 grn_table_cursor_close(ctx, cursor);
12244 while (((uint32_t)1 << builder->sid_bits) < max_elems) {
12245 builder->sid_bits++;
12246 }
12247 }
12248 if (builder->sid_bits == 0) {
12249 while (((uint32_t)1 << builder->sid_bits) < builder->n_srcs) {
12250 builder->sid_bits++;
12251 }
12252 }
12253 builder->sid_mask = ((uint64_t)1 << builder->sid_bits) - 1;
12254 return GRN_SUCCESS;
12255}
12256
12257/* grn_ii_builder_set_srcs sets source columns. */
12258static grn_rc
12259grn_ii_builder_set_srcs(grn_ctx *ctx, grn_ii_builder *builder)
12260{
12261 size_t i;
12262 grn_id *source;
12263 builder->n_srcs = builder->ii->obj.source_size / sizeof(grn_id);
12264 source = (grn_id *)builder->ii->obj.source;
12265 if (!source || !builder->n_srcs) {
12266 ERR(GRN_INVALID_ARGUMENT,
12267 "source is not available: source = %p, source_size = %u",
12268 builder->ii->obj.source, builder->ii->obj.source_size);
12269 return ctx->rc;
12270 }
12271 builder->srcs = GRN_MALLOCN(grn_obj *, builder->n_srcs);
12272 if (!builder->srcs) {
12273 return GRN_NO_MEMORY_AVAILABLE;
12274 }
12275 for (i = 0; i < builder->n_srcs; i++) {
12276 builder->srcs[i] = grn_ctx_at(ctx, source[i]);
12277 if (!builder->srcs[i]) {
12278 if (ctx->rc == GRN_SUCCESS) {
12279 ERR(GRN_OBJECT_CORRUPT, "source not found: id = %d", source[i]);
12280 }
12281 return ctx->rc;
12282 }
12283 }
12284 return grn_ii_builder_set_sid_bits(ctx, builder);
12285}
12286
12287/* grn_ii_builder_append_source appends values in source columns. */
12288static grn_rc
12289grn_ii_builder_append_source(grn_ctx *ctx, grn_ii_builder *builder)
12290{
12291 grn_rc rc = grn_ii_builder_set_src_table(ctx, builder);
12292 if (rc != GRN_SUCCESS) {
12293 return rc;
12294 }
12295 if (grn_table_size(ctx, builder->src_table) == 0) {
12296 /* Nothing to do because there are no values. */
12297 return ctx->rc;
12298 }
12299 /* Create a block lexicon. */
12300 rc = grn_ii_builder_create_lexicon(ctx, builder);
12301 if (rc != GRN_SUCCESS) {
12302 return rc;
12303 }
12304 rc = grn_ii_builder_set_srcs(ctx, builder);
12305 if (rc != GRN_SUCCESS) {
12306 return rc;
12307 }
12308 rc = grn_ii_builder_append_srcs(ctx, builder);
12309 if (rc != GRN_SUCCESS) {
12310 return rc;
12311 }
12312 grn_ii_builder_fin_terms(ctx, builder);
12313 return GRN_SUCCESS;
12314}
12315
12316/*
12317 * grn_ii_builder_fill_block reads the next data from a temporary file and fill
12318 * a block buffer.
12319 */
12320static grn_rc
12321grn_ii_builder_fill_block(grn_ctx *ctx, grn_ii_builder *builder,
12322 uint32_t block_id)
12323{
12324 ssize_t size;
12325 uint32_t buf_rest;
12326 uint64_t file_offset;
12327 grn_ii_builder_block *block = &builder->blocks[block_id];
12328 if (!block->rest) {
12329 return GRN_END_OF_DATA;
12330 }
12331 if (!block->buf) {
12332 block->buf = (uint8_t *)GRN_MALLOC(builder->options.block_buf_size);
12333 if (!block->buf) {
12334 ERR(GRN_NO_MEMORY_AVAILABLE,
12335 "failed to allocate memory for buffered input: size = %u",
12336 builder->options.block_buf_size);
12337 return ctx->rc;
12338 }
12339 }
12340
12341 /* Move the remaining data to the head. */
12342 buf_rest = block->end - block->cur;
12343 if (buf_rest) {
12344 grn_memmove(block->buf, block->cur, buf_rest);
12345 }
12346 block->cur = block->buf;
12347 block->end = block->buf + buf_rest;
12348
12349 /* Read the next data. */
12350 file_offset = grn_lseek(builder->fd, block->offset, SEEK_SET);
12351 if (file_offset != block->offset) {
12352 SERR("failed to seek file: expected = %" GRN_FMT_INT64U
12353 ", actual = %" GRN_FMT_INT64D,
12354 block->offset, file_offset);
12355 return ctx->rc;
12356 }
12357 buf_rest = builder->options.block_buf_size - buf_rest;
12358 if (block->rest < buf_rest) {
12359 buf_rest = block->rest;
12360 }
12361 size = grn_read(builder->fd, block->end, buf_rest);
12362 if (size <= 0) {
12363 SERR("failed to read data: expected = %u, actual = %" GRN_FMT_INT64D,
12364 buf_rest, (int64_t)size);
12365 return ctx->rc;
12366 }
12367 block->offset += size;
12368 block->rest -= size;
12369 block->end += size;
12370 return GRN_SUCCESS;
12371}
12372
12373/* grn_ii_builder_read_from_block reads the next value from a block. */
12374static grn_rc
12375grn_ii_builder_read_from_block(grn_ctx *ctx, grn_ii_builder *builder,
12376 uint32_t block_id, uint64_t *value)
12377{
12378 grn_ii_builder_block *block = &builder->blocks[block_id];
12379 grn_rc rc = grn_ii_builder_block_next(ctx, block, value);
12380 if (rc == GRN_SUCCESS) {
12381 return GRN_SUCCESS;
12382 } else if (rc == GRN_END_OF_DATA) {
12383 rc = grn_ii_builder_fill_block(ctx, builder, block_id);
12384 if (rc != GRN_SUCCESS) {
12385 return rc;
12386 }
12387 return grn_ii_builder_block_next(ctx, block, value);
12388 }
12389 return rc;
12390}
12391
12392/* grn_ii_builder_pack_chunk tries to pack a chunk. */
12393static grn_rc
12394grn_ii_builder_pack_chunk(grn_ctx *ctx, grn_ii_builder *builder,
12395 grn_bool *packed)
12396{
12397 grn_id rid;
12398 uint32_t sid, pos, *a;
12399 grn_ii_builder_chunk *chunk = &builder->chunk;
12400 *packed = GRN_FALSE;
12401 if (chunk->offset != 1) { /* df != 1 */
12402 return GRN_SUCCESS;
12403 }
12404 if (chunk->weight_buf && chunk->weight_buf[0]) { /* weight != 0 */
12405 return GRN_SUCCESS;
12406 }
12407 if (chunk->freq_buf[0] != 0) { /* freq != 1 */
12408 return GRN_SUCCESS;
12409 }
12410 rid = chunk->rid_buf[0];
12411 if (chunk->sid_buf) {
12412 if (rid >= 0x100000) {
12413 return GRN_SUCCESS;
12414 }
12415 sid = chunk->sid_buf[0] + 1;
12416 if (sid >= 0x800) {
12417 return GRN_SUCCESS;
12418 }
12419 a = array_get(ctx, builder->ii, chunk->tid);
12420 if (!a) {
12421 DEFINE_NAME(builder->ii);
12422 MERR("[ii][builder][chunk][pack] failed to allocate an array: "
12423 "<%.*s>: "
12424 "<%u>:<%u>:<%u>",
12425 name_size, name,
12426 rid, sid, chunk->tid);
12427 return ctx->rc;
12428 }
12429 a[0] = ((rid << 12) + (sid << 1)) | 1;
12430 } else {
12431 a = array_get(ctx, builder->ii, chunk->tid);
12432 if (!a) {
12433 DEFINE_NAME(builder->ii);
12434 MERR("[ii][builder][chunk][pack] failed to allocate an array: "
12435 "<%.*s>: "
12436 "<%u>:<%u>",
12437 name_size, name,
12438 rid, chunk->tid);
12439 return ctx->rc;
12440 }
12441 a[0] = (rid << 1) | 1;
12442 }
12443 pos = 0;
12444 if (chunk->pos_buf) {
12445 pos = chunk->pos_buf[0];
12446 }
12447 a[1] = pos;
12448 array_unref(builder->ii, chunk->tid);
12449 *packed = GRN_TRUE;
12450
12451 grn_ii_builder_chunk_clear(ctx, chunk);
12452 return GRN_SUCCESS;
12453}
12454
12455/* grn_ii_builder_get_cinfo returns a new cinfo. */
12456static grn_rc
12457grn_ii_builder_get_cinfo(grn_ctx *ctx, grn_ii_builder *builder,
12458 chunk_info **cinfo)
12459{
12460 if (builder->n_cinfos == builder->cinfos_size) {
12461 uint32_t size = builder->cinfos_size ? (builder->cinfos_size * 2) : 1;
12462 size_t n_bytes = size * sizeof(chunk_info);
12463 chunk_info *cinfos = (chunk_info *)GRN_REALLOC(builder->cinfos, n_bytes);
12464 if (!cinfos) {
12465 ERR(GRN_NO_MEMORY_AVAILABLE,
12466 "failed to allocate memory for cinfos: n_bytes = %" GRN_FMT_SIZE,
12467 n_bytes);
12468 return ctx->rc;
12469 }
12470 builder->cinfos = cinfos;
12471 builder->cinfos_size = size;
12472 }
12473 *cinfo = &builder->cinfos[builder->n_cinfos++];
12474 return GRN_SUCCESS;
12475}
12476
12477/* grn_ii_builder_flush_chunk flushes a chunk. */
12478static grn_rc
12479grn_ii_builder_flush_chunk(grn_ctx *ctx, grn_ii_builder *builder)
12480{
12481 grn_rc rc;
12482 chunk_info *cinfo = NULL;
12483 grn_ii_builder_chunk *chunk = &builder->chunk;
12484 void *seg;
12485 uint8_t *in;
12486 uint32_t in_size, chunk_id, seg_id, seg_offset, seg_rest;
12487
12488 rc = grn_ii_builder_chunk_encode(ctx, chunk, NULL, 0);
12489 if (rc != GRN_SUCCESS) {
12490 return rc;
12491 }
12492 in = chunk->enc_buf;
12493 in_size = chunk->enc_offset;
12494
12495 rc = chunk_new(ctx, builder->ii, &chunk_id, chunk->enc_offset);
12496 if (rc != GRN_SUCCESS) {
12497 return rc;
12498 }
12499
12500 /* Copy to the first segment. */
12501 seg_id = chunk_id >> GRN_II_N_CHUNK_VARIATION;
12502 seg_offset = (chunk_id & ((1 << GRN_II_N_CHUNK_VARIATION) - 1)) <<
12503 GRN_II_W_LEAST_CHUNK;
12504 GRN_IO_SEG_REF(builder->ii->chunk, seg_id, seg);
12505 if (!seg) {
12506 if (ctx->rc == GRN_SUCCESS) {
12507 ERR(GRN_UNKNOWN_ERROR,
12508 "failed access chunk segment: chunk_id = %u, seg_id = %u",
12509 chunk_id, seg_id);
12510 }
12511 return ctx->rc;
12512 }
12513 seg_rest = S_CHUNK - seg_offset;
12514 if (in_size <= seg_rest) {
12515 grn_memcpy((uint8_t *)seg + seg_offset, in, in_size);
12516 in_size = 0;
12517 } else {
12518 grn_memcpy((uint8_t *)seg + seg_offset, in, seg_rest);
12519 in += seg_rest;
12520 in_size -= seg_rest;
12521 }
12522 GRN_IO_SEG_UNREF(builder->ii->chunk, seg_id);
12523
12524 /* Copy to the next segments. */
12525 while (in_size) {
12526 seg_id++;
12527 GRN_IO_SEG_REF(builder->ii->chunk, seg_id, seg);
12528 if (!seg) {
12529 if (ctx->rc == GRN_SUCCESS) {
12530 ERR(GRN_UNKNOWN_ERROR,
12531 "failed access chunk segment: chunk_id = %u, seg_id = %u",
12532 chunk_id, seg_id);
12533 }
12534 return ctx->rc;
12535 }
12536 if (in_size <= S_CHUNK) {
12537 grn_memcpy(seg, in, in_size);
12538 in_size = 0;
12539 } else {
12540 grn_memcpy(seg, in, S_CHUNK);
12541 in += S_CHUNK;
12542 in_size -= S_CHUNK;
12543 }
12544 GRN_IO_SEG_UNREF(builder->ii->chunk, seg_id);
12545 }
12546
12547 /* Append a cinfo. */
12548 rc = grn_ii_builder_get_cinfo(ctx, builder, &cinfo);
12549 if (rc != GRN_SUCCESS) {
12550 return rc;
12551 }
12552 cinfo->segno = chunk_id;
12553 cinfo->size = chunk->enc_offset;
12554 cinfo->dgap = chunk->rid_gap;
12555
12556 builder->buf.ii->header->total_chunk_size += chunk->enc_offset;
12557 grn_ii_builder_chunk_clear(ctx, chunk);
12558 return GRN_SUCCESS;
12559}
12560
12561/* grn_ii_builder_read_to_chunk read values from a block to a chunk. */
12562static grn_rc
12563grn_ii_builder_read_to_chunk(grn_ctx *ctx, grn_ii_builder *builder,
12564 uint32_t block_id)
12565{
12566 grn_rc rc;
12567 uint64_t value;
12568 uint32_t rid = GRN_ID_NIL, last_sid = 0;
12569 uint32_t ii_flags = builder->ii->header->flags;
12570 grn_ii_builder_chunk *chunk = &builder->chunk;
12571
12572 for (;;) {
12573 uint32_t gap, freq;
12574 uint64_t value;
12575 rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
12576 if (rc != GRN_SUCCESS) {
12577 return rc;
12578 }
12579 if (!value) {
12580 break;
12581 }
12582 if (builder->chunk.offset == builder->chunk.size) {
12583 rc = grn_ii_builder_chunk_extend_bufs(ctx, chunk, ii_flags);
12584 if (rc != GRN_SUCCESS) {
12585 return rc;
12586 }
12587 }
12588
12589 /* Read record ID. */
12590 gap = value >> builder->sid_bits; /* In-block gap */
12591 if (gap) {
12592 if (chunk->n >= builder->options.chunk_threshold) {
12593 rc = grn_ii_builder_flush_chunk(ctx, builder);
12594 if (rc != GRN_SUCCESS) {
12595 return rc;
12596 }
12597 }
12598 last_sid = 0;
12599 }
12600 rid += gap;
12601 gap = rid - chunk->rid; /* Global gap */
12602 chunk->rid_buf[chunk->offset] = chunk->offset ? gap : rid;
12603 chunk->n++;
12604 chunk->rid = rid;
12605 chunk->rid_gap += gap;
12606 builder->df++;
12607
12608 /* Read section ID. */
12609 if (ii_flags & GRN_OBJ_WITH_SECTION) {
12610 uint32_t sid = (value & builder->sid_mask) + 1;
12611 chunk->sid_buf[chunk->offset] = sid - last_sid - 1;
12612 chunk->n++;
12613 last_sid = sid;
12614 }
12615
12616 /* Read weight. */
12617 if (ii_flags & GRN_OBJ_WITH_WEIGHT) {
12618 uint32_t weight;
12619 rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
12620 if (rc != GRN_SUCCESS) {
12621 return rc;
12622 }
12623 weight = value;
12624 chunk->weight_buf[chunk->offset] = weight;
12625 chunk->n++;
12626 }
12627
12628 /* Read positions or a frequency. */
12629 if (ii_flags & GRN_OBJ_WITH_POSITION) {
12630 uint32_t pos = -1;
12631 freq = 0;
12632 for (;;) {
12633 rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
12634 if (rc != GRN_SUCCESS) {
12635 return rc;
12636 }
12637 if (!value) {
12638 break;
12639 }
12640 if (builder->chunk.pos_offset == builder->chunk.pos_size) {
12641 rc = grn_ii_builder_chunk_extend_pos_buf(ctx, chunk);
12642 if (rc != GRN_SUCCESS) {
12643 return rc;
12644 }
12645 }
12646 if (pos == -1) {
12647 chunk->pos_buf[chunk->pos_offset] = value - 1;
12648 chunk->pos_sum += value - 1;
12649 } else {
12650 chunk->pos_buf[chunk->pos_offset] = value;
12651 chunk->pos_sum += value;
12652 }
12653 chunk->n++;
12654 pos += value;
12655 chunk->pos_offset++;
12656 freq++;
12657 }
12658 } else {
12659 rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
12660 if (rc != GRN_SUCCESS) {
12661 return rc;
12662 }
12663 freq = value;
12664 }
12665 chunk->freq_buf[chunk->offset] = freq - 1;
12666 chunk->n++;
12667 chunk->offset++;
12668 }
12669 rc = grn_ii_builder_read_from_block(ctx, builder, block_id, &value);
12670 if (rc == GRN_SUCCESS) {
12671 builder->blocks[block_id].tid = value;
12672 } else if (rc == GRN_END_OF_DATA) {
12673 builder->blocks[block_id].tid = GRN_ID_NIL;
12674 } else {
12675 return rc;
12676 }
12677 return GRN_SUCCESS;
12678}
12679
12680/* grn_ii_builder_register_chunks registers chunks. */
12681static grn_rc
12682grn_ii_builder_register_chunks(grn_ctx *ctx, grn_ii_builder *builder)
12683{
12684 grn_rc rc;
12685 uint32_t buf_tid, *a;
12686 buffer_term *buf_term;
12687
12688 rc = grn_ii_builder_chunk_encode(ctx, &builder->chunk, builder->cinfos,
12689 builder->n_cinfos);
12690 if (rc != GRN_SUCCESS) {
12691 return rc;
12692 }
12693
12694 if (!grn_ii_builder_buffer_is_assigned(ctx, &builder->buf)) {
12695 rc = grn_ii_builder_buffer_assign(ctx, &builder->buf,
12696 builder->chunk.enc_offset);
12697 if (rc != GRN_SUCCESS) {
12698 return rc;
12699 }
12700 }
12701 buf_tid = builder->buf.buf->header.nterms;
12702 if (buf_tid >= builder->options.buffer_max_n_terms ||
12703 builder->buf.chunk_size - builder->buf.chunk_offset <
12704 builder->chunk.enc_offset) {
12705 rc = grn_ii_builder_buffer_flush(ctx, &builder->buf);
12706 if (rc != GRN_SUCCESS) {
12707 return rc;
12708 }
12709 rc = grn_ii_builder_buffer_assign(ctx, &builder->buf,
12710 builder->chunk.enc_offset);
12711 if (rc != GRN_SUCCESS) {
12712 return rc;
12713 }
12714 buf_tid = 0;
12715 }
12716 buf_term = &builder->buf.buf->terms[buf_tid];
12717 buf_term->tid = builder->chunk.tid;
12718 if (builder->n_cinfos) {
12719 buf_term->tid |= CHUNK_SPLIT;
12720 }
12721 buf_term->size_in_buffer = 0;
12722 buf_term->pos_in_buffer = 0;
12723 buf_term->size_in_chunk = builder->chunk.enc_offset;
12724 buf_term->pos_in_chunk = builder->buf.chunk_offset;
12725
12726 grn_memcpy(builder->buf.chunk + builder->buf.chunk_offset,
12727 builder->chunk.enc_buf, builder->chunk.enc_offset);
12728 builder->buf.chunk_offset += builder->chunk.enc_offset;
12729
12730 a = array_get(ctx, builder->ii, builder->chunk.tid);
12731 if (!a) {
12732 DEFINE_NAME(builder->ii);
12733 MERR("[ii][builder][chunk][register] "
12734 "failed to allocate an array in segment: "
12735 "<%.*s>: "
12736 "tid=<%u>: max_n_segments=<%u>",
12737 name_size, name,
12738 builder->chunk.tid,
12739 builder->ii->seg->header->max_segment);
12740 return ctx->rc;
12741 }
12742 a[0] = SEG2POS(builder->buf.buf_id,
12743 sizeof(buffer_header) + buf_tid * sizeof(buffer_term));
12744 a[1] = builder->df;
12745 array_unref(builder->ii, builder->chunk.tid);
12746
12747 builder->buf.buf->header.nterms++;
12748 builder->n_cinfos = 0;
12749 grn_ii_builder_chunk_clear(ctx, &builder->chunk);
12750 return GRN_SUCCESS;
12751}
12752
12753static grn_rc
12754grn_ii_builder_commit(grn_ctx *ctx, grn_ii_builder *builder)
12755{
12756 uint32_t i;
12757 grn_rc rc;
12758 grn_table_cursor *cursor;
12759
12760 for (i = 0; i < builder->n_blocks; i++) {
12761 uint64_t value;
12762 rc = grn_ii_builder_read_from_block(ctx, builder, i, &value);
12763 if (rc != GRN_SUCCESS) {
12764 return rc;
12765 }
12766 builder->blocks[i].tid = value;
12767 }
12768
12769 cursor = grn_table_cursor_open(ctx, builder->ii->lexicon,
12770 NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_KEY);
12771 for (;;) {
12772 grn_id tid = grn_table_cursor_next(ctx, cursor);
12773 if (tid == GRN_ID_NIL) {
12774 break;
12775 }
12776 builder->chunk.tid = tid;
12777 builder->chunk.rid = GRN_ID_NIL;
12778 builder->df = 0;
12779 for (i = 0; i < builder->n_blocks; i++) {
12780 if (tid == builder->blocks[i].tid) {
12781 rc = grn_ii_builder_read_to_chunk(ctx, builder, i);
12782 if (rc != GRN_SUCCESS) {
12783 return rc;
12784 }
12785 }
12786 }
12787 if (!builder->chunk.n) {
12788 /* This term does not appear. */
12789 continue;
12790 }
12791 if (!builder->n_cinfos) {
12792 grn_bool packed;
12793 rc = grn_ii_builder_pack_chunk(ctx, builder, &packed);
12794 if (rc != GRN_SUCCESS) {
12795 return rc;
12796 }
12797 if (packed) {
12798 continue;
12799 }
12800 }
12801 rc = grn_ii_builder_register_chunks(ctx, builder);
12802 if (rc != GRN_SUCCESS) {
12803 return rc;
12804 }
12805 }
12806 grn_table_cursor_close(ctx, cursor);
12807 if (grn_ii_builder_buffer_is_assigned(ctx, &builder->buf)) {
12808 rc = grn_ii_builder_buffer_flush(ctx, &builder->buf);
12809 if (rc != GRN_SUCCESS) {
12810 return rc;
12811 }
12812 }
12813 return GRN_SUCCESS;
12814}
12815
12816grn_rc
12817grn_ii_build2(grn_ctx *ctx, grn_ii *ii, const grn_ii_builder_options *options)
12818{
12819 grn_rc rc, rc_close;
12820 grn_ii_builder *builder;
12821 rc = grn_ii_builder_open(ctx, ii, options, &builder);
12822 if (rc == GRN_SUCCESS) {
12823 rc = grn_ii_builder_append_source(ctx, builder);
12824 if (rc == GRN_SUCCESS) {
12825 rc = grn_ii_builder_commit(ctx, builder);
12826 }
12827 rc_close = grn_ii_builder_close(ctx, builder);
12828 if (rc == GRN_SUCCESS) {
12829 rc = rc_close;
12830 }
12831 }
12832 return rc;
12833}
12834