1/*
2 * bt_iterator.c
3 *
4 * Copyright (C) 2013-2014 Aerospike, Inc.
5 *
6 * Portions may be licensed to Aerospike, Inc. under one or more contributor
7 * license agreements.
8 *
9 * This program is free software: you can redistribute it and/or modify it under
10 * the terms of the GNU Affero General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option) any
12 * later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU Affero General Public License
20 * along with this program. If not, see http://www.gnu.org/licenses/
21 */
22/*
23 * This file implements Aerospike Index B-tree iterators.
24 */
25
26#include <assert.h>
27#include <float.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <strings.h>
32#include <sys/param.h> // For MAX() & MIN().
33
34#include "ai_obj.h"
35#include "bt_iterator.h"
36#include "stream.h"
37#include <citrusleaf/alloc.h>
38
39// HELPER_DEFINES HELPER_DEFINES HELPER_DEFINES HELPER_DEFINES HELPER_DEFINES
40#define GET_NEW_CHILD(iter) \
41 if (!iter->bln->child) { iter->bln->child = get_new_iter_child(iter); }
42
43#define SETITER8R(iter, btr, asc, l, lrev, n, nrev) \
44 btSIter *siter = setIterator(iter, btr, asc ? l : lrev, asc ? n : nrev);
45
46#define CR8ITER8R(btr, asc, l, lrev, n, nrev) \
47 btSIter *siter = createIterator(btr, asc ? l : lrev, asc ? n : nrev);
48
49bt_ll_n *get_new_iter_child(btIterator *iter) { //printf("get_newiterchild\n");
50 assert(iter->num_nodes < MAX_BTREE_DEPTH);
51 bt_ll_n *nn = &(iter->nodes[iter->num_nodes]);
52 bzero(nn, sizeof(bt_ll_n));
53 iter->num_nodes++;
54 return nn;
55}
56
57void to_child(btIterator *iter, bt_n* self) { //printf("to_child\n");
58 iter->depth++;
59 iter->bln->child->parent = iter->bln;
60 iter->bln->child->ik = 0;
61 iter->bln->child->in = 0;
62 iter->bln->child->self = self;
63 iter->bln = iter->bln->child;
64}
65static void toparentrecurse(btIterator *iter) { //printf("to_parent\n");
66 if (!iter->bln->parent) {
67 iter->finished = 1; /* finished */
68 return;
69 }
70 iter->depth--;
71 bt *btr = iter->btr;
72 void *child = KEYS(btr, iter->bln->self, iter->bln->ik);
73 iter->bln = iter->bln->parent; /* -> parent */
74 void *parent = KEYS(btr, iter->bln->self, iter->bln->ik);
75 int x = btr->cmp(child, parent);
76 if (x > 0) {
77 if ((iter->bln->ik + 1) < iter->bln->self->n) iter->bln->ik++;
78 if ((iter->bln->in + 1) < iter->bln->self->n) iter->bln->in++;
79 else toparentrecurse(iter);
80 }
81}
82static void iter_leaf(btIterator *iter) { //printf("iter_leaf\n");
83 if ((iter->bln->ik + 1) < iter->bln->self->n) iter->bln->ik++;
84 else toparentrecurse(iter);
85}
86static void tochildrecurse(btIterator *iter, bt_n* self) {
87 to_child(iter, self);
88 if (!iter->bln->self->leaf) { // depth-first
89 GET_NEW_CHILD(iter)
90 tochildrecurse(iter, NODES(iter->btr, iter->bln->self)[iter->bln->in]);
91 }
92}
93static void iter_node(btIterator *iter) {
94 if ((iter->bln->ik + 1) < iter->bln->self->n) iter->bln->ik++;
95 if ((iter->bln->in + 1) <= iter->bln->self->n) iter->bln->in++;
96 GET_NEW_CHILD(iter)
97 tochildrecurse(iter, NODES(iter->btr, iter->bln->self)[iter->bln->in]);
98}
99
100static void *btNext(btSIter *siter, bt_n **rx, int *ri, bool asc) {
101 btIterator *iter = &(siter->x);
102 if (iter->finished) {
103 if (siter->scan) siter->missed = siter->nim;
104 return NULL;
105 }
106 if (asc) siter->missed = siter->nim; //Curr MISSED = LastLoop's NextIsMissed
107 bt_n *x = iter->bln->self;
108 if (rx) *rx = x;
109 int i = iter->bln->ik;
110 if (ri) *ri = i;
111 void *curr = KEYS(iter->btr, x, i);
112 siter->nim = getDR(iter->btr, x, i) ? 1 : 0;
113 if (iter->bln->self->leaf) (*iter->iLeaf)(iter);
114 else (*iter->iNode)(iter);
115 return curr;
116}
117
118void to_child_rev(btIterator *iter, bt_n* self) {
119 iter->depth++;
120 iter->bln->child->parent = iter->bln;
121 iter->bln->child->ik = self->n - 1;
122 iter->bln->child->in = self->n;
123 iter->bln->child->self = self;
124 iter->bln = iter->bln->child;
125}
126static void tochildrecurserev(btIterator *iter, bt_n* self) {
127 to_child_rev(iter, self);
128 if (!iter->bln->self->leaf) { // depth-first
129 GET_NEW_CHILD(iter)
130 tochildrecurserev(iter,
131 NODES(iter->btr, iter->bln->self)[iter->bln->in]);
132 }
133}
134static void toparentrecurserev(btIterator *iter) {
135 if (!iter->bln->parent) {
136 iter->finished = 1; /* finished */
137 return;
138 }
139 iter->depth--;
140 bt *btr = iter->btr;
141 void *child = KEYS(btr, iter->bln->self, iter->bln->ik);
142 iter->bln = iter->bln->parent; /* -> parent */
143 void *parent = KEYS(btr, iter->bln->self, iter->bln->ik);
144 int x = btr->cmp(child, parent);
145 if (x < 0) {
146 if (iter->bln->ik) iter->bln->ik--;
147 if (iter->bln->in) iter->bln->in--;
148 else toparentrecurserev(iter);
149 }
150 if (iter->bln->in == iter->bln->self->n) iter->bln->in--;
151}
152static void iter_leaf_rev(btIterator *iter) { //printf("iter_leaf_rev\n");
153 if (iter->bln->ik) iter->bln->ik--;
154 else toparentrecurserev(iter);
155}
156static void iter_node_rev(btIterator *iter) {
157 GET_NEW_CHILD(iter)
158 tochildrecurserev(iter, NODES(iter->btr, iter->bln->self)[iter->bln->in]);
159}
160
161// INIT_ITERATOR INIT_ITERATOR INIT_ITERATOR INIT_ITERATOR INIT_ITERATOR
162static void *setIter(bt *btr, bt_data_t bkey, btSIter *siter, ai_obj *alow,
163 bt_n **rx, int *ri, bool asc) {
164 btIterator *iter = &(siter->x);
165 int ret = bt_init_iterator(btr, bkey, iter, alow);
166 //printf("setIter: ret: %d\n", ret);
167 if (ret == II_FAIL) return NULL;
168 siter->empty = 0;
169 if (ret == II_L_MISS) {
170 siter->nim = siter->missed = 1;
171 return NULL;
172 }
173 else if (ret == II_MISS) siter->nim = siter->missed = 1;
174 else if (ret != II_OK) { /* range queries, find nearest match */
175 int x = btr->cmp(bkey, KEYS(btr, iter->bln->self, iter->bln->ik));
176 if (x > 0) {
177 if (ret == II_ONLY_RIGHT) { // off end of B-tree
178 siter->empty = 1;
179 return NULL;
180 } else { // II_LEAF_EXIT
181 //printf("setIter: [II_LEAF_EXIT\n"); //TODO needed?
182 return btNext(siter, rx, ri, asc); // find next
183 }
184 }
185 }
186 if (rx) *rx = iter->bln->self;
187 if (ri) *ri = iter->bln->ik;
188 return KEYS(iter->btr, iter->bln->self, iter->bln->ik);
189}
190static void init_iter(btIterator *iter, bt *btr,
191 iter_single *itl, iter_single *itn) {
192 iter->btr = btr;
193 iter->high = LONG_MIN;
194 iter->iLeaf = itl;
195 iter->iNode = itn;
196 iter->finished = 0;
197 iter->num_nodes = 0;
198 iter->bln = &(iter->nodes[0]);
199 iter->bln->ik = iter->bln->in = 0;
200 iter->num_nodes++;
201 iter->bln->self = btr->root;
202 iter->bln->parent = iter->bln->child = NULL;
203 iter->depth = 0;
204}
205
206// AEROSPIKE MULTI_THREAD
207static btSIter *newIter() {
208 btSIter *siter = cf_malloc(sizeof(btSIter));
209 bzero(siter, sizeof(btSIter));
210 return siter;
211}
212
213static btSIter *getIterator() {
214 return newIter();
215}
216
217static void releaseIterator(btSIter *siter) {
218 if (siter) {
219 cf_free(siter);
220 }
221 return;
222}
223
224static btSIter *createIterator(bt *btr, iter_single *itl, iter_single *itn) {
225 btSIter *siter = getIterator();
226 siter->dofree = 1;
227 siter->missed = 0;
228 siter->nim = 0;
229 siter->empty = 1;
230 siter->scan = 0;
231 siter->ktype = btr->s.ktype;
232 init_ai_obj(&siter->key);
233 siter->be.key = &(siter->key);
234 siter->be.val = NULL;
235 init_iter(&siter->x, btr, itl, itn);
236 return siter;
237}
238//extra insertion
239
240static btSIter *setIterator(btSIter *iter, bt *btr, iter_single *itl, iter_single *itn) {
241 btSIter *siter = iter;
242 siter->dofree = 0;
243 siter->missed = 0;
244 siter->nim = 0;
245 siter->empty = 1;
246 siter->scan = 0;
247 siter->ktype = btr->s.ktype;
248 init_ai_obj(&siter->key);
249 siter->be.key = &(siter->key);
250 siter->be.val = NULL;
251 init_iter(&siter->x, btr, itl, itn);
252 return siter;
253}
254void btReleaseRangeIterator(btSIter *siter) {
255 if (!siter) return;
256 if (siter->dofree) {
257 releaseIterator(siter);
258 }
259}
260static void setHigh(btSIter *siter, ai_obj *high, col_type_t ktype) {
261 if (C_IS_L(ktype) || C_IS_G(ktype)) {
262 siter->x.high = high->l;
263 }
264 else if (C_IS_DG(ktype)) {
265 siter->x.highy = high->y;
266 }
267}
268
269static bool streamToBTEntry(uchar *stream, btSIter *siter, bt_n *x, int i) {
270 if (!stream) return 0;
271 if (i < 0) i = 0;
272 convertStream2Key(stream, siter->be.key, siter->x.btr);
273 siter->be.val = parseStream(stream, siter->x.btr);
274 bool gost = IS_GHOST(siter->x.btr, siter->be.val);
275 if (gost) {
276 siter->missed = 1; // GHOST key
277 siter->nim = 0;
278 }
279 siter->be.dr = x ? getDR(siter->x.btr, x, i) : 0;
280 siter->be.stream = stream;
281 siter->be.x = x;
282 siter->be.i = i; //NOTE: used by bt_validate_dirty
283 //DUMP_STREAM_TO_BT_ENTRY
284 return 1;
285}
286btSIter *btGetRangeIter(bt *btr, ai_obj *alow, ai_obj *ahigh, bool asc) {
287 if (!btr->root || !btr->numkeys) return NULL;
288 btk_t btk;
289 bool med;
290 uint32 ksize; //bt_dumptree(btr, btr->ktype);
291 CR8ITER8R(btr, asc, iter_leaf, iter_leaf_rev, iter_node, iter_node_rev);
292 setHigh(siter, asc ? ahigh : alow, btr->s.ktype);
293 char *bkey = createBTKey(asc ? alow : ahigh,
294 &med, &ksize, btr, &btk); //D032
295 if (!bkey) goto rangeiter_err;
296 bt_n *x = NULL;
297 int i = -1;
298 uchar *stream = setIter(btr, bkey, siter, asc ? alow : ahigh, &x, &i, asc);
299 destroyBTKey(bkey, med); /* DESTROYED 032 */
300 if (!streamToBTEntry(stream, siter, x, i)) goto rangeiter_err;
301 return siter;
302
303rangeiter_err:
304 btReleaseRangeIterator(siter);
305 return NULL;
306}
307
308
309btSIter *btSetRangeIter(btSIter * iter, bt *btr, ai_obj *alow, ai_obj *ahigh, bool asc) {
310 if (!btr->root || !btr->numkeys) return NULL;
311 btk_t btk;
312 bool med;
313 uint32 ksize; //bt_dumptree(btr, btr->ktype);
314 SETITER8R(iter, btr, asc, iter_leaf, iter_leaf_rev, iter_node, iter_node_rev);
315 setHigh(siter, asc ? ahigh : alow, btr->s.ktype);
316 char *bkey = createBTKey(asc ? alow : ahigh,
317 &med, &ksize, btr, &btk); //D032
318 if (!bkey) goto rangeiter_err;
319 bt_n *x = NULL;
320 int i = -1;
321 uchar *stream = setIter(btr, bkey, siter, asc ? alow : ahigh, &x, &i, asc);
322 destroyBTKey(bkey, med); /* DESTROYED 032 */
323 if (!streamToBTEntry(stream, siter, x, i)) goto rangeiter_err;
324 return siter;
325
326rangeiter_err:
327 btReleaseRangeIterator(siter);
328 return NULL;
329}
330btEntry *btRangeNext(btSIter *siter, bool asc) { //printf("btRangeNext\n");
331 //printf("btRangeNext: siter: %p\n", (void *)siter);
332 //if (siter) printf("btRangeNext: empty: %d\n", siter->empty);
333 if (!siter || siter->empty) return NULL;
334 bt_n *x = NULL;
335 int i = -1;
336 uchar *stream = btNext(siter, &x, &i, asc);
337 if (!streamToBTEntry(stream, siter, x, i)) return NULL;
338 if (C_IS_L(siter->ktype) || C_IS_G(siter->ktype)) {
339 long l = siter->key.l;
340 if (l == siter->x.high) siter->x.finished = 1; /* exact match */
341 if (!asc) {
342 //printf("btRangeNext: DESC: l: %lu dr: %u\n",
343 // l, getDR(siter->x.btr, x, i));
344 l += getDR(siter->x.btr, x, i);
345 }
346 bool over = asc ? (l > siter->x.high) : (l < siter->x.high);
347 if (over && siter->nim) {
348 siter->missed = 1;
349 }
350 //printf("btRangeNext: over: %d l: %lu high: %lu\n",
351 // over, l, siter->x.high);
352 return over ? NULL : &(siter->be);
353 } else if (C_IS_DG(siter->ktype)) {
354 uint160 yy = siter->key.y;
355 int ret = u160Cmp(&yy, &siter->x.highy);
356 if (!ret) siter->x.finished = 1; /* exact match */
357 if (!asc) { //TODO is ENDIANness of memcpy() correct
358 uint32 low;
359 char *spot = ((char *)&yy) + 12;
360 memcpy(&low, spot, 4);
361 low += getDR(siter->x.btr, x, i);
362 memcpy(spot, &low, 4);
363 }
364 bool over = asc ? (ret > 0) : (ret < 0);
365 return over ? NULL : &(siter->be);
366 } else {
367 return NULL;
368 }
369}
370
371// FULL_BTREE_ITERATOR FULL_BTREE_ITERATOR FULL_BTREE_ITERATOR
372bool assignMinKey(bt *btr, ai_obj *akey) { //TODO combine w/ setIter()
373 void *e = bt_min(btr);
374 if (!e) return 0; // iter can be initialised
375 convertStream2Key(e, akey, btr);
376 return 1; // w/ this lookup
377}
378bool assignMaxKey(bt *btr, ai_obj *akey) {
379 void *e = bt_max(btr);
380 if (!e) return 0;
381 convertStream2Key(e, akey, btr);
382 return 1;
383}
384btSIter *btGetFullRangeIter(bt *btr, bool asc, cswc_t *w) {
385 cswc_t W; // used in setHigh()
386 if (!btr->root || !btr->numkeys) return NULL;
387 if (!w) w = &W;
388 ai_obj *aL = &w->wf.alow, *aH = &w->wf.ahigh;
389 if (!assignMinKey(btr, aL) || !assignMaxKey(btr, aH)) return NULL;
390 btk_t btk;
391 bool med;
392 uint32 ksize;
393 CR8ITER8R(btr, asc, iter_leaf, iter_leaf_rev, iter_node, iter_node_rev);
394 siter->scan = 1;
395 setHigh(siter, asc ? aH : aL, btr->s.ktype);
396 char *bkey = createBTKey(asc ? aL : aH,
397 &med, &ksize, btr, &btk); //DEST 030
398 if (!bkey) goto frangeiter_err;
399 bt_n *x = NULL;
400 int i = -1;
401 uchar *stream = setIter(btr, bkey, siter, asc ? aL : aH, &x, &i, asc);
402 destroyBTKey(bkey, med); /* DESTROYED 030 */
403 if (!stream && siter->missed) return siter;//IILMISS
404 if (!streamToBTEntry(stream, siter, x, i)) goto frangeiter_err;
405 if (btr->dirty_left) siter->missed = 1; // FULL means 100% FULL
406 return siter;
407
408frangeiter_err:
409 btReleaseRangeIterator(siter);
410 return NULL;
411}
412
413btSIter *btSetFullRangeIter(btSIter *iter, bt *btr, bool asc, cswc_t *w) {
414 cswc_t W; // used in setHigh()
415 if (!btr->root || !btr->numkeys) return NULL;
416 if (!w) w = &W;
417 ai_obj *aL = &w->wf.alow, *aH = &w->wf.ahigh;
418 if (!assignMinKey(btr, aL) || !assignMaxKey(btr, aH)) return NULL;
419 btk_t btk;
420 bool med;
421 uint32 ksize;
422 SETITER8R(iter, btr, asc, iter_leaf, iter_leaf_rev, iter_node, iter_node_rev);
423 siter->scan = 1;
424 setHigh(siter, asc ? aH : aL, btr->s.ktype);
425 char *bkey = createBTKey(asc ? aL : aH,
426 &med, &ksize, btr, &btk); //DEST 030
427 if (!bkey) goto frangeiter_err;
428 bt_n *x = NULL;
429 int i = -1;
430 uchar *stream = setIter(btr, bkey, siter, asc ? aL : aH, &x, &i, asc);
431 destroyBTKey(bkey, med); /* DESTROYED 030 */
432 if (!stream && siter->missed) return siter;//IILMISS
433 if (!streamToBTEntry(stream, siter, x, i)) goto frangeiter_err;
434 if (btr->dirty_left) siter->missed = 1; // FULL means 100% FULL
435 return siter;
436
437frangeiter_err:
438 btReleaseRangeIterator(siter);
439 return NULL;
440}
441
442typedef struct four_longs {
443 long cnt;
444 long ofst;
445 long diff;
446 long over;
447} fol_t;
448
449#define INIT_ITER_BEENTRY(siter, btr, x, i) \
450 { uchar *iistream = KEYS(btr, x, i); streamToBTEntry(iistream, siter, x, i); }
451static bool btScionFind(btSIter *siter, bt_n *x, ulong ofst, bt *btr, bool asc,
452 cswc_t *w, long lim) {
453 int i = asc ? 0 : x->n;
454 int fin = asc ? x->n + 1 : -1;
455 while (i != fin) {
456 if (x->leaf) break;
457 uint32_t scion = NODES(btr, x)[i]->scion;
458 if (scion >= ofst) {
459 bool i_end_n = (i == siter->x.bln->self->n);
460 siter->x.bln->in = i;
461 siter->x.bln->ik = (i_end_n) ? i - 1 : i;
462 if (scion == ofst) {
463 if (!asc) {
464 siter->x.bln->in = siter->x.bln->ik = i - 1;
465 }
466 return 1;
467 }
468 siter->x.bln->child = get_new_iter_child(&siter->x);
469 to_child(&siter->x, NODES(btr, x)[i]);
470 bt_n *kid = NODES(btr, x)[i];
471 if (!kid->leaf) {
472 btScionFind(siter, kid, ofst, btr, asc, w, lim);
473 return 1;
474 } else x = kid;
475 break;
476 } else ofst -= (scion + 1); // +1 for NODE itself
477 i = asc ? i + 1 : i - 1; // loop increment
478 }
479 // Now Find the rest of the OFFSET (respecting DRs)
480 uint32 n = siter->x.bln->self->n;
481 i = asc ? 0 : n - 1;
482 fin = asc ? MIN(ofst, n) : MAX(-1, (n - ofst));
483 int last = asc ? n - 1 : 0;
484 ulong cnt = 0;
485 //TODO findminnode() is too inefficient -> needs to be a part of btr
486 bt_n *minx = findminnode(btr, btr->root);
487 int btdl = btr->dirty_left;
488 int dr = 0;
489 while (i != fin) {
490 dr = getDR(btr, x, i);
491 cnt += dr;
492 if (!i && x == minx) cnt += btdl;
493 if (cnt >= ofst) break;
494 cnt++;
495 i = asc ? i + 1 : i - 1; // loop increment
496 }
497 if (i == fin && i == last) {
498 if (cnt >= x->scion) return 0;
499 }
500 else if (cnt < ofst) return 0; //OFST 2big
501 siter->x.bln->ik = i;
502 INIT_ITER_BEENTRY(siter, btr, x, siter->x.bln->ik);
503 if (asc) {
504 if ((ofst + dr) != cnt) siter->missed = 1;
505 }
506 else {
507 if (!i && x == minx) {
508 if (ofst != (cnt - btdl)) siter->missed = 1;
509 }
510 else {
511 if (ofst != cnt) siter->missed = 1;
512 }
513 }
514 return 1;
515}
516btSIter *btGetFullXthIter(bt *btr, ulong oofst, bool asc, cswc_t *w, long lim) {
517 ulong ofst = oofst;
518 cswc_t W; // used in setHigh()
519 if (!btr->root || !btr->numkeys) return NULL;
520 if (!w) w = &W;
521 ai_obj *aL = &w->wf.alow, *aH = &w->wf.ahigh;
522 if (!assignMinKey(btr, aL) || !assignMaxKey(btr, aH)) return NULL;
523 CR8ITER8R(btr, asc, iter_leaf, iter_leaf_rev, iter_node, iter_node_rev);
524 setHigh(siter, asc ? aH : aL, btr->s.ktype);
525 if (btScionFind(siter, btr->root, ofst, btr, asc, w, lim)) siter->empty = 0;
526 return siter;
527}
528