| 1 | /*- |
| 2 | * Copyright (c) 1990, 1993, 1994 |
| 3 | * The Regents of the University of California. All rights reserved. |
| 4 | * |
| 5 | * This code is derived from software contributed to Berkeley by |
| 6 | * Mike Olson. |
| 7 | * |
| 8 | * Redistribution and use in source and binary forms, with or without |
| 9 | * modification, are permitted provided that the following conditions |
| 10 | * are met: |
| 11 | * 1. Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer in the |
| 15 | * documentation and/or other materials provided with the distribution. |
| 16 | * 3. All advertising materials mentioning features or use of this software |
| 17 | * must display the following acknowledgement: |
| 18 | * This product includes software developed by the University of |
| 19 | * California, Berkeley and its contributors. |
| 20 | * 4. Neither the name of the University nor the names of its contributors |
| 21 | * may be used to endorse or promote products derived from this software |
| 22 | * without specific prior written permission. |
| 23 | * |
| 24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 34 | * SUCH DAMAGE. |
| 35 | */ |
| 36 | |
| 37 | #if defined(LIBC_SCCS) && !defined(lint) |
| 38 | static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94" ; |
| 39 | #endif /* LIBC_SCCS and not lint */ |
| 40 | |
| 41 | #include <sys/types.h> |
| 42 | |
| 43 | #include <errno.h> |
| 44 | #include <stddef.h> |
| 45 | #include <stdio.h> |
| 46 | #include <stdlib.h> |
| 47 | |
| 48 | #include <db.h> |
| 49 | #include "btree.h" |
| 50 | |
| 51 | static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); |
| 52 | static int __bt_seqadv __P((BTREE *, EPG *, int)); |
| 53 | static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); |
| 54 | |
| 55 | /* |
| 56 | * Sequential scan support. |
| 57 | * |
| 58 | * The tree can be scanned sequentially, starting from either end of the |
| 59 | * tree or from any specific key. A scan request before any scanning is |
| 60 | * done is initialized as starting from the least node. |
| 61 | */ |
| 62 | |
| 63 | /* |
| 64 | * __bt_seq -- |
| 65 | * Btree sequential scan interface. |
| 66 | * |
| 67 | * Parameters: |
| 68 | * dbp: pointer to access method |
| 69 | * key: key for positioning and return value |
| 70 | * data: data return value |
| 71 | * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. |
| 72 | * |
| 73 | * Returns: |
| 74 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. |
| 75 | */ |
| 76 | int |
| 77 | __bt_seq(dbp, key, data, flags) |
| 78 | const DB *dbp; |
| 79 | DBT *key, *data; |
| 80 | u_int flags; |
| 81 | { |
| 82 | BTREE *t; |
| 83 | EPG e; |
| 84 | int status; |
| 85 | |
| 86 | t = dbp->internal; |
| 87 | |
| 88 | /* Toss any page pinned across calls. */ |
| 89 | if (t->bt_pinned != NULL) { |
| 90 | mpool_put(t->bt_mp, t->bt_pinned, 0); |
| 91 | t->bt_pinned = NULL; |
| 92 | } |
| 93 | |
| 94 | /* |
| 95 | * If scan unitialized as yet, or starting at a specific record, set |
| 96 | * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin |
| 97 | * the page the cursor references if they're successful. |
| 98 | */ |
| 99 | switch (flags) { |
| 100 | case R_NEXT: |
| 101 | case R_PREV: |
| 102 | if (F_ISSET(&t->bt_cursor, CURS_INIT)) { |
| 103 | status = __bt_seqadv(t, &e, flags); |
| 104 | break; |
| 105 | } |
| 106 | /* FALLTHROUGH */ |
| 107 | case R_FIRST: |
| 108 | case R_LAST: |
| 109 | case R_CURSOR: |
| 110 | status = __bt_seqset(t, &e, key, flags); |
| 111 | break; |
| 112 | default: |
| 113 | errno = EINVAL; |
| 114 | return (RET_ERROR); |
| 115 | } |
| 116 | |
| 117 | if (status == RET_SUCCESS) { |
| 118 | __bt_setcur(t, e.page->pgno, e.index); |
| 119 | |
| 120 | status = |
| 121 | __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); |
| 122 | |
| 123 | /* |
| 124 | * If the user is doing concurrent access, we copied the |
| 125 | * key/data, toss the page. |
| 126 | */ |
| 127 | if (F_ISSET(t, B_DB_LOCK)) |
| 128 | mpool_put(t->bt_mp, e.page, 0); |
| 129 | else |
| 130 | t->bt_pinned = e.page; |
| 131 | } |
| 132 | return (status); |
| 133 | } |
| 134 | |
| 135 | /* |
| 136 | * __bt_seqset -- |
| 137 | * Set the sequential scan to a specific key. |
| 138 | * |
| 139 | * Parameters: |
| 140 | * t: tree |
| 141 | * ep: storage for returned key |
| 142 | * key: key for initial scan position |
| 143 | * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV |
| 144 | * |
| 145 | * Side effects: |
| 146 | * Pins the page the cursor references. |
| 147 | * |
| 148 | * Returns: |
| 149 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. |
| 150 | */ |
| 151 | static int |
| 152 | __bt_seqset(t, ep, key, flags) |
| 153 | BTREE *t; |
| 154 | EPG *ep; |
| 155 | DBT *key; |
| 156 | int flags; |
| 157 | { |
| 158 | PAGE *h; |
| 159 | pgno_t pg; |
| 160 | int exact; |
| 161 | |
| 162 | /* |
| 163 | * Find the first, last or specific key in the tree and point the |
| 164 | * cursor at it. The cursor may not be moved until a new key has |
| 165 | * been found. |
| 166 | */ |
| 167 | switch (flags) { |
| 168 | case R_CURSOR: /* Keyed scan. */ |
| 169 | /* |
| 170 | * Find the first instance of the key or the smallest key |
| 171 | * which is greater than or equal to the specified key. |
| 172 | */ |
| 173 | if (key->data == NULL || key->size == 0) { |
| 174 | errno = EINVAL; |
| 175 | return (RET_ERROR); |
| 176 | } |
| 177 | return (__bt_first(t, key, ep, &exact)); |
| 178 | case R_FIRST: /* First record. */ |
| 179 | case R_NEXT: |
| 180 | /* Walk down the left-hand side of the tree. */ |
| 181 | for (pg = P_ROOT;;) { |
| 182 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
| 183 | return (RET_ERROR); |
| 184 | |
| 185 | /* Check for an empty tree. */ |
| 186 | if (NEXTINDEX(h) == 0) { |
| 187 | mpool_put(t->bt_mp, h, 0); |
| 188 | return (RET_SPECIAL); |
| 189 | } |
| 190 | |
| 191 | if (h->flags & (P_BLEAF | P_RLEAF)) |
| 192 | break; |
| 193 | pg = GETBINTERNAL(h, 0)->pgno; |
| 194 | mpool_put(t->bt_mp, h, 0); |
| 195 | } |
| 196 | ep->page = h; |
| 197 | ep->index = 0; |
| 198 | break; |
| 199 | case R_LAST: /* Last record. */ |
| 200 | case R_PREV: |
| 201 | /* Walk down the right-hand side of the tree. */ |
| 202 | for (pg = P_ROOT;;) { |
| 203 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
| 204 | return (RET_ERROR); |
| 205 | |
| 206 | /* Check for an empty tree. */ |
| 207 | if (NEXTINDEX(h) == 0) { |
| 208 | mpool_put(t->bt_mp, h, 0); |
| 209 | return (RET_SPECIAL); |
| 210 | } |
| 211 | |
| 212 | if (h->flags & (P_BLEAF | P_RLEAF)) |
| 213 | break; |
| 214 | pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; |
| 215 | mpool_put(t->bt_mp, h, 0); |
| 216 | } |
| 217 | |
| 218 | ep->page = h; |
| 219 | ep->index = NEXTINDEX(h) - 1; |
| 220 | break; |
| 221 | } |
| 222 | return (RET_SUCCESS); |
| 223 | } |
| 224 | |
| 225 | /* |
| 226 | * __bt_seqadvance -- |
| 227 | * Advance the sequential scan. |
| 228 | * |
| 229 | * Parameters: |
| 230 | * t: tree |
| 231 | * flags: R_NEXT, R_PREV |
| 232 | * |
| 233 | * Side effects: |
| 234 | * Pins the page the new key/data record is on. |
| 235 | * |
| 236 | * Returns: |
| 237 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. |
| 238 | */ |
| 239 | static int |
| 240 | __bt_seqadv(t, ep, flags) |
| 241 | BTREE *t; |
| 242 | EPG *ep; |
| 243 | int flags; |
| 244 | { |
| 245 | CURSOR *c; |
| 246 | PAGE *h; |
| 247 | indx_t index = 0; |
| 248 | pgno_t pg; |
| 249 | int exact; |
| 250 | |
| 251 | /* |
| 252 | * There are a couple of states that we can be in. The cursor has |
| 253 | * been initialized by the time we get here, but that's all we know. |
| 254 | */ |
| 255 | c = &t->bt_cursor; |
| 256 | |
| 257 | /* |
| 258 | * The cursor was deleted where there weren't any duplicate records, |
| 259 | * so the key was saved. Find out where that key would go in the |
| 260 | * current tree. It doesn't matter if the returned key is an exact |
| 261 | * match or not -- if it's an exact match, the record was added after |
| 262 | * the delete so we can just return it. If not, as long as there's |
| 263 | * a record there, return it. |
| 264 | */ |
| 265 | if (F_ISSET(c, CURS_ACQUIRE)) |
| 266 | return (__bt_first(t, &c->key, ep, &exact)); |
| 267 | |
| 268 | /* Get the page referenced by the cursor. */ |
| 269 | if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) |
| 270 | return (RET_ERROR); |
| 271 | |
| 272 | /* |
| 273 | * Find the next/previous record in the tree and point the cursor at |
| 274 | * it. The cursor may not be moved until a new key has been found. |
| 275 | */ |
| 276 | switch (flags) { |
| 277 | case R_NEXT: /* Next record. */ |
| 278 | /* |
| 279 | * The cursor was deleted in duplicate records, and moved |
| 280 | * forward to a record that has yet to be returned. Clear |
| 281 | * that flag, and return the record. |
| 282 | */ |
| 283 | if (F_ISSET(c, CURS_AFTER)) |
| 284 | goto usecurrent; |
| 285 | index = c->pg.index; |
| 286 | if (++index == NEXTINDEX(h)) { |
| 287 | pg = h->nextpg; |
| 288 | mpool_put(t->bt_mp, h, 0); |
| 289 | if (pg == P_INVALID) |
| 290 | return (RET_SPECIAL); |
| 291 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
| 292 | return (RET_ERROR); |
| 293 | index = 0; |
| 294 | } |
| 295 | break; |
| 296 | case R_PREV: /* Previous record. */ |
| 297 | /* |
| 298 | * The cursor was deleted in duplicate records, and moved |
| 299 | * backward to a record that has yet to be returned. Clear |
| 300 | * that flag, and return the record. |
| 301 | */ |
| 302 | if (F_ISSET(c, CURS_BEFORE)) { |
| 303 | usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); |
| 304 | ep->page = h; |
| 305 | ep->index = c->pg.index; |
| 306 | return (RET_SUCCESS); |
| 307 | } |
| 308 | index = c->pg.index; |
| 309 | if (index == 0) { |
| 310 | pg = h->prevpg; |
| 311 | mpool_put(t->bt_mp, h, 0); |
| 312 | if (pg == P_INVALID) |
| 313 | return (RET_SPECIAL); |
| 314 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
| 315 | return (RET_ERROR); |
| 316 | index = NEXTINDEX(h) - 1; |
| 317 | } else |
| 318 | --index; |
| 319 | break; |
| 320 | } |
| 321 | |
| 322 | ep->page = h; |
| 323 | ep->index = index; |
| 324 | return (RET_SUCCESS); |
| 325 | } |
| 326 | |
| 327 | /* |
| 328 | * __bt_first -- |
| 329 | * Find the first entry. |
| 330 | * |
| 331 | * Parameters: |
| 332 | * t: the tree |
| 333 | * key: the key |
| 334 | * erval: return EPG |
| 335 | * exactp: pointer to exact match flag |
| 336 | * |
| 337 | * Returns: |
| 338 | * The first entry in the tree greater than or equal to key, |
| 339 | * or RET_SPECIAL if no such key exists. |
| 340 | */ |
| 341 | static int |
| 342 | __bt_first(t, key, erval, exactp) |
| 343 | BTREE *t; |
| 344 | const DBT *key; |
| 345 | EPG *erval; |
| 346 | int *exactp; |
| 347 | { |
| 348 | PAGE *h; |
| 349 | EPG *ep, save; |
| 350 | pgno_t pg; |
| 351 | |
| 352 | /* |
| 353 | * Find any matching record; __bt_search pins the page. |
| 354 | * |
| 355 | * If it's an exact match and duplicates are possible, walk backwards |
| 356 | * in the tree until we find the first one. Otherwise, make sure it's |
| 357 | * a valid key (__bt_search may return an index just past the end of a |
| 358 | * page) and return it. |
| 359 | */ |
| 360 | if ((ep = __bt_search(t, key, exactp)) == NULL) |
| 361 | return (RET_SPECIAL); |
| 362 | if (*exactp) { |
| 363 | if (F_ISSET(t, B_NODUPS)) { |
| 364 | *erval = *ep; |
| 365 | return (RET_SUCCESS); |
| 366 | } |
| 367 | |
| 368 | /* |
| 369 | * Walk backwards, as long as the entry matches and there are |
| 370 | * keys left in the tree. Save a copy of each match in case |
| 371 | * we go too far. |
| 372 | */ |
| 373 | save = *ep; |
| 374 | h = ep->page; |
| 375 | do { |
| 376 | if (save.page->pgno != ep->page->pgno) { |
| 377 | mpool_put(t->bt_mp, save.page, 0); |
| 378 | save = *ep; |
| 379 | } else |
| 380 | save.index = ep->index; |
| 381 | |
| 382 | /* |
| 383 | * Don't unpin the page the last (or original) match |
| 384 | * was on, but make sure it's unpinned if an error |
| 385 | * occurs. |
| 386 | */ |
| 387 | if (ep->index == 0) { |
| 388 | if (h->prevpg == P_INVALID) |
| 389 | break; |
| 390 | if (h->pgno != save.page->pgno) |
| 391 | mpool_put(t->bt_mp, h, 0); |
| 392 | if ((h = mpool_get(t->bt_mp, |
| 393 | h->prevpg, 0)) == NULL) { |
| 394 | if (h->pgno == save.page->pgno) |
| 395 | mpool_put(t->bt_mp, |
| 396 | save.page, 0); |
| 397 | return (RET_ERROR); |
| 398 | } |
| 399 | ep->page = h; |
| 400 | ep->index = NEXTINDEX(h); |
| 401 | } |
| 402 | --ep->index; |
| 403 | } while (__bt_cmp(t, key, ep) == 0); |
| 404 | |
| 405 | /* |
| 406 | * Reach here with the last page that was looked at pinned, |
| 407 | * which may or may not be the same as the last (or original) |
| 408 | * match page. If it's not useful, release it. |
| 409 | */ |
| 410 | if (h->pgno != save.page->pgno) |
| 411 | mpool_put(t->bt_mp, h, 0); |
| 412 | |
| 413 | *erval = save; |
| 414 | return (RET_SUCCESS); |
| 415 | } |
| 416 | |
| 417 | /* If at the end of a page, find the next entry. */ |
| 418 | if (ep->index == NEXTINDEX(ep->page)) { |
| 419 | h = ep->page; |
| 420 | pg = h->nextpg; |
| 421 | mpool_put(t->bt_mp, h, 0); |
| 422 | if (pg == P_INVALID) |
| 423 | return (RET_SPECIAL); |
| 424 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
| 425 | return (RET_ERROR); |
| 426 | ep->index = 0; |
| 427 | ep->page = h; |
| 428 | } |
| 429 | *erval = *ep; |
| 430 | return (RET_SUCCESS); |
| 431 | } |
| 432 | |
| 433 | /* |
| 434 | * __bt_setcur -- |
| 435 | * Set the cursor to an entry in the tree. |
| 436 | * |
| 437 | * Parameters: |
| 438 | * t: the tree |
| 439 | * pgno: page number |
| 440 | * index: page index |
| 441 | */ |
| 442 | void |
| 443 | __bt_setcur(t, pgno, index) |
| 444 | BTREE *t; |
| 445 | pgno_t pgno; |
| 446 | u_int index; |
| 447 | { |
| 448 | /* Lose any already deleted key. */ |
| 449 | if (t->bt_cursor.key.data != NULL) { |
| 450 | free(t->bt_cursor.key.data); |
| 451 | t->bt_cursor.key.size = 0; |
| 452 | t->bt_cursor.key.data = NULL; |
| 453 | } |
| 454 | F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); |
| 455 | |
| 456 | /* Update the cursor. */ |
| 457 | t->bt_cursor.pg.pgno = pgno; |
| 458 | t->bt_cursor.pg.index = index; |
| 459 | F_SET(&t->bt_cursor, CURS_INIT); |
| 460 | } |
| 461 | |