1 | /*- |
2 | * Copyright (c) 1990, 1993, 1994 |
3 | * The Regents of the University of California. All rights reserved. |
4 | * |
5 | * This code is derived from software contributed to Berkeley by |
6 | * Mike Olson. |
7 | * |
8 | * Redistribution and use in source and binary forms, with or without |
9 | * modification, are permitted provided that the following conditions |
10 | * are met: |
11 | * 1. Redistributions of source code must retain the above copyright |
12 | * notice, this list of conditions and the following disclaimer. |
13 | * 2. Redistributions in binary form must reproduce the above copyright |
14 | * notice, this list of conditions and the following disclaimer in the |
15 | * documentation and/or other materials provided with the distribution. |
16 | * 3. All advertising materials mentioning features or use of this software |
17 | * must display the following acknowledgement: |
18 | * This product includes software developed by the University of |
19 | * California, Berkeley and its contributors. |
20 | * 4. Neither the name of the University nor the names of its contributors |
21 | * may be used to endorse or promote products derived from this software |
22 | * without specific prior written permission. |
23 | * |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
34 | * SUCH DAMAGE. |
35 | */ |
36 | |
37 | #if defined(LIBC_SCCS) && !defined(lint) |
38 | static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94" ; |
39 | #endif /* LIBC_SCCS and not lint */ |
40 | |
41 | #include <sys/types.h> |
42 | |
43 | #include <errno.h> |
44 | #include <stddef.h> |
45 | #include <stdio.h> |
46 | #include <stdlib.h> |
47 | |
48 | #include <db.h> |
49 | #include "btree.h" |
50 | |
51 | static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); |
52 | static int __bt_seqadv __P((BTREE *, EPG *, int)); |
53 | static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); |
54 | |
55 | /* |
56 | * Sequential scan support. |
57 | * |
58 | * The tree can be scanned sequentially, starting from either end of the |
59 | * tree or from any specific key. A scan request before any scanning is |
60 | * done is initialized as starting from the least node. |
61 | */ |
62 | |
63 | /* |
64 | * __bt_seq -- |
65 | * Btree sequential scan interface. |
66 | * |
67 | * Parameters: |
68 | * dbp: pointer to access method |
69 | * key: key for positioning and return value |
70 | * data: data return value |
71 | * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. |
72 | * |
73 | * Returns: |
74 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. |
75 | */ |
76 | int |
77 | __bt_seq(dbp, key, data, flags) |
78 | const DB *dbp; |
79 | DBT *key, *data; |
80 | u_int flags; |
81 | { |
82 | BTREE *t; |
83 | EPG e; |
84 | int status; |
85 | |
86 | t = dbp->internal; |
87 | |
88 | /* Toss any page pinned across calls. */ |
89 | if (t->bt_pinned != NULL) { |
90 | mpool_put(t->bt_mp, t->bt_pinned, 0); |
91 | t->bt_pinned = NULL; |
92 | } |
93 | |
94 | /* |
95 | * If scan unitialized as yet, or starting at a specific record, set |
96 | * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin |
97 | * the page the cursor references if they're successful. |
98 | */ |
99 | switch (flags) { |
100 | case R_NEXT: |
101 | case R_PREV: |
102 | if (F_ISSET(&t->bt_cursor, CURS_INIT)) { |
103 | status = __bt_seqadv(t, &e, flags); |
104 | break; |
105 | } |
106 | /* FALLTHROUGH */ |
107 | case R_FIRST: |
108 | case R_LAST: |
109 | case R_CURSOR: |
110 | status = __bt_seqset(t, &e, key, flags); |
111 | break; |
112 | default: |
113 | errno = EINVAL; |
114 | return (RET_ERROR); |
115 | } |
116 | |
117 | if (status == RET_SUCCESS) { |
118 | __bt_setcur(t, e.page->pgno, e.index); |
119 | |
120 | status = |
121 | __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); |
122 | |
123 | /* |
124 | * If the user is doing concurrent access, we copied the |
125 | * key/data, toss the page. |
126 | */ |
127 | if (F_ISSET(t, B_DB_LOCK)) |
128 | mpool_put(t->bt_mp, e.page, 0); |
129 | else |
130 | t->bt_pinned = e.page; |
131 | } |
132 | return (status); |
133 | } |
134 | |
135 | /* |
136 | * __bt_seqset -- |
137 | * Set the sequential scan to a specific key. |
138 | * |
139 | * Parameters: |
140 | * t: tree |
141 | * ep: storage for returned key |
142 | * key: key for initial scan position |
143 | * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV |
144 | * |
145 | * Side effects: |
146 | * Pins the page the cursor references. |
147 | * |
148 | * Returns: |
149 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. |
150 | */ |
151 | static int |
152 | __bt_seqset(t, ep, key, flags) |
153 | BTREE *t; |
154 | EPG *ep; |
155 | DBT *key; |
156 | int flags; |
157 | { |
158 | PAGE *h; |
159 | pgno_t pg; |
160 | int exact; |
161 | |
162 | /* |
163 | * Find the first, last or specific key in the tree and point the |
164 | * cursor at it. The cursor may not be moved until a new key has |
165 | * been found. |
166 | */ |
167 | switch (flags) { |
168 | case R_CURSOR: /* Keyed scan. */ |
169 | /* |
170 | * Find the first instance of the key or the smallest key |
171 | * which is greater than or equal to the specified key. |
172 | */ |
173 | if (key->data == NULL || key->size == 0) { |
174 | errno = EINVAL; |
175 | return (RET_ERROR); |
176 | } |
177 | return (__bt_first(t, key, ep, &exact)); |
178 | case R_FIRST: /* First record. */ |
179 | case R_NEXT: |
180 | /* Walk down the left-hand side of the tree. */ |
181 | for (pg = P_ROOT;;) { |
182 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
183 | return (RET_ERROR); |
184 | |
185 | /* Check for an empty tree. */ |
186 | if (NEXTINDEX(h) == 0) { |
187 | mpool_put(t->bt_mp, h, 0); |
188 | return (RET_SPECIAL); |
189 | } |
190 | |
191 | if (h->flags & (P_BLEAF | P_RLEAF)) |
192 | break; |
193 | pg = GETBINTERNAL(h, 0)->pgno; |
194 | mpool_put(t->bt_mp, h, 0); |
195 | } |
196 | ep->page = h; |
197 | ep->index = 0; |
198 | break; |
199 | case R_LAST: /* Last record. */ |
200 | case R_PREV: |
201 | /* Walk down the right-hand side of the tree. */ |
202 | for (pg = P_ROOT;;) { |
203 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
204 | return (RET_ERROR); |
205 | |
206 | /* Check for an empty tree. */ |
207 | if (NEXTINDEX(h) == 0) { |
208 | mpool_put(t->bt_mp, h, 0); |
209 | return (RET_SPECIAL); |
210 | } |
211 | |
212 | if (h->flags & (P_BLEAF | P_RLEAF)) |
213 | break; |
214 | pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; |
215 | mpool_put(t->bt_mp, h, 0); |
216 | } |
217 | |
218 | ep->page = h; |
219 | ep->index = NEXTINDEX(h) - 1; |
220 | break; |
221 | } |
222 | return (RET_SUCCESS); |
223 | } |
224 | |
225 | /* |
226 | * __bt_seqadvance -- |
227 | * Advance the sequential scan. |
228 | * |
229 | * Parameters: |
230 | * t: tree |
231 | * flags: R_NEXT, R_PREV |
232 | * |
233 | * Side effects: |
234 | * Pins the page the new key/data record is on. |
235 | * |
236 | * Returns: |
237 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. |
238 | */ |
239 | static int |
240 | __bt_seqadv(t, ep, flags) |
241 | BTREE *t; |
242 | EPG *ep; |
243 | int flags; |
244 | { |
245 | CURSOR *c; |
246 | PAGE *h; |
247 | indx_t index = 0; |
248 | pgno_t pg; |
249 | int exact; |
250 | |
251 | /* |
252 | * There are a couple of states that we can be in. The cursor has |
253 | * been initialized by the time we get here, but that's all we know. |
254 | */ |
255 | c = &t->bt_cursor; |
256 | |
257 | /* |
258 | * The cursor was deleted where there weren't any duplicate records, |
259 | * so the key was saved. Find out where that key would go in the |
260 | * current tree. It doesn't matter if the returned key is an exact |
261 | * match or not -- if it's an exact match, the record was added after |
262 | * the delete so we can just return it. If not, as long as there's |
263 | * a record there, return it. |
264 | */ |
265 | if (F_ISSET(c, CURS_ACQUIRE)) |
266 | return (__bt_first(t, &c->key, ep, &exact)); |
267 | |
268 | /* Get the page referenced by the cursor. */ |
269 | if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) |
270 | return (RET_ERROR); |
271 | |
272 | /* |
273 | * Find the next/previous record in the tree and point the cursor at |
274 | * it. The cursor may not be moved until a new key has been found. |
275 | */ |
276 | switch (flags) { |
277 | case R_NEXT: /* Next record. */ |
278 | /* |
279 | * The cursor was deleted in duplicate records, and moved |
280 | * forward to a record that has yet to be returned. Clear |
281 | * that flag, and return the record. |
282 | */ |
283 | if (F_ISSET(c, CURS_AFTER)) |
284 | goto usecurrent; |
285 | index = c->pg.index; |
286 | if (++index == NEXTINDEX(h)) { |
287 | pg = h->nextpg; |
288 | mpool_put(t->bt_mp, h, 0); |
289 | if (pg == P_INVALID) |
290 | return (RET_SPECIAL); |
291 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
292 | return (RET_ERROR); |
293 | index = 0; |
294 | } |
295 | break; |
296 | case R_PREV: /* Previous record. */ |
297 | /* |
298 | * The cursor was deleted in duplicate records, and moved |
299 | * backward to a record that has yet to be returned. Clear |
300 | * that flag, and return the record. |
301 | */ |
302 | if (F_ISSET(c, CURS_BEFORE)) { |
303 | usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); |
304 | ep->page = h; |
305 | ep->index = c->pg.index; |
306 | return (RET_SUCCESS); |
307 | } |
308 | index = c->pg.index; |
309 | if (index == 0) { |
310 | pg = h->prevpg; |
311 | mpool_put(t->bt_mp, h, 0); |
312 | if (pg == P_INVALID) |
313 | return (RET_SPECIAL); |
314 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
315 | return (RET_ERROR); |
316 | index = NEXTINDEX(h) - 1; |
317 | } else |
318 | --index; |
319 | break; |
320 | } |
321 | |
322 | ep->page = h; |
323 | ep->index = index; |
324 | return (RET_SUCCESS); |
325 | } |
326 | |
327 | /* |
328 | * __bt_first -- |
329 | * Find the first entry. |
330 | * |
331 | * Parameters: |
332 | * t: the tree |
333 | * key: the key |
334 | * erval: return EPG |
335 | * exactp: pointer to exact match flag |
336 | * |
337 | * Returns: |
338 | * The first entry in the tree greater than or equal to key, |
339 | * or RET_SPECIAL if no such key exists. |
340 | */ |
341 | static int |
342 | __bt_first(t, key, erval, exactp) |
343 | BTREE *t; |
344 | const DBT *key; |
345 | EPG *erval; |
346 | int *exactp; |
347 | { |
348 | PAGE *h; |
349 | EPG *ep, save; |
350 | pgno_t pg; |
351 | |
352 | /* |
353 | * Find any matching record; __bt_search pins the page. |
354 | * |
355 | * If it's an exact match and duplicates are possible, walk backwards |
356 | * in the tree until we find the first one. Otherwise, make sure it's |
357 | * a valid key (__bt_search may return an index just past the end of a |
358 | * page) and return it. |
359 | */ |
360 | if ((ep = __bt_search(t, key, exactp)) == NULL) |
361 | return (RET_SPECIAL); |
362 | if (*exactp) { |
363 | if (F_ISSET(t, B_NODUPS)) { |
364 | *erval = *ep; |
365 | return (RET_SUCCESS); |
366 | } |
367 | |
368 | /* |
369 | * Walk backwards, as long as the entry matches and there are |
370 | * keys left in the tree. Save a copy of each match in case |
371 | * we go too far. |
372 | */ |
373 | save = *ep; |
374 | h = ep->page; |
375 | do { |
376 | if (save.page->pgno != ep->page->pgno) { |
377 | mpool_put(t->bt_mp, save.page, 0); |
378 | save = *ep; |
379 | } else |
380 | save.index = ep->index; |
381 | |
382 | /* |
383 | * Don't unpin the page the last (or original) match |
384 | * was on, but make sure it's unpinned if an error |
385 | * occurs. |
386 | */ |
387 | if (ep->index == 0) { |
388 | if (h->prevpg == P_INVALID) |
389 | break; |
390 | if (h->pgno != save.page->pgno) |
391 | mpool_put(t->bt_mp, h, 0); |
392 | if ((h = mpool_get(t->bt_mp, |
393 | h->prevpg, 0)) == NULL) { |
394 | if (h->pgno == save.page->pgno) |
395 | mpool_put(t->bt_mp, |
396 | save.page, 0); |
397 | return (RET_ERROR); |
398 | } |
399 | ep->page = h; |
400 | ep->index = NEXTINDEX(h); |
401 | } |
402 | --ep->index; |
403 | } while (__bt_cmp(t, key, ep) == 0); |
404 | |
405 | /* |
406 | * Reach here with the last page that was looked at pinned, |
407 | * which may or may not be the same as the last (or original) |
408 | * match page. If it's not useful, release it. |
409 | */ |
410 | if (h->pgno != save.page->pgno) |
411 | mpool_put(t->bt_mp, h, 0); |
412 | |
413 | *erval = save; |
414 | return (RET_SUCCESS); |
415 | } |
416 | |
417 | /* If at the end of a page, find the next entry. */ |
418 | if (ep->index == NEXTINDEX(ep->page)) { |
419 | h = ep->page; |
420 | pg = h->nextpg; |
421 | mpool_put(t->bt_mp, h, 0); |
422 | if (pg == P_INVALID) |
423 | return (RET_SPECIAL); |
424 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) |
425 | return (RET_ERROR); |
426 | ep->index = 0; |
427 | ep->page = h; |
428 | } |
429 | *erval = *ep; |
430 | return (RET_SUCCESS); |
431 | } |
432 | |
433 | /* |
434 | * __bt_setcur -- |
435 | * Set the cursor to an entry in the tree. |
436 | * |
437 | * Parameters: |
438 | * t: the tree |
439 | * pgno: page number |
440 | * index: page index |
441 | */ |
442 | void |
443 | __bt_setcur(t, pgno, index) |
444 | BTREE *t; |
445 | pgno_t pgno; |
446 | u_int index; |
447 | { |
448 | /* Lose any already deleted key. */ |
449 | if (t->bt_cursor.key.data != NULL) { |
450 | free(t->bt_cursor.key.data); |
451 | t->bt_cursor.key.size = 0; |
452 | t->bt_cursor.key.data = NULL; |
453 | } |
454 | F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); |
455 | |
456 | /* Update the cursor. */ |
457 | t->bt_cursor.pg.pgno = pgno; |
458 | t->bt_cursor.pg.index = index; |
459 | F_SET(&t->bt_cursor, CURS_INIT); |
460 | } |
461 | |