1/*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#if defined(LIBC_SCCS) && !defined(lint)
38static char sccsid[] = "@(#)bt_overflow.c 8.5 (Berkeley) 7/16/94";
39#endif /* LIBC_SCCS and not lint */
40
41#include <sys/param.h>
42
43#include <stdio.h>
44#include <stdlib.h>
45#include <string.h>
46
47#include <db.h>
48#include "btree.h"
49
50/*
51 * Big key/data code.
52 *
53 * Big key and data entries are stored on linked lists of pages. The initial
54 * reference is byte string stored with the key or data and is the page number
55 * and size. The actual record is stored in a chain of pages linked by the
56 * nextpg field of the PAGE header.
57 *
58 * The first page of the chain has a special property. If the record is used
59 * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
60 * in the header.
61 *
62 * XXX
63 * A single DBT is written to each chain, so a lot of space on the last page
64 * is wasted. This is a fairly major bug for some data sets.
65 */
66
67/*
68 * __OVFL_GET -- Get an overflow key/data item.
69 *
70 * Parameters:
71 * t: tree
72 * p: pointer to { pgno_t, u_int32_t }
73 * buf: storage address
74 * bufsz: storage size
75 *
76 * Returns:
77 * RET_ERROR, RET_SUCCESS
78 */
79int
80__ovfl_get(t, p, ssz, buf, bufsz)
81 BTREE *t;
82 void *p;
83 size_t *ssz;
84 void **buf;
85 size_t *bufsz;
86{
87 PAGE *h;
88 pgno_t pg;
89 size_t nb, plen;
90 u_int32_t sz;
91
92 memcpy(&pg, p, sizeof(pgno_t));
93 memcpy(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
94 *ssz = sz;
95
96#ifdef DEBUG
97 if (pg == P_INVALID || sz == 0)
98 abort();
99#endif
100 /* Make the buffer bigger as necessary. */
101 if (*bufsz < sz) {
102 *buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz));
103 if (*buf == NULL)
104 return (RET_ERROR);
105 *bufsz = sz;
106 }
107
108 /*
109 * Step through the linked list of pages, copying the data on each one
110 * into the buffer. Never copy more than the data's length.
111 */
112 plen = t->bt_psize - BTDATAOFF;
113 for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) {
114 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
115 return (RET_ERROR);
116
117 nb = MIN(sz, plen);
118 memmove(p, (char *)h + BTDATAOFF, nb);
119 mpool_put(t->bt_mp, h, 0);
120
121 if ((sz -= nb) == 0)
122 break;
123 }
124 return (RET_SUCCESS);
125}
126
127/*
128 * __OVFL_PUT -- Store an overflow key/data item.
129 *
130 * Parameters:
131 * t: tree
132 * data: DBT to store
133 * pgno: storage page number
134 *
135 * Returns:
136 * RET_ERROR, RET_SUCCESS
137 */
138int
139__ovfl_put(t, dbt, pg)
140 BTREE *t;
141 const DBT *dbt;
142 pgno_t *pg;
143{
144 PAGE *h, *last;
145 void *p;
146 pgno_t npg;
147 size_t nb, plen;
148 u_int32_t sz;
149
150 /*
151 * Allocate pages and copy the key/data record into them. Store the
152 * number of the first page in the chain.
153 */
154 plen = t->bt_psize - BTDATAOFF;
155 for (last = NULL, p = dbt->data, sz = dbt->size;;
156 p = (char *)p + plen, last = h) {
157 if ((h = __bt_new(t, &npg)) == NULL)
158 return (RET_ERROR);
159
160 h->pgno = npg;
161 h->nextpg = h->prevpg = P_INVALID;
162 h->flags = P_OVERFLOW;
163 h->lower = h->upper = 0;
164
165 nb = MIN(sz, plen);
166 memmove((char *)h + BTDATAOFF, p, nb);
167
168 if (last) {
169 last->nextpg = h->pgno;
170 mpool_put(t->bt_mp, last, MPOOL_DIRTY);
171 } else
172 *pg = h->pgno;
173
174 if ((sz -= nb) == 0) {
175 mpool_put(t->bt_mp, h, MPOOL_DIRTY);
176 break;
177 }
178 }
179 return (RET_SUCCESS);
180}
181
182/*
183 * __OVFL_DELETE -- Delete an overflow chain.
184 *
185 * Parameters:
186 * t: tree
187 * p: pointer to { pgno_t, u_int32_t }
188 *
189 * Returns:
190 * RET_ERROR, RET_SUCCESS
191 */
192int
193__ovfl_delete(t, p)
194 BTREE *t;
195 void *p;
196{
197 PAGE *h;
198 pgno_t pg;
199 size_t plen;
200 u_int32_t sz;
201
202 memmove(&pg, p, sizeof(pgno_t));
203 memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
204
205#ifdef DEBUG
206 if (pg == P_INVALID || sz == 0)
207 abort();
208#endif
209 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
210 return (RET_ERROR);
211
212 /* Don't delete chains used by internal pages. */
213 if (h->flags & P_PRESERVE) {
214 mpool_put(t->bt_mp, h, 0);
215 return (RET_SUCCESS);
216 }
217
218 /* Step through the chain, calling the free routine for each page. */
219 for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) {
220 pg = h->nextpg;
221 __bt_free(t, h);
222 if (sz <= plen)
223 break;
224 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
225 return (RET_ERROR);
226 }
227 return (RET_SUCCESS);
228}
229