1 | /* |
2 | ** 2008 February 16 |
3 | ** |
4 | ** The author disclaims copyright to this source code. In place of |
5 | ** a legal notice, here is a blessing: |
6 | ** |
7 | ** May you do good and not evil. |
8 | ** May you find forgiveness for yourself and forgive others. |
9 | ** May you share freely, never taking more than you give. |
10 | ** |
11 | ************************************************************************* |
12 | ** This file implements an object that represents a fixed-length |
13 | ** bitmap. Bits are numbered starting with 1. |
14 | ** |
15 | ** A bitmap is used to record which pages of a database file have been |
16 | ** journalled during a transaction, or which pages have the "dont-write" |
17 | ** property. Usually only a few pages meet either condition. |
18 | ** So the bitmap is usually sparse and has low cardinality. |
19 | ** But sometimes (for example, during a DROP of a large table) most |
20 | ** or all of the pages in a database can get journalled. In those cases, |
21 | ** the bitmap becomes dense with high cardinality. The algorithm needs |
22 | ** to handle both cases well. |
23 | ** |
24 | ** The size of the bitmap is fixed when the object is created. |
25 | ** |
26 | ** All bits are clear when the bitmap is created. Individual bits |
27 | ** may be set or cleared one at a time. |
28 | ** |
29 | ** Test operations are about 100 times more common than set operations. |
30 | ** Clear operations are exceedingly rare. There are usually between |
31 | ** 5 and 500 set operations per Bitvec object, though the number of sets can |
32 | ** sometimes grow into tens of thousands or larger. The size of the |
33 | ** Bitvec object is the number of pages in the database file at the |
34 | ** start of a transaction, and is thus usually less than a few thousand, |
35 | ** but can be as large as 2 billion for a really big database. |
36 | */ |
37 | #include "sqliteInt.h" |
38 | |
/* Every Bitvec object occupies exactly this many bytes. */
#define BITVEC_SZ        512

/* Bytes left over for the union once the three u32 header fields have
** been subtracted from BITVEC_SZ.  The figure is rounded down to a whole
** multiple of the pointer size, since that is how the union will be
** aligned within the Bitvec struct. */
#define BITVEC_USIZE \
    ((((BITVEC_SZ)-(3*sizeof(u32)))/sizeof(Bitvec*))*sizeof(Bitvec*))

/* Element type of the flat bitmap array.  Its size should be a power
** of 2 and, ideally, divide BITVEC_USIZE evenly.  Choosing the "natural
** word" size of the CPU here may improve performance. */
#define BITVEC_TELEM     u8

/* Width, in bits, of one bitmap element. */
#define BITVEC_SZELEM    8

/* How many bitmap elements fit in the union. */
#define BITVEC_NELEM     (BITVEC_USIZE/sizeof(BITVEC_TELEM))

/* How many bits the flat bitmap array can hold. */
#define BITVEC_NBIT      (BITVEC_NELEM*BITVEC_SZELEM)

/* How many u32 slots the hash table has. */
#define BITVEC_NINT      (BITVEC_USIZE/sizeof(u32))

/* Entry count at which a hash table is sub-divided and re-hashed
** instead of accepting another value. */
#define BITVEC_MXHASH    (BITVEC_NINT/2)

/* Hash function for the aHash representation.  Empirical testing
** showed that a *37 multiplier (an arbitrary prime) gave no fewer
** collisions than the no-op *1, so the no-op is what is used.
** X is evaluated exactly once. */
#define BITVEC_HASH(X)   (((X)*1)%BITVEC_NINT)

/* How many sub-bitmap pointers fit in the union. */
#define BITVEC_NPTR      (BITVEC_USIZE/sizeof(Bitvec *))
71 | |
72 | |
73 | /* |
74 | ** A bitmap is an instance of the following structure. |
75 | ** |
76 | ** This bitmap records the existence of zero or more bits |
77 | ** with values between 1 and iSize, inclusive. |
78 | ** |
79 | ** There are three possible representations of the bitmap. |
80 | ** If iSize<=BITVEC_NBIT, then Bitvec.u.aBitmap[] is a straight |
81 | ** bitmap. The least significant bit is bit 1. |
82 | ** |
83 | ** If iSize>BITVEC_NBIT and iDivisor==0 then Bitvec.u.aHash[] is |
84 | ** a hash table that will hold up to BITVEC_MXHASH distinct values. |
85 | ** |
86 | ** Otherwise, the value i is redirected into one of BITVEC_NPTR |
87 | ** sub-bitmaps pointed to by Bitvec.u.apSub[]. Each subbitmap |
88 | ** handles up to iDivisor separate values of i. apSub[0] holds |
89 | ** values between 1 and iDivisor. apSub[1] holds values between |
90 | ** iDivisor+1 and 2*iDivisor. apSub[N] holds values between |
91 | ** N*iDivisor+1 and (N+1)*iDivisor. Each subbitmap is normalized |
92 | ** to hold deal with values between 1 and iDivisor. |
93 | */ |
94 | struct Bitvec { |
95 | u32 iSize; /* Maximum bit index. Max iSize is 4,294,967,296. */ |
96 | u32 nSet; /* Number of bits that are set - only valid for aHash |
97 | ** element. Max is BITVEC_NINT. For BITVEC_SZ of 512, |
98 | ** this would be 125. */ |
99 | u32 iDivisor; /* Number of bits handled by each apSub[] entry. */ |
100 | /* Should >=0 for apSub element. */ |
101 | /* Max iDivisor is max(u32) / BITVEC_NPTR + 1. */ |
102 | /* For a BITVEC_SZ of 512, this would be 34,359,739. */ |
103 | union { |
104 | BITVEC_TELEM aBitmap[BITVEC_NELEM]; /* Bitmap representation */ |
105 | u32 aHash[BITVEC_NINT]; /* Hash table representation */ |
106 | Bitvec *apSub[BITVEC_NPTR]; /* Recursive representation */ |
107 | } u; |
108 | }; |
109 | |
110 | /* |
111 | ** Create a new bitmap object able to handle bits between 0 and iSize, |
112 | ** inclusive. Return a pointer to the new object. Return NULL if |
113 | ** malloc fails. |
114 | */ |
115 | Bitvec *sqlite3BitvecCreate(u32 iSize){ |
116 | Bitvec *p; |
117 | assert( sizeof(*p)==BITVEC_SZ ); |
118 | p = sqlite3MallocZero( sizeof(*p) ); |
119 | if( p ){ |
120 | p->iSize = iSize; |
121 | } |
122 | return p; |
123 | } |
124 | |
125 | /* |
126 | ** Check to see if the i-th bit is set. Return true or false. |
127 | ** If p is NULL (if the bitmap has not been created) or if |
128 | ** i is out of range, then return false. |
129 | */ |
130 | int sqlite3BitvecTestNotNull(Bitvec *p, u32 i){ |
131 | assert( p!=0 ); |
132 | i--; |
133 | if( i>=p->iSize ) return 0; |
134 | while( p->iDivisor ){ |
135 | u32 bin = i/p->iDivisor; |
136 | i = i%p->iDivisor; |
137 | p = p->u.apSub[bin]; |
138 | if (!p) { |
139 | return 0; |
140 | } |
141 | } |
142 | if( p->iSize<=BITVEC_NBIT ){ |
143 | return (p->u.aBitmap[i/BITVEC_SZELEM] & (1<<(i&(BITVEC_SZELEM-1))))!=0; |
144 | } else{ |
145 | u32 h = BITVEC_HASH(i++); |
146 | while( p->u.aHash[h] ){ |
147 | if( p->u.aHash[h]==i ) return 1; |
148 | h = (h+1) % BITVEC_NINT; |
149 | } |
150 | return 0; |
151 | } |
152 | } |
153 | int sqlite3BitvecTest(Bitvec *p, u32 i){ |
154 | return p!=0 && sqlite3BitvecTestNotNull(p,i); |
155 | } |
156 | |
157 | /* |
158 | ** Set the i-th bit. Return 0 on success and an error code if |
159 | ** anything goes wrong. |
160 | ** |
161 | ** This routine might cause sub-bitmaps to be allocated. Failing |
162 | ** to get the memory needed to hold the sub-bitmap is the only |
163 | ** that can go wrong with an insert, assuming p and i are valid. |
164 | ** |
165 | ** The calling function must ensure that p is a valid Bitvec object |
166 | ** and that the value for "i" is within range of the Bitvec object. |
167 | ** Otherwise the behavior is undefined. |
168 | */ |
169 | int sqlite3BitvecSet(Bitvec *p, u32 i){ |
170 | u32 h; |
171 | if( p==0 ) return SQLITE_OK; |
172 | assert( i>0 ); |
173 | assert( i<=p->iSize ); |
174 | i--; |
175 | while((p->iSize > BITVEC_NBIT) && p->iDivisor) { |
176 | u32 bin = i/p->iDivisor; |
177 | i = i%p->iDivisor; |
178 | if( p->u.apSub[bin]==0 ){ |
179 | p->u.apSub[bin] = sqlite3BitvecCreate( p->iDivisor ); |
180 | if( p->u.apSub[bin]==0 ) return SQLITE_NOMEM_BKPT; |
181 | } |
182 | p = p->u.apSub[bin]; |
183 | } |
184 | if( p->iSize<=BITVEC_NBIT ){ |
185 | p->u.aBitmap[i/BITVEC_SZELEM] |= 1 << (i&(BITVEC_SZELEM-1)); |
186 | return SQLITE_OK; |
187 | } |
188 | h = BITVEC_HASH(i++); |
189 | /* if there wasn't a hash collision, and this doesn't */ |
190 | /* completely fill the hash, then just add it without */ |
191 | /* worring about sub-dividing and re-hashing. */ |
192 | if( !p->u.aHash[h] ){ |
193 | if (p->nSet<(BITVEC_NINT-1)) { |
194 | goto bitvec_set_end; |
195 | } else { |
196 | goto bitvec_set_rehash; |
197 | } |
198 | } |
199 | /* there was a collision, check to see if it's already */ |
200 | /* in hash, if not, try to find a spot for it */ |
201 | do { |
202 | if( p->u.aHash[h]==i ) return SQLITE_OK; |
203 | h++; |
204 | if( h>=BITVEC_NINT ) h = 0; |
205 | } while( p->u.aHash[h] ); |
206 | /* we didn't find it in the hash. h points to the first */ |
207 | /* available free spot. check to see if this is going to */ |
208 | /* make our hash too "full". */ |
209 | bitvec_set_rehash: |
210 | if( p->nSet>=BITVEC_MXHASH ){ |
211 | unsigned int j; |
212 | int rc; |
213 | u32 *aiValues = sqlite3StackAllocRaw(0, sizeof(p->u.aHash)); |
214 | if( aiValues==0 ){ |
215 | return SQLITE_NOMEM_BKPT; |
216 | }else{ |
217 | memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash)); |
218 | memset(p->u.apSub, 0, sizeof(p->u.apSub)); |
219 | p->iDivisor = (p->iSize + BITVEC_NPTR - 1)/BITVEC_NPTR; |
220 | rc = sqlite3BitvecSet(p, i); |
221 | for(j=0; j<BITVEC_NINT; j++){ |
222 | if( aiValues[j] ) rc |= sqlite3BitvecSet(p, aiValues[j]); |
223 | } |
224 | sqlite3StackFree(0, aiValues); |
225 | return rc; |
226 | } |
227 | } |
228 | bitvec_set_end: |
229 | p->nSet++; |
230 | p->u.aHash[h] = i; |
231 | return SQLITE_OK; |
232 | } |
233 | |
234 | /* |
235 | ** Clear the i-th bit. |
236 | ** |
237 | ** pBuf must be a pointer to at least BITVEC_SZ bytes of temporary storage |
238 | ** that BitvecClear can use to rebuilt its hash table. |
239 | */ |
240 | void sqlite3BitvecClear(Bitvec *p, u32 i, void *pBuf){ |
241 | if( p==0 ) return; |
242 | assert( i>0 ); |
243 | i--; |
244 | while( p->iDivisor ){ |
245 | u32 bin = i/p->iDivisor; |
246 | i = i%p->iDivisor; |
247 | p = p->u.apSub[bin]; |
248 | if (!p) { |
249 | return; |
250 | } |
251 | } |
252 | if( p->iSize<=BITVEC_NBIT ){ |
253 | p->u.aBitmap[i/BITVEC_SZELEM] &= ~(1 << (i&(BITVEC_SZELEM-1))); |
254 | }else{ |
255 | unsigned int j; |
256 | u32 *aiValues = pBuf; |
257 | memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash)); |
258 | memset(p->u.aHash, 0, sizeof(p->u.aHash)); |
259 | p->nSet = 0; |
260 | for(j=0; j<BITVEC_NINT; j++){ |
261 | if( aiValues[j] && aiValues[j]!=(i+1) ){ |
262 | u32 h = BITVEC_HASH(aiValues[j]-1); |
263 | p->nSet++; |
264 | while( p->u.aHash[h] ){ |
265 | h++; |
266 | if( h>=BITVEC_NINT ) h = 0; |
267 | } |
268 | p->u.aHash[h] = aiValues[j]; |
269 | } |
270 | } |
271 | } |
272 | } |
273 | |
274 | /* |
275 | ** Destroy a bitmap object. Reclaim all memory used. |
276 | */ |
277 | void sqlite3BitvecDestroy(Bitvec *p){ |
278 | if( p==0 ) return; |
279 | if( p->iDivisor ){ |
280 | unsigned int i; |
281 | for(i=0; i<BITVEC_NPTR; i++){ |
282 | sqlite3BitvecDestroy(p->u.apSub[i]); |
283 | } |
284 | } |
285 | sqlite3_free(p); |
286 | } |
287 | |
288 | /* |
289 | ** Return the value of the iSize parameter specified when Bitvec *p |
290 | ** was created. |
291 | */ |
292 | u32 sqlite3BitvecSize(Bitvec *p){ |
293 | return p->iSize; |
294 | } |
295 | |
296 | #ifndef SQLITE_UNTESTABLE |
/*
** Let V[] be an array of unsigned characters sufficient to hold
** up to N bits.  Let I be an integer between 0 and N.  0<=I<N.
** Then the following macros can be used to set, clear, or test
** individual bits within V.
**
** Every argument and every expansion is fully parenthesized so that
** the macros behave as single expressions no matter which operators
** appear in the arguments or next to the macro call.  Note that I is
** evaluated twice, so it must not have side effects.
**
** TESTBIT() evaluates to 1 if the bit is set and 0 otherwise.
*/
#define SETBIT(V,I)      ((V)[(I)>>3] |= (1<<((I)&7)))
#define CLEARBIT(V,I)    ((V)[(I)>>3] &= ~(1<<((I)&7)))
#define TESTBIT(V,I)     (((V)[(I)>>3]&(1<<((I)&7)))!=0)
306 | |
307 | /* |
308 | ** This routine runs an extensive test of the Bitvec code. |
309 | ** |
310 | ** The input is an array of integers that acts as a program |
311 | ** to test the Bitvec. The integers are opcodes followed |
312 | ** by 0, 1, or 3 operands, depending on the opcode. Another |
313 | ** opcode follows immediately after the last operand. |
314 | ** |
315 | ** There are 6 opcodes numbered from 0 through 5. 0 is the |
316 | ** "halt" opcode and causes the test to end. |
317 | ** |
318 | ** 0 Halt and return the number of errors |
319 | ** 1 N S X Set N bits beginning with S and incrementing by X |
320 | ** 2 N S X Clear N bits beginning with S and incrementing by X |
321 | ** 3 N Set N randomly chosen bits |
322 | ** 4 N Clear N randomly chosen bits |
323 | ** 5 N S X Set N bits from S increment X in array only, not in bitvec |
324 | ** |
325 | ** The opcodes 1 through 4 perform set and clear operations are performed |
326 | ** on both a Bitvec object and on a linear array of bits obtained from malloc. |
327 | ** Opcode 5 works on the linear array only, not on the Bitvec. |
328 | ** Opcode 5 is used to deliberately induce a fault in order to |
329 | ** confirm that error detection works. |
330 | ** |
331 | ** At the conclusion of the test the linear array is compared |
332 | ** against the Bitvec object. If there are any differences, |
333 | ** an error is returned. If they are the same, zero is returned. |
334 | ** |
335 | ** If a memory allocation error occurs, return -1. |
336 | */ |
337 | int sqlite3BitvecBuiltinTest(int sz, int *aOp){ |
338 | Bitvec *pBitvec = 0; |
339 | unsigned char *pV = 0; |
340 | int rc = -1; |
341 | int i, nx, pc, op; |
342 | void *pTmpSpace; |
343 | |
344 | /* Allocate the Bitvec to be tested and a linear array of |
345 | ** bits to act as the reference */ |
346 | pBitvec = sqlite3BitvecCreate( sz ); |
347 | pV = sqlite3MallocZero( (sz+7)/8 + 1 ); |
348 | pTmpSpace = sqlite3_malloc64(BITVEC_SZ); |
349 | if( pBitvec==0 || pV==0 || pTmpSpace==0 ) goto bitvec_end; |
350 | |
351 | /* NULL pBitvec tests */ |
352 | sqlite3BitvecSet(0, 1); |
353 | sqlite3BitvecClear(0, 1, pTmpSpace); |
354 | |
355 | /* Run the program */ |
356 | pc = i = 0; |
357 | while( (op = aOp[pc])!=0 ){ |
358 | switch( op ){ |
359 | case 1: |
360 | case 2: |
361 | case 5: { |
362 | nx = 4; |
363 | i = aOp[pc+2] - 1; |
364 | aOp[pc+2] += aOp[pc+3]; |
365 | break; |
366 | } |
367 | case 3: |
368 | case 4: |
369 | default: { |
370 | nx = 2; |
371 | sqlite3_randomness(sizeof(i), &i); |
372 | break; |
373 | } |
374 | } |
375 | if( (--aOp[pc+1]) > 0 ) nx = 0; |
376 | pc += nx; |
377 | i = (i & 0x7fffffff)%sz; |
378 | if( (op & 1)!=0 ){ |
379 | SETBIT(pV, (i+1)); |
380 | if( op!=5 ){ |
381 | if( sqlite3BitvecSet(pBitvec, i+1) ) goto bitvec_end; |
382 | } |
383 | }else{ |
384 | CLEARBIT(pV, (i+1)); |
385 | sqlite3BitvecClear(pBitvec, i+1, pTmpSpace); |
386 | } |
387 | } |
388 | |
389 | /* Test to make sure the linear array exactly matches the |
390 | ** Bitvec object. Start with the assumption that they do |
391 | ** match (rc==0). Change rc to non-zero if a discrepancy |
392 | ** is found. |
393 | */ |
394 | rc = sqlite3BitvecTest(0,0) + sqlite3BitvecTest(pBitvec, sz+1) |
395 | + sqlite3BitvecTest(pBitvec, 0) |
396 | + (sqlite3BitvecSize(pBitvec) - sz); |
397 | for(i=1; i<=sz; i++){ |
398 | if( (TESTBIT(pV,i))!=sqlite3BitvecTest(pBitvec,i) ){ |
399 | rc = i; |
400 | break; |
401 | } |
402 | } |
403 | |
404 | /* Free allocated structure */ |
405 | bitvec_end: |
406 | sqlite3_free(pTmpSpace); |
407 | sqlite3_free(pV); |
408 | sqlite3BitvecDestroy(pBitvec); |
409 | return rc; |
410 | } |
411 | #endif /* SQLITE_UNTESTABLE */ |
412 | |