1 | /* |
2 | ** 2008 December 3 |
3 | ** |
4 | ** The author disclaims copyright to this source code. In place of |
5 | ** a legal notice, here is a blessing: |
6 | ** |
7 | ** May you do good and not evil. |
8 | ** May you find forgiveness for yourself and forgive others. |
9 | ** May you share freely, never taking more than you give. |
10 | ** |
11 | ************************************************************************* |
12 | ** |
13 | ** This module implements an object we call a "RowSet". |
14 | ** |
15 | ** The RowSet object is a collection of rowids. Rowids |
16 | ** are inserted into the RowSet in an arbitrary order. Inserts |
17 | ** can be intermixed with tests to see if a given rowid has been |
18 | ** previously inserted into the RowSet. |
19 | ** |
20 | ** After all inserts are finished, it is possible to extract the |
21 | ** elements of the RowSet in sorted order. Once this extraction |
22 | ** process has started, no new elements may be inserted. |
23 | ** |
24 | ** Hence, the primitive operations for a RowSet are: |
25 | ** |
26 | ** CREATE |
27 | ** INSERT |
28 | ** TEST |
29 | ** SMALLEST |
30 | ** DESTROY |
31 | ** |
32 | ** The CREATE and DESTROY primitives are the constructor and destructor, |
33 | ** obviously. The INSERT primitive adds a new element to the RowSet. |
34 | ** TEST checks to see if an element is already in the RowSet. SMALLEST |
35 | ** extracts the least value from the RowSet. |
36 | ** |
37 | ** The INSERT primitive might allocate additional memory. Memory is |
38 | ** allocated in chunks so most INSERTs do no allocation. There is an |
39 | ** upper bound on the size of allocated memory. No memory is freed |
40 | ** until DESTROY. |
41 | ** |
42 | ** The TEST primitive includes a "batch" number. The TEST primitive |
43 | ** will only see elements that were inserted before the last change |
44 | ** in the batch number. In other words, if an INSERT occurs between |
45 | ** two TESTs where the TESTs have the same batch nubmer, then the |
46 | ** value added by the INSERT will not be visible to the second TEST. |
47 | ** The initial batch number is zero, so if the very first TEST contains |
48 | ** a non-zero batch number, it will see all prior INSERTs. |
49 | ** |
50 | ** No INSERTs may occurs after a SMALLEST. An assertion will fail if |
51 | ** that is attempted. |
52 | ** |
53 | ** The cost of an INSERT is roughly constant. (Sometimes new memory |
54 | ** has to be allocated on an INSERT.) The cost of a TEST with a new |
55 | ** batch number is O(NlogN) where N is the number of elements in the RowSet. |
56 | ** The cost of a TEST using the same batch number is O(logN). The cost |
57 | ** of the first SMALLEST is O(NlogN). Second and subsequent SMALLEST |
58 | ** primitives are constant time. The cost of DESTROY is O(N). |
59 | ** |
60 | ** TEST and SMALLEST may not be used by the same RowSet. This used to |
61 | ** be possible, but the feature was not used, so it was removed in order |
62 | ** to simplify the code. |
63 | */ |
64 | #include "sqliteInt.h" |
65 | |
66 | |
67 | /* |
68 | ** Target size for allocation chunks. |
69 | */ |
70 | #define ROWSET_ALLOCATION_SIZE 1024 |
71 | |
72 | /* |
73 | ** The number of rowset entries per allocation chunk. |
74 | */ |
75 | #define ROWSET_ENTRY_PER_CHUNK \ |
76 | ((ROWSET_ALLOCATION_SIZE-8)/sizeof(struct RowSetEntry)) |
77 | |
78 | /* |
79 | ** Each entry in a RowSet is an instance of the following object. |
80 | ** |
81 | ** This same object is reused to store a linked list of trees of RowSetEntry |
82 | ** objects. In that alternative use, pRight points to the next entry |
83 | ** in the list, pLeft points to the tree, and v is unused. The |
84 | ** RowSet.pForest value points to the head of this forest list. |
85 | */ |
86 | struct RowSetEntry { |
87 | i64 v; /* ROWID value for this entry */ |
88 | struct RowSetEntry *pRight; /* Right subtree (larger entries) or list */ |
89 | struct RowSetEntry *pLeft; /* Left subtree (smaller entries) */ |
90 | }; |
91 | |
92 | /* |
93 | ** RowSetEntry objects are allocated in large chunks (instances of the |
94 | ** following structure) to reduce memory allocation overhead. The |
95 | ** chunks are kept on a linked list so that they can be deallocated |
96 | ** when the RowSet is destroyed. |
97 | */ |
98 | struct RowSetChunk { |
99 | struct RowSetChunk *pNextChunk; /* Next chunk on list of them all */ |
100 | struct RowSetEntry aEntry[ROWSET_ENTRY_PER_CHUNK]; /* Allocated entries */ |
101 | }; |
102 | |
103 | /* |
104 | ** A RowSet in an instance of the following structure. |
105 | ** |
106 | ** A typedef of this structure if found in sqliteInt.h. |
107 | */ |
108 | struct RowSet { |
109 | struct RowSetChunk *pChunk; /* List of all chunk allocations */ |
110 | sqlite3 *db; /* The database connection */ |
111 | struct RowSetEntry *pEntry; /* List of entries using pRight */ |
112 | struct RowSetEntry *pLast; /* Last entry on the pEntry list */ |
113 | struct RowSetEntry *pFresh; /* Source of new entry objects */ |
114 | struct RowSetEntry *pForest; /* List of binary trees of entries */ |
115 | u16 nFresh; /* Number of objects on pFresh */ |
116 | u16 rsFlags; /* Various flags */ |
117 | int iBatch; /* Current insert batch */ |
118 | }; |
119 | |
120 | /* |
121 | ** Allowed values for RowSet.rsFlags |
122 | */ |
123 | #define ROWSET_SORTED 0x01 /* True if RowSet.pEntry is sorted */ |
124 | #define ROWSET_NEXT 0x02 /* True if sqlite3RowSetNext() has been called */ |
125 | |
126 | /* |
127 | ** Allocate a RowSet object. Return NULL if a memory allocation |
128 | ** error occurs. |
129 | */ |
130 | RowSet *sqlite3RowSetInit(sqlite3 *db){ |
131 | RowSet *p = sqlite3DbMallocRawNN(db, sizeof(*p)); |
132 | if( p ){ |
133 | int N = sqlite3DbMallocSize(db, p); |
134 | p->pChunk = 0; |
135 | p->db = db; |
136 | p->pEntry = 0; |
137 | p->pLast = 0; |
138 | p->pForest = 0; |
139 | p->pFresh = (struct RowSetEntry*)(ROUND8(sizeof(*p)) + (char*)p); |
140 | p->nFresh = (u16)((N - ROUND8(sizeof(*p)))/sizeof(struct RowSetEntry)); |
141 | p->rsFlags = ROWSET_SORTED; |
142 | p->iBatch = 0; |
143 | } |
144 | return p; |
145 | } |
146 | |
147 | /* |
148 | ** Deallocate all chunks from a RowSet. This frees all memory that |
149 | ** the RowSet has allocated over its lifetime. This routine is |
150 | ** the destructor for the RowSet. |
151 | */ |
152 | void sqlite3RowSetClear(void *pArg){ |
153 | RowSet *p = (RowSet*)pArg; |
154 | struct RowSetChunk *pChunk, *pNextChunk; |
155 | for(pChunk=p->pChunk; pChunk; pChunk = pNextChunk){ |
156 | pNextChunk = pChunk->pNextChunk; |
157 | sqlite3DbFree(p->db, pChunk); |
158 | } |
159 | p->pChunk = 0; |
160 | p->nFresh = 0; |
161 | p->pEntry = 0; |
162 | p->pLast = 0; |
163 | p->pForest = 0; |
164 | p->rsFlags = ROWSET_SORTED; |
165 | } |
166 | |
167 | /* |
168 | ** Deallocate all chunks from a RowSet. This frees all memory that |
169 | ** the RowSet has allocated over its lifetime. This routine is |
170 | ** the destructor for the RowSet. |
171 | */ |
172 | void sqlite3RowSetDelete(void *pArg){ |
173 | sqlite3RowSetClear(pArg); |
174 | sqlite3DbFree(((RowSet*)pArg)->db, pArg); |
175 | } |
176 | |
177 | /* |
178 | ** Allocate a new RowSetEntry object that is associated with the |
179 | ** given RowSet. Return a pointer to the new and completely uninitialized |
180 | ** object. |
181 | ** |
182 | ** In an OOM situation, the RowSet.db->mallocFailed flag is set and this |
183 | ** routine returns NULL. |
184 | */ |
185 | static struct RowSetEntry *rowSetEntryAlloc(RowSet *p){ |
186 | assert( p!=0 ); |
187 | if( p->nFresh==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
188 | /* We could allocate a fresh RowSetEntry each time one is needed, but it |
189 | ** is more efficient to pull a preallocated entry from the pool */ |
190 | struct RowSetChunk *pNew; |
191 | pNew = sqlite3DbMallocRawNN(p->db, sizeof(*pNew)); |
192 | if( pNew==0 ){ |
193 | return 0; |
194 | } |
195 | pNew->pNextChunk = p->pChunk; |
196 | p->pChunk = pNew; |
197 | p->pFresh = pNew->aEntry; |
198 | p->nFresh = ROWSET_ENTRY_PER_CHUNK; |
199 | } |
200 | p->nFresh--; |
201 | return p->pFresh++; |
202 | } |
203 | |
204 | /* |
205 | ** Insert a new value into a RowSet. |
206 | ** |
207 | ** The mallocFailed flag of the database connection is set if a |
208 | ** memory allocation fails. |
209 | */ |
210 | void sqlite3RowSetInsert(RowSet *p, i64 rowid){ |
211 | struct RowSetEntry *pEntry; /* The new entry */ |
212 | struct RowSetEntry *pLast; /* The last prior entry */ |
213 | |
214 | /* This routine is never called after sqlite3RowSetNext() */ |
215 | assert( p!=0 && (p->rsFlags & ROWSET_NEXT)==0 ); |
216 | |
217 | pEntry = rowSetEntryAlloc(p); |
218 | if( pEntry==0 ) return; |
219 | pEntry->v = rowid; |
220 | pEntry->pRight = 0; |
221 | pLast = p->pLast; |
222 | if( pLast ){ |
223 | if( rowid<=pLast->v ){ /*OPTIMIZATION-IF-FALSE*/ |
224 | /* Avoid unnecessary sorts by preserving the ROWSET_SORTED flags |
225 | ** where possible */ |
226 | p->rsFlags &= ~ROWSET_SORTED; |
227 | } |
228 | pLast->pRight = pEntry; |
229 | }else{ |
230 | p->pEntry = pEntry; |
231 | } |
232 | p->pLast = pEntry; |
233 | } |
234 | |
235 | /* |
236 | ** Merge two lists of RowSetEntry objects. Remove duplicates. |
237 | ** |
238 | ** The input lists are connected via pRight pointers and are |
239 | ** assumed to each already be in sorted order. |
240 | */ |
241 | static struct RowSetEntry *rowSetEntryMerge( |
242 | struct RowSetEntry *pA, /* First sorted list to be merged */ |
243 | struct RowSetEntry *pB /* Second sorted list to be merged */ |
244 | ){ |
245 | struct RowSetEntry head; |
246 | struct RowSetEntry *pTail; |
247 | |
248 | pTail = &head; |
249 | assert( pA!=0 && pB!=0 ); |
250 | for(;;){ |
251 | assert( pA->pRight==0 || pA->v<=pA->pRight->v ); |
252 | assert( pB->pRight==0 || pB->v<=pB->pRight->v ); |
253 | if( pA->v<=pB->v ){ |
254 | if( pA->v<pB->v ) pTail = pTail->pRight = pA; |
255 | pA = pA->pRight; |
256 | if( pA==0 ){ |
257 | pTail->pRight = pB; |
258 | break; |
259 | } |
260 | }else{ |
261 | pTail = pTail->pRight = pB; |
262 | pB = pB->pRight; |
263 | if( pB==0 ){ |
264 | pTail->pRight = pA; |
265 | break; |
266 | } |
267 | } |
268 | } |
269 | return head.pRight; |
270 | } |
271 | |
272 | /* |
273 | ** Sort all elements on the list of RowSetEntry objects into order of |
274 | ** increasing v. |
275 | */ |
276 | static struct RowSetEntry *rowSetEntrySort(struct RowSetEntry *pIn){ |
277 | unsigned int i; |
278 | struct RowSetEntry *pNext, *aBucket[40]; |
279 | |
280 | memset(aBucket, 0, sizeof(aBucket)); |
281 | while( pIn ){ |
282 | pNext = pIn->pRight; |
283 | pIn->pRight = 0; |
284 | for(i=0; aBucket[i]; i++){ |
285 | pIn = rowSetEntryMerge(aBucket[i], pIn); |
286 | aBucket[i] = 0; |
287 | } |
288 | aBucket[i] = pIn; |
289 | pIn = pNext; |
290 | } |
291 | pIn = aBucket[0]; |
292 | for(i=1; i<sizeof(aBucket)/sizeof(aBucket[0]); i++){ |
293 | if( aBucket[i]==0 ) continue; |
294 | pIn = pIn ? rowSetEntryMerge(pIn, aBucket[i]) : aBucket[i]; |
295 | } |
296 | return pIn; |
297 | } |
298 | |
299 | |
300 | /* |
301 | ** The input, pIn, is a binary tree (or subtree) of RowSetEntry objects. |
302 | ** Convert this tree into a linked list connected by the pRight pointers |
303 | ** and return pointers to the first and last elements of the new list. |
304 | */ |
305 | static void rowSetTreeToList( |
306 | struct RowSetEntry *pIn, /* Root of the input tree */ |
307 | struct RowSetEntry **ppFirst, /* Write head of the output list here */ |
308 | struct RowSetEntry **ppLast /* Write tail of the output list here */ |
309 | ){ |
310 | assert( pIn!=0 ); |
311 | if( pIn->pLeft ){ |
312 | struct RowSetEntry *p; |
313 | rowSetTreeToList(pIn->pLeft, ppFirst, &p); |
314 | p->pRight = pIn; |
315 | }else{ |
316 | *ppFirst = pIn; |
317 | } |
318 | if( pIn->pRight ){ |
319 | rowSetTreeToList(pIn->pRight, &pIn->pRight, ppLast); |
320 | }else{ |
321 | *ppLast = pIn; |
322 | } |
323 | assert( (*ppLast)->pRight==0 ); |
324 | } |
325 | |
326 | |
327 | /* |
328 | ** Convert a sorted list of elements (connected by pRight) into a binary |
329 | ** tree with depth of iDepth. A depth of 1 means the tree contains a single |
330 | ** node taken from the head of *ppList. A depth of 2 means a tree with |
331 | ** three nodes. And so forth. |
332 | ** |
333 | ** Use as many entries from the input list as required and update the |
334 | ** *ppList to point to the unused elements of the list. If the input |
335 | ** list contains too few elements, then construct an incomplete tree |
336 | ** and leave *ppList set to NULL. |
337 | ** |
338 | ** Return a pointer to the root of the constructed binary tree. |
339 | */ |
340 | static struct RowSetEntry *rowSetNDeepTree( |
341 | struct RowSetEntry **ppList, |
342 | int iDepth |
343 | ){ |
344 | struct RowSetEntry *p; /* Root of the new tree */ |
345 | struct RowSetEntry *pLeft; /* Left subtree */ |
346 | if( *ppList==0 ){ /*OPTIMIZATION-IF-TRUE*/ |
347 | /* Prevent unnecessary deep recursion when we run out of entries */ |
348 | return 0; |
349 | } |
350 | if( iDepth>1 ){ /*OPTIMIZATION-IF-TRUE*/ |
351 | /* This branch causes a *balanced* tree to be generated. A valid tree |
352 | ** is still generated without this branch, but the tree is wildly |
353 | ** unbalanced and inefficient. */ |
354 | pLeft = rowSetNDeepTree(ppList, iDepth-1); |
355 | p = *ppList; |
356 | if( p==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
357 | /* It is safe to always return here, but the resulting tree |
358 | ** would be unbalanced */ |
359 | return pLeft; |
360 | } |
361 | p->pLeft = pLeft; |
362 | *ppList = p->pRight; |
363 | p->pRight = rowSetNDeepTree(ppList, iDepth-1); |
364 | }else{ |
365 | p = *ppList; |
366 | *ppList = p->pRight; |
367 | p->pLeft = p->pRight = 0; |
368 | } |
369 | return p; |
370 | } |
371 | |
372 | /* |
373 | ** Convert a sorted list of elements into a binary tree. Make the tree |
374 | ** as deep as it needs to be in order to contain the entire list. |
375 | */ |
376 | static struct RowSetEntry *rowSetListToTree(struct RowSetEntry *pList){ |
377 | int iDepth; /* Depth of the tree so far */ |
378 | struct RowSetEntry *p; /* Current tree root */ |
379 | struct RowSetEntry *pLeft; /* Left subtree */ |
380 | |
381 | assert( pList!=0 ); |
382 | p = pList; |
383 | pList = p->pRight; |
384 | p->pLeft = p->pRight = 0; |
385 | for(iDepth=1; pList; iDepth++){ |
386 | pLeft = p; |
387 | p = pList; |
388 | pList = p->pRight; |
389 | p->pLeft = pLeft; |
390 | p->pRight = rowSetNDeepTree(&pList, iDepth); |
391 | } |
392 | return p; |
393 | } |
394 | |
395 | /* |
396 | ** Extract the smallest element from the RowSet. |
397 | ** Write the element into *pRowid. Return 1 on success. Return |
398 | ** 0 if the RowSet is already empty. |
399 | ** |
400 | ** After this routine has been called, the sqlite3RowSetInsert() |
401 | ** routine may not be called again. |
402 | ** |
403 | ** This routine may not be called after sqlite3RowSetTest() has |
404 | ** been used. Older versions of RowSet allowed that, but as the |
405 | ** capability was not used by the code generator, it was removed |
406 | ** for code economy. |
407 | */ |
408 | int sqlite3RowSetNext(RowSet *p, i64 *pRowid){ |
409 | assert( p!=0 ); |
410 | assert( p->pForest==0 ); /* Cannot be used with sqlite3RowSetText() */ |
411 | |
412 | /* Merge the forest into a single sorted list on first call */ |
413 | if( (p->rsFlags & ROWSET_NEXT)==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
414 | if( (p->rsFlags & ROWSET_SORTED)==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
415 | p->pEntry = rowSetEntrySort(p->pEntry); |
416 | } |
417 | p->rsFlags |= ROWSET_SORTED|ROWSET_NEXT; |
418 | } |
419 | |
420 | /* Return the next entry on the list */ |
421 | if( p->pEntry ){ |
422 | *pRowid = p->pEntry->v; |
423 | p->pEntry = p->pEntry->pRight; |
424 | if( p->pEntry==0 ){ /*OPTIMIZATION-IF-TRUE*/ |
425 | /* Free memory immediately, rather than waiting on sqlite3_finalize() */ |
426 | sqlite3RowSetClear(p); |
427 | } |
428 | return 1; |
429 | }else{ |
430 | return 0; |
431 | } |
432 | } |
433 | |
434 | /* |
435 | ** Check to see if element iRowid was inserted into the rowset as |
436 | ** part of any insert batch prior to iBatch. Return 1 or 0. |
437 | ** |
438 | ** If this is the first test of a new batch and if there exist entries |
439 | ** on pRowSet->pEntry, then sort those entries into the forest at |
440 | ** pRowSet->pForest so that they can be tested. |
441 | */ |
442 | int sqlite3RowSetTest(RowSet *pRowSet, int iBatch, sqlite3_int64 iRowid){ |
443 | struct RowSetEntry *p, *pTree; |
444 | |
445 | /* This routine is never called after sqlite3RowSetNext() */ |
446 | assert( pRowSet!=0 && (pRowSet->rsFlags & ROWSET_NEXT)==0 ); |
447 | |
448 | /* Sort entries into the forest on the first test of a new batch. |
449 | ** To save unnecessary work, only do this when the batch number changes. |
450 | */ |
451 | if( iBatch!=pRowSet->iBatch ){ /*OPTIMIZATION-IF-FALSE*/ |
452 | p = pRowSet->pEntry; |
453 | if( p ){ |
454 | struct RowSetEntry **ppPrevTree = &pRowSet->pForest; |
455 | if( (pRowSet->rsFlags & ROWSET_SORTED)==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
456 | /* Only sort the current set of entries if they need it */ |
457 | p = rowSetEntrySort(p); |
458 | } |
459 | for(pTree = pRowSet->pForest; pTree; pTree=pTree->pRight){ |
460 | ppPrevTree = &pTree->pRight; |
461 | if( pTree->pLeft==0 ){ |
462 | pTree->pLeft = rowSetListToTree(p); |
463 | break; |
464 | }else{ |
465 | struct RowSetEntry *pAux, *pTail; |
466 | rowSetTreeToList(pTree->pLeft, &pAux, &pTail); |
467 | pTree->pLeft = 0; |
468 | p = rowSetEntryMerge(pAux, p); |
469 | } |
470 | } |
471 | if( pTree==0 ){ |
472 | *ppPrevTree = pTree = rowSetEntryAlloc(pRowSet); |
473 | if( pTree ){ |
474 | pTree->v = 0; |
475 | pTree->pRight = 0; |
476 | pTree->pLeft = rowSetListToTree(p); |
477 | } |
478 | } |
479 | pRowSet->pEntry = 0; |
480 | pRowSet->pLast = 0; |
481 | pRowSet->rsFlags |= ROWSET_SORTED; |
482 | } |
483 | pRowSet->iBatch = iBatch; |
484 | } |
485 | |
486 | /* Test to see if the iRowid value appears anywhere in the forest. |
487 | ** Return 1 if it does and 0 if not. |
488 | */ |
489 | for(pTree = pRowSet->pForest; pTree; pTree=pTree->pRight){ |
490 | p = pTree->pLeft; |
491 | while( p ){ |
492 | if( p->v<iRowid ){ |
493 | p = p->pRight; |
494 | }else if( p->v>iRowid ){ |
495 | p = p->pLeft; |
496 | }else{ |
497 | return 1; |
498 | } |
499 | } |
500 | } |
501 | return 0; |
502 | } |
503 | |