1/*
2** 2004 April 6
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12** This file implements an external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information,
** including a description of the file format and an overview of operation.
15*/
16#include "btreeInt.h"
17
18/*
19** The header string that appears at the beginning of every
20** SQLite database.
21*/
22static const char zMagicHeader[] = SQLITE_FILE_HEADER;
23
24/*
25** Set this global variable to 1 to enable tracing using the TRACE
26** macro.
27*/
28#if 0
29int sqlite3BtreeTrace=1; /* True to enable tracing */
30# define TRACE(X) if(sqlite3BtreeTrace){printf X;fflush(stdout);}
31#else
32# define TRACE(X)
33#endif
34
35/*
36** Extract a 2-byte big-endian integer from an array of unsigned bytes.
37** But if the value is zero, make it 65536.
38**
39** This routine is used to extract the "offset to cell content area" value
40** from the header of a btree page. If the page size is 65536 and the page
41** is empty, the offset should be 65536, but the 2-byte value stores zero.
42** This routine makes the necessary adjustment to 65536.
43*/
44#define get2byteNotZero(X) (((((int)get2byte(X))-1)&0xffff)+1)
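
/*
** A minimal illustration (not compiled into the build) of the adjustment
** performed by get2byteNotZero() above. The stored 2-byte big-endian
** value 0 is read back as 65536, while any non-zero value is returned
** unchanged. The byte arrays below are hypothetical examples.
*/
#if 0
static void get2byteNotZeroExample(void){
  u8 aZero[2] = { 0x00, 0x00 };  /* what an empty 65536-byte page stores */
  u8 aOfst[2] = { 0x10, 0x00 };  /* an ordinary offset: 0x1000 == 4096   */
  assert( get2byteNotZero(aZero)==65536 );  /* zero maps to 65536        */
  assert( get2byteNotZero(aOfst)==4096 );   /* non-zero values unchanged */
}
#endif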
45
46/*
47** Values passed as the 5th argument to allocateBtreePage()
48*/
49#define BTALLOC_ANY 0 /* Allocate any page */
50#define BTALLOC_EXACT 1 /* Allocate exact page if possible */
51#define BTALLOC_LE 2 /* Allocate any page <= the parameter */
52
53/*
54** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not
55** defined, or 0 if it is. For example:
56**
57** bIncrVacuum = IfNotOmitAV(pBtShared->incrVacuum);
58*/
59#ifndef SQLITE_OMIT_AUTOVACUUM
60#define IfNotOmitAV(expr) (expr)
61#else
62#define IfNotOmitAV(expr) 0
63#endif
64
65#ifndef SQLITE_OMIT_SHARED_CACHE
66/*
67** A list of BtShared objects that are eligible for participation
68** in shared cache. This variable has file scope during normal builds,
69** but the test harness needs to access it so we make it global for
70** test builds.
71**
72** Access to this variable is protected by SQLITE_MUTEX_STATIC_MAIN.
73*/
74#ifdef SQLITE_TEST
75BtShared *SQLITE_WSD sqlite3SharedCacheList = 0;
76#else
77static BtShared *SQLITE_WSD sqlite3SharedCacheList = 0;
78#endif
79#endif /* SQLITE_OMIT_SHARED_CACHE */
80
81#ifndef SQLITE_OMIT_SHARED_CACHE
82/*
83** Enable or disable the shared pager and schema features.
84**
85** This routine has no effect on existing database connections.
** The shared cache setting affects only future calls to
** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2().
88*/
89int sqlite3_enable_shared_cache(int enable){
90 sqlite3GlobalConfig.sharedCacheEnabled = enable;
91 return SQLITE_OK;
92}
93#endif
94
95
96
97#ifdef SQLITE_OMIT_SHARED_CACHE
98 /*
99 ** The functions querySharedCacheTableLock(), setSharedCacheTableLock(),
100 ** and clearAllSharedCacheTableLocks()
101 ** manipulate entries in the BtShared.pLock linked list used to store
102 ** shared-cache table level locks. If the library is compiled with the
103 ** shared-cache feature disabled, then there is only ever one user
104 ** of each BtShared structure and so this locking is not necessary.
105 ** So define the lock related functions as no-ops.
106 */
107 #define querySharedCacheTableLock(a,b,c) SQLITE_OK
108 #define setSharedCacheTableLock(a,b,c) SQLITE_OK
109 #define clearAllSharedCacheTableLocks(a)
110 #define downgradeAllSharedCacheTableLocks(a)
111 #define hasSharedCacheTableLock(a,b,c,d) 1
112 #define hasReadConflicts(a, b) 0
113#endif
114
115#ifdef SQLITE_DEBUG
116/*
117** Return and reset the seek counter for a Btree object.
118*/
119sqlite3_uint64 sqlite3BtreeSeekCount(Btree *pBt){
120 u64 n = pBt->nSeek;
121 pBt->nSeek = 0;
122 return n;
123}
124#endif
125
126/*
127** Implementation of the SQLITE_CORRUPT_PAGE() macro. Takes a single
128** (MemPage*) as an argument. The (MemPage*) must not be NULL.
129**
130** If SQLITE_DEBUG is not defined, then this macro is equivalent to
131** SQLITE_CORRUPT_BKPT. Or, if SQLITE_DEBUG is set, then the log message
132** normally produced as a side-effect of SQLITE_CORRUPT_BKPT is augmented
133** with the page number and filename associated with the (MemPage*).
134*/
135#ifdef SQLITE_DEBUG
136int corruptPageError(int lineno, MemPage *p){
137 char *zMsg;
138 sqlite3BeginBenignMalloc();
139 zMsg = sqlite3_mprintf("database corruption page %d of %s",
140 (int)p->pgno, sqlite3PagerFilename(p->pBt->pPager, 0)
141 );
142 sqlite3EndBenignMalloc();
143 if( zMsg ){
144 sqlite3ReportError(SQLITE_CORRUPT, lineno, zMsg);
145 }
146 sqlite3_free(zMsg);
147 return SQLITE_CORRUPT_BKPT;
148}
149# define SQLITE_CORRUPT_PAGE(pMemPage) corruptPageError(__LINE__, pMemPage)
150#else
151# define SQLITE_CORRUPT_PAGE(pMemPage) SQLITE_CORRUPT_PGNO(pMemPage->pgno)
152#endif
153
154#ifndef SQLITE_OMIT_SHARED_CACHE
155
156#ifdef SQLITE_DEBUG
157/*
158**** This function is only used as part of an assert() statement. ***
159**
160** Check to see if pBtree holds the required locks to read or write to the
161** table with root page iRoot. Return 1 if it does and 0 if not.
162**
163** For example, when writing to a table with root-page iRoot via
164** Btree connection pBtree:
165**
166** assert( hasSharedCacheTableLock(pBtree, iRoot, 0, WRITE_LOCK) );
167**
168** When writing to an index that resides in a sharable database, the
169** caller should have first obtained a lock specifying the root page of
170** the corresponding table. This makes things a bit more complicated,
171** as this module treats each table as a separate structure. To determine
172** the table corresponding to the index being written, this
173** function has to search through the database schema.
174**
175** Instead of a lock on the table/index rooted at page iRoot, the caller may
176** hold a write-lock on the schema table (root page 1). This is also
177** acceptable.
178*/
179static int hasSharedCacheTableLock(
180 Btree *pBtree, /* Handle that must hold lock */
181 Pgno iRoot, /* Root page of b-tree */
182 int isIndex, /* True if iRoot is the root of an index b-tree */
183 int eLockType /* Required lock type (READ_LOCK or WRITE_LOCK) */
184){
185 Schema *pSchema = (Schema *)pBtree->pBt->pSchema;
186 Pgno iTab = 0;
187 BtLock *pLock;
188
189 /* If this database is not shareable, or if the client is reading
190 ** and has the read-uncommitted flag set, then no lock is required.
191 ** Return true immediately.
192 */
193 if( (pBtree->sharable==0)
194 || (eLockType==READ_LOCK && (pBtree->db->flags & SQLITE_ReadUncommit))
195 ){
196 return 1;
197 }
198
199 /* If the client is reading or writing an index and the schema is
200 ** not loaded, then it is too difficult to actually check to see if
201 ** the correct locks are held. So do not bother - just return true.
202 ** This case does not come up very often anyhow.
203 */
204 if( isIndex && (!pSchema || (pSchema->schemaFlags&DB_SchemaLoaded)==0) ){
205 return 1;
206 }
207
208 /* Figure out the root-page that the lock should be held on. For table
209 ** b-trees, this is just the root page of the b-tree being read or
210 ** written. For index b-trees, it is the root page of the associated
211 ** table. */
212 if( isIndex ){
213 HashElem *p;
214 int bSeen = 0;
215 for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){
216 Index *pIdx = (Index *)sqliteHashData(p);
217 if( pIdx->tnum==iRoot ){
218 if( bSeen ){
219 /* Two or more indexes share the same root page. There must
220 ** be imposter tables. So just return true. The assert is not
221 ** useful in that case. */
222 return 1;
223 }
224 iTab = pIdx->pTable->tnum;
225 bSeen = 1;
226 }
227 }
228 }else{
229 iTab = iRoot;
230 }
231
232 /* Search for the required lock. Either a write-lock on root-page iTab, a
233 ** write-lock on the schema table, or (if the client is reading) a
234 ** read-lock on iTab will suffice. Return 1 if any of these are found. */
235 for(pLock=pBtree->pBt->pLock; pLock; pLock=pLock->pNext){
236 if( pLock->pBtree==pBtree
237 && (pLock->iTable==iTab || (pLock->eLock==WRITE_LOCK && pLock->iTable==1))
238 && pLock->eLock>=eLockType
239 ){
240 return 1;
241 }
242 }
243
244 /* Failed to find the required lock. */
245 return 0;
246}
247#endif /* SQLITE_DEBUG */
248
249#ifdef SQLITE_DEBUG
250/*
251**** This function may be used as part of assert() statements only. ****
252**
253** Return true if it would be illegal for pBtree to write into the
254** table or index rooted at iRoot because other shared connections are
255** simultaneously reading that same table or index.
256**
257** It is illegal for pBtree to write if some other Btree object that
258** shares the same BtShared object is currently reading or writing
259** the iRoot table. Except, if the other Btree object has the
260** read-uncommitted flag set, then it is OK for the other object to
261** have a read cursor.
262**
263** For example, before writing to any part of the table or index
264** rooted at page iRoot, one should call:
265**
266** assert( !hasReadConflicts(pBtree, iRoot) );
267*/
268static int hasReadConflicts(Btree *pBtree, Pgno iRoot){
269 BtCursor *p;
270 for(p=pBtree->pBt->pCursor; p; p=p->pNext){
271 if( p->pgnoRoot==iRoot
272 && p->pBtree!=pBtree
273 && 0==(p->pBtree->db->flags & SQLITE_ReadUncommit)
274 ){
275 return 1;
276 }
277 }
278 return 0;
279}
280#endif /* #ifdef SQLITE_DEBUG */
281
282/*
283** Query to see if Btree handle p may obtain a lock of type eLock
284** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
285** SQLITE_OK if the lock may be obtained (by calling
286** setSharedCacheTableLock()), or SQLITE_LOCKED if not.
287*/
288static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){
289 BtShared *pBt = p->pBt;
290 BtLock *pIter;
291
292 assert( sqlite3BtreeHoldsMutex(p) );
293 assert( eLock==READ_LOCK || eLock==WRITE_LOCK );
294 assert( p->db!=0 );
295 assert( !(p->db->flags&SQLITE_ReadUncommit)||eLock==WRITE_LOCK||iTab==1 );
296
297 /* If requesting a write-lock, then the Btree must have an open write
298 ** transaction on this file. And, obviously, for this to be so there
299 ** must be an open write transaction on the file itself.
300 */
301 assert( eLock==READ_LOCK || (p==pBt->pWriter && p->inTrans==TRANS_WRITE) );
302 assert( eLock==READ_LOCK || pBt->inTransaction==TRANS_WRITE );
303
304 /* This routine is a no-op if the shared-cache is not enabled */
305 if( !p->sharable ){
306 return SQLITE_OK;
307 }
308
309 /* If some other connection is holding an exclusive lock, the
310 ** requested lock may not be obtained.
311 */
312 if( pBt->pWriter!=p && (pBt->btsFlags & BTS_EXCLUSIVE)!=0 ){
313 sqlite3ConnectionBlocked(p->db, pBt->pWriter->db);
314 return SQLITE_LOCKED_SHAREDCACHE;
315 }
316
317 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
318 /* The condition (pIter->eLock!=eLock) in the following if(...)
319 ** statement is a simplification of:
320 **
321 ** (eLock==WRITE_LOCK || pIter->eLock==WRITE_LOCK)
322 **
323 ** since we know that if eLock==WRITE_LOCK, then no other connection
324 ** may hold a WRITE_LOCK on any table in this file (since there can
325 ** only be a single writer).
326 */
327 assert( pIter->eLock==READ_LOCK || pIter->eLock==WRITE_LOCK );
328 assert( eLock==READ_LOCK || pIter->pBtree==p || pIter->eLock==READ_LOCK);
329 if( pIter->pBtree!=p && pIter->iTable==iTab && pIter->eLock!=eLock ){
330 sqlite3ConnectionBlocked(p->db, pIter->pBtree->db);
331 if( eLock==WRITE_LOCK ){
332 assert( p==pBt->pWriter );
333 pBt->btsFlags |= BTS_PENDING;
334 }
335 return SQLITE_LOCKED_SHAREDCACHE;
336 }
337 }
338 return SQLITE_OK;
339}
340#endif /* !SQLITE_OMIT_SHARED_CACHE */
341
342#ifndef SQLITE_OMIT_SHARED_CACHE
343/*
344** Add a lock on the table with root-page iTable to the shared-btree used
345** by Btree handle p. Parameter eLock must be either READ_LOCK or
346** WRITE_LOCK.
347**
348** This function assumes the following:
349**
350** (a) The specified Btree object p is connected to a sharable
351** database (one with the BtShared.sharable flag set), and
352**
353** (b) No other Btree objects hold a lock that conflicts
354** with the requested lock (i.e. querySharedCacheTableLock() has
355** already been called and returned SQLITE_OK).
356**
357** SQLITE_OK is returned if the lock is added successfully. SQLITE_NOMEM
358** is returned if a malloc attempt fails.
359*/
360static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){
361 BtShared *pBt = p->pBt;
362 BtLock *pLock = 0;
363 BtLock *pIter;
364
365 assert( sqlite3BtreeHoldsMutex(p) );
366 assert( eLock==READ_LOCK || eLock==WRITE_LOCK );
367 assert( p->db!=0 );
368
369 /* A connection with the read-uncommitted flag set will never try to
370 ** obtain a read-lock using this function. The only read-lock obtained
371 ** by a connection in read-uncommitted mode is on the sqlite_schema
372 ** table, and that lock is obtained in BtreeBeginTrans(). */
373 assert( 0==(p->db->flags&SQLITE_ReadUncommit) || eLock==WRITE_LOCK );
374
375 /* This function should only be called on a sharable b-tree after it
376 ** has been determined that no other b-tree holds a conflicting lock. */
377 assert( p->sharable );
378 assert( SQLITE_OK==querySharedCacheTableLock(p, iTable, eLock) );
379
380 /* First search the list for an existing lock on this table. */
381 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
382 if( pIter->iTable==iTable && pIter->pBtree==p ){
383 pLock = pIter;
384 break;
385 }
386 }
387
388 /* If the above search did not find a BtLock struct associating Btree p
389 ** with table iTable, allocate one and link it into the list.
390 */
391 if( !pLock ){
392 pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock));
393 if( !pLock ){
394 return SQLITE_NOMEM_BKPT;
395 }
396 pLock->iTable = iTable;
397 pLock->pBtree = p;
398 pLock->pNext = pBt->pLock;
399 pBt->pLock = pLock;
400 }
401
402 /* Set the BtLock.eLock variable to the maximum of the current lock
403 ** and the requested lock. This means if a write-lock was already held
404 ** and a read-lock requested, we don't incorrectly downgrade the lock.
405 */
406 assert( WRITE_LOCK>READ_LOCK );
407 if( eLock>pLock->eLock ){
408 pLock->eLock = eLock;
409 }
410
411 return SQLITE_OK;
412}
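
/*
** A minimal sketch (not compiled into the build) of the query-then-set
** pattern that the two routines above assume: the caller first checks
** whether the lock may be taken and only then records it. The table
** number and lock type used here are hypothetical.
*/
#if 0
static int lockTableSketch(Btree *p, Pgno iTab){
  int rc = querySharedCacheTableLock(p, iTab, WRITE_LOCK);
  if( rc==SQLITE_OK ){
    rc = setSharedCacheTableLock(p, iTab, WRITE_LOCK);
  }
  return rc;  /* SQLITE_LOCKED_SHAREDCACHE if another connection conflicts */
}
#endif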
413#endif /* !SQLITE_OMIT_SHARED_CACHE */
414
415#ifndef SQLITE_OMIT_SHARED_CACHE
416/*
417** Release all the table locks (locks obtained via calls to
418** the setSharedCacheTableLock() procedure) held by Btree object p.
419**
420** This function assumes that Btree p has an open read or write
421** transaction. If it does not, then the BTS_PENDING flag
422** may be incorrectly cleared.
423*/
424static void clearAllSharedCacheTableLocks(Btree *p){
425 BtShared *pBt = p->pBt;
426 BtLock **ppIter = &pBt->pLock;
427
428 assert( sqlite3BtreeHoldsMutex(p) );
429 assert( p->sharable || 0==*ppIter );
430 assert( p->inTrans>0 );
431
432 while( *ppIter ){
433 BtLock *pLock = *ppIter;
434 assert( (pBt->btsFlags & BTS_EXCLUSIVE)==0 || pBt->pWriter==pLock->pBtree );
435 assert( pLock->pBtree->inTrans>=pLock->eLock );
436 if( pLock->pBtree==p ){
437 *ppIter = pLock->pNext;
438 assert( pLock->iTable!=1 || pLock==&p->lock );
439 if( pLock->iTable!=1 ){
440 sqlite3_free(pLock);
441 }
442 }else{
443 ppIter = &pLock->pNext;
444 }
445 }
446
447 assert( (pBt->btsFlags & BTS_PENDING)==0 || pBt->pWriter );
448 if( pBt->pWriter==p ){
449 pBt->pWriter = 0;
450 pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING);
451 }else if( pBt->nTransaction==2 ){
452 /* This function is called when Btree p is concluding its
453 ** transaction. If there currently exists a writer, and p is not
454 ** that writer, then the number of locks held by connections other
455 ** than the writer must be about to drop to zero. In this case
456 ** set the BTS_PENDING flag to 0.
457 **
458 ** If there is not currently a writer, then BTS_PENDING must
459 ** be zero already. So this next line is harmless in that case.
460 */
461 pBt->btsFlags &= ~BTS_PENDING;
462 }
463}
464
465/*
466** This function changes all write-locks held by Btree p into read-locks.
467*/
468static void downgradeAllSharedCacheTableLocks(Btree *p){
469 BtShared *pBt = p->pBt;
470 if( pBt->pWriter==p ){
471 BtLock *pLock;
472 pBt->pWriter = 0;
473 pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING);
474 for(pLock=pBt->pLock; pLock; pLock=pLock->pNext){
475 assert( pLock->eLock==READ_LOCK || pLock->pBtree==p );
476 pLock->eLock = READ_LOCK;
477 }
478 }
479}
480
481#endif /* SQLITE_OMIT_SHARED_CACHE */
482
483static void releasePage(MemPage *pPage); /* Forward reference */
484static void releasePageOne(MemPage *pPage); /* Forward reference */
485static void releasePageNotNull(MemPage *pPage); /* Forward reference */
486
487/*
488***** This routine is used inside of assert() only ****
489**
490** Verify that the cursor holds the mutex on its BtShared
491*/
492#ifdef SQLITE_DEBUG
493static int cursorHoldsMutex(BtCursor *p){
494 return sqlite3_mutex_held(p->pBt->mutex);
495}
496
/* Verify that the cursor and the BtShared agree about what is the current
** database connection. This is important in shared-cache mode. If the database
** connection pointers get out-of-sync, it is possible for routines like
** btreeInitPage() to reference a stale connection pointer that references a
** connection that has already closed. This routine is used inside assert()
** statements only and for the purpose of double-checking that the btree code
** does keep the database connection pointers up-to-date.
*/
505static int cursorOwnsBtShared(BtCursor *p){
506 assert( cursorHoldsMutex(p) );
507 return (p->pBtree->db==p->pBt->db);
508}
509#endif
510
/*
** Invalidate the overflow cache of the cursor passed as the first argument.
*/
515#define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl)
516
517/*
518** Invalidate the overflow page-list cache for all cursors opened
519** on the shared btree structure pBt.
520*/
521static void invalidateAllOverflowCache(BtShared *pBt){
522 BtCursor *p;
523 assert( sqlite3_mutex_held(pBt->mutex) );
524 for(p=pBt->pCursor; p; p=p->pNext){
525 invalidateOverflowCache(p);
526 }
527}
528
529#ifndef SQLITE_OMIT_INCRBLOB
530/*
531** This function is called before modifying the contents of a table
532** to invalidate any incrblob cursors that are open on the
533** row or one of the rows being modified.
534**
535** If argument isClearTable is true, then the entire contents of the
536** table is about to be deleted. In this case invalidate all incrblob
537** cursors open on any row within the table with root-page pgnoRoot.
538**
539** Otherwise, if argument isClearTable is false, then the row with
540** rowid iRow is being replaced or deleted. In this case invalidate
541** only those incrblob cursors open on that specific row.
542*/
543static void invalidateIncrblobCursors(
544 Btree *pBtree, /* The database file to check */
545 Pgno pgnoRoot, /* The table that might be changing */
546 i64 iRow, /* The rowid that might be changing */
547 int isClearTable /* True if all rows are being deleted */
548){
549 BtCursor *p;
550 assert( pBtree->hasIncrblobCur );
551 assert( sqlite3BtreeHoldsMutex(pBtree) );
552 pBtree->hasIncrblobCur = 0;
553 for(p=pBtree->pBt->pCursor; p; p=p->pNext){
554 if( (p->curFlags & BTCF_Incrblob)!=0 ){
555 pBtree->hasIncrblobCur = 1;
556 if( p->pgnoRoot==pgnoRoot && (isClearTable || p->info.nKey==iRow) ){
557 p->eState = CURSOR_INVALID;
558 }
559 }
560 }
561}
562
563#else
564 /* Stub function when INCRBLOB is omitted */
565 #define invalidateIncrblobCursors(w,x,y,z)
566#endif /* SQLITE_OMIT_INCRBLOB */
567
568/*
569** Set bit pgno of the BtShared.pHasContent bitvec. This is called
570** when a page that previously contained data becomes a free-list leaf
571** page.
572**
573** The BtShared.pHasContent bitvec exists to work around an obscure
574** bug caused by the interaction of two useful IO optimizations surrounding
575** free-list leaf pages:
576**
577** 1) When all data is deleted from a page and the page becomes
578** a free-list leaf page, the page is not written to the database
579** (as free-list leaf pages contain no meaningful data). Sometimes
580** such a page is not even journalled (as it will not be modified,
581** why bother journalling it?).
582**
583** 2) When a free-list leaf page is reused, its content is not read
584** from the database or written to the journal file (why should it
585** be, if it is not at all meaningful?).
586**
587** By themselves, these optimizations work fine and provide a handy
588** performance boost to bulk delete or insert operations. However, if
589** a page is moved to the free-list and then reused within the same
590** transaction, a problem comes up. If the page is not journalled when
591** it is moved to the free-list and it is also not journalled when it
592** is extracted from the free-list and reused, then the original data
593** may be lost. In the event of a rollback, it may not be possible
594** to restore the database to its original configuration.
595**
596** The solution is the BtShared.pHasContent bitvec. Whenever a page is
597** moved to become a free-list leaf page, the corresponding bit is
598** set in the bitvec. Whenever a leaf page is extracted from the free-list,
599** optimization 2 above is omitted if the corresponding bit is already
600** set in BtShared.pHasContent. The contents of the bitvec are cleared
601** at the end of every transaction.
602*/
603static int btreeSetHasContent(BtShared *pBt, Pgno pgno){
604 int rc = SQLITE_OK;
605 if( !pBt->pHasContent ){
606 assert( pgno<=pBt->nPage );
607 pBt->pHasContent = sqlite3BitvecCreate(pBt->nPage);
608 if( !pBt->pHasContent ){
609 rc = SQLITE_NOMEM_BKPT;
610 }
611 }
612 if( rc==SQLITE_OK && pgno<=sqlite3BitvecSize(pBt->pHasContent) ){
613 rc = sqlite3BitvecSet(pBt->pHasContent, pgno);
614 }
615 return rc;
616}
617
618/*
619** Query the BtShared.pHasContent vector.
620**
621** This function is called when a free-list leaf page is removed from the
622** free-list for reuse. It returns false if it is safe to retrieve the
623** page from the pager layer with the 'no-content' flag set. True otherwise.
624*/
625static int btreeGetHasContent(BtShared *pBt, Pgno pgno){
626 Bitvec *p = pBt->pHasContent;
627 return p && (pgno>sqlite3BitvecSize(p) || sqlite3BitvecTestNotNull(p, pgno));
628}
629
630/*
631** Clear (destroy) the BtShared.pHasContent bitvec. This should be
632** invoked at the conclusion of each write-transaction.
633*/
634static void btreeClearHasContent(BtShared *pBt){
635 sqlite3BitvecDestroy(pBt->pHasContent);
636 pBt->pHasContent = 0;
637}
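
/*
** A minimal sketch (not compiled into the build) showing how the three
** pHasContent helpers above fit together over the life of one write
** transaction. The page number and control flow are hypothetical.
*/
#if 0
static int hasContentLifecycleSketch(BtShared *pBt, Pgno pgno){
  /* 1. Page pgno is emptied and becomes a free-list leaf: remember that
  **    it once held content. */
  int rc = btreeSetHasContent(pBt, pgno);
  if( rc!=SQLITE_OK ) return rc;

  /* 2. Later in the same transaction the page is pulled off the free-list.
  **    If its bit is set, fetch the page with its real content rather than
  **    using the 'no-content' shortcut, so a rollback can restore it. */
  if( btreeGetHasContent(pBt, pgno) ){
    /* read the page normally */
  }

  /* 3. At the end of the write transaction the bitvec is discarded. */
  btreeClearHasContent(pBt);
  return SQLITE_OK;
}
#endif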
638
639/*
640** Release all of the apPage[] pages for a cursor.
641*/
642static void btreeReleaseAllCursorPages(BtCursor *pCur){
643 int i;
644 if( pCur->iPage>=0 ){
645 for(i=0; i<pCur->iPage; i++){
646 releasePageNotNull(pCur->apPage[i]);
647 }
648 releasePageNotNull(pCur->pPage);
649 pCur->iPage = -1;
650 }
651}
652
653/*
654** The cursor passed as the only argument must point to a valid entry
655** when this function is called (i.e. have eState==CURSOR_VALID). This
656** function saves the current cursor key in variables pCur->nKey and
657** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error
658** code otherwise.
659**
660** If the cursor is open on an intkey table, then the integer key
661** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to
662** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is
663** set to point to a malloced buffer pCur->nKey bytes in size containing
664** the key.
665*/
666static int saveCursorKey(BtCursor *pCur){
667 int rc = SQLITE_OK;
668 assert( CURSOR_VALID==pCur->eState );
669 assert( 0==pCur->pKey );
670 assert( cursorHoldsMutex(pCur) );
671
672 if( pCur->curIntKey ){
673 /* Only the rowid is required for a table btree */
674 pCur->nKey = sqlite3BtreeIntegerKey(pCur);
675 }else{
676 /* For an index btree, save the complete key content. It is possible
677 ** that the current key is corrupt. In that case, it is possible that
678 ** the sqlite3VdbeRecordUnpack() function may overread the buffer by
679 ** up to the size of 1 varint plus 1 8-byte value when the cursor
680 ** position is restored. Hence the 17 bytes of padding allocated
681 ** below. */
682 void *pKey;
683 pCur->nKey = sqlite3BtreePayloadSize(pCur);
684 pKey = sqlite3Malloc( pCur->nKey + 9 + 8 );
685 if( pKey ){
686 rc = sqlite3BtreePayload(pCur, 0, (int)pCur->nKey, pKey);
687 if( rc==SQLITE_OK ){
688 memset(((u8*)pKey)+pCur->nKey, 0, 9+8);
689 pCur->pKey = pKey;
690 }else{
691 sqlite3_free(pKey);
692 }
693 }else{
694 rc = SQLITE_NOMEM_BKPT;
695 }
696 }
697 assert( !pCur->curIntKey || !pCur->pKey );
698 return rc;
699}
700
701/*
702** Save the current cursor position in the variables BtCursor.nKey
703** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
704**
705** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID)
706** prior to calling this routine.
707*/
708static int saveCursorPosition(BtCursor *pCur){
709 int rc;
710
711 assert( CURSOR_VALID==pCur->eState || CURSOR_SKIPNEXT==pCur->eState );
712 assert( 0==pCur->pKey );
713 assert( cursorHoldsMutex(pCur) );
714
715 if( pCur->curFlags & BTCF_Pinned ){
716 return SQLITE_CONSTRAINT_PINNED;
717 }
718 if( pCur->eState==CURSOR_SKIPNEXT ){
719 pCur->eState = CURSOR_VALID;
720 }else{
721 pCur->skipNext = 0;
722 }
723
724 rc = saveCursorKey(pCur);
725 if( rc==SQLITE_OK ){
726 btreeReleaseAllCursorPages(pCur);
727 pCur->eState = CURSOR_REQUIRESEEK;
728 }
729
730 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl|BTCF_AtLast);
731 return rc;
732}
733
734/* Forward reference */
735static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*);
736
737/*
738** Save the positions of all cursors (except pExcept) that are open on
739** the table with root-page iRoot. "Saving the cursor position" means that
740** the location in the btree is remembered in such a way that it can be
741** moved back to the same spot after the btree has been modified. This
742** routine is called just before cursor pExcept is used to modify the
743** table, for example in BtreeDelete() or BtreeInsert().
744**
745** If there are two or more cursors on the same btree, then all such
746** cursors should have their BTCF_Multiple flag set. The btreeCursor()
747** routine enforces that rule. This routine only needs to be called in
** the uncommon case when pExcept has the BTCF_Multiple flag set.
**
** If pExcept!=NULL and if no other cursors are found on the same root-page,
** then the BTCF_Multiple flag on pExcept is cleared, to avoid another
** pointless call to this routine.
**
** Implementation note: This routine merely checks to see if any cursors
** need to be saved. It calls out to saveCursorsOnList() in the (unusual)
** event that cursors are in need of being saved.
757*/
758static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
759 BtCursor *p;
760 assert( sqlite3_mutex_held(pBt->mutex) );
761 assert( pExcept==0 || pExcept->pBt==pBt );
762 for(p=pBt->pCursor; p; p=p->pNext){
763 if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break;
764 }
765 if( p ) return saveCursorsOnList(p, iRoot, pExcept);
766 if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple;
767 return SQLITE_OK;
768}
769
770/* This helper routine to saveAllCursors does the actual work of saving
771** the cursors if and when a cursor is found that actually requires saving.
772** The common case is that no cursors need to be saved, so this routine is
773** broken out from its caller to avoid unnecessary stack pointer movement.
774*/
775static int SQLITE_NOINLINE saveCursorsOnList(
776 BtCursor *p, /* The first cursor that needs saving */
777 Pgno iRoot, /* Only save cursor with this iRoot. Save all if zero */
778 BtCursor *pExcept /* Do not save this cursor */
779){
780 do{
781 if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){
782 if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){
783 int rc = saveCursorPosition(p);
784 if( SQLITE_OK!=rc ){
785 return rc;
786 }
787 }else{
788 testcase( p->iPage>=0 );
789 btreeReleaseAllCursorPages(p);
790 }
791 }
792 p = p->pNext;
793 }while( p );
794 return SQLITE_OK;
795}
796
797/*
798** Clear the current cursor position.
799*/
800void sqlite3BtreeClearCursor(BtCursor *pCur){
801 assert( cursorHoldsMutex(pCur) );
802 sqlite3_free(pCur->pKey);
803 pCur->pKey = 0;
804 pCur->eState = CURSOR_INVALID;
805}
806
807/*
808** In this version of BtreeMoveto, pKey is a packed index record
809** such as is generated by the OP_MakeRecord opcode. Unpack the
810** record and then call sqlite3BtreeIndexMoveto() to do the work.
811*/
812static int btreeMoveto(
813 BtCursor *pCur, /* Cursor open on the btree to be searched */
814 const void *pKey, /* Packed key if the btree is an index */
815 i64 nKey, /* Integer key for tables. Size of pKey for indices */
816 int bias, /* Bias search to the high end */
817 int *pRes /* Write search results here */
818){
819 int rc; /* Status code */
820 UnpackedRecord *pIdxKey; /* Unpacked index key */
821
822 if( pKey ){
823 KeyInfo *pKeyInfo = pCur->pKeyInfo;
824 assert( nKey==(i64)(int)nKey );
825 pIdxKey = sqlite3VdbeAllocUnpackedRecord(pKeyInfo);
826 if( pIdxKey==0 ) return SQLITE_NOMEM_BKPT;
827 sqlite3VdbeRecordUnpack(pKeyInfo, (int)nKey, pKey, pIdxKey);
828 if( pIdxKey->nField==0 || pIdxKey->nField>pKeyInfo->nAllField ){
829 rc = SQLITE_CORRUPT_BKPT;
830 }else{
831 rc = sqlite3BtreeIndexMoveto(pCur, pIdxKey, pRes);
832 }
833 sqlite3DbFree(pCur->pKeyInfo->db, pIdxKey);
834 }else{
835 pIdxKey = 0;
836 rc = sqlite3BtreeTableMoveto(pCur, nKey, bias, pRes);
837 }
838 return rc;
839}
840
841/*
842** Restore the cursor to the position it was in (or as close to as possible)
843** when saveCursorPosition() was called. Note that this call deletes the
844** saved position info stored by saveCursorPosition(), so there can be
845** at most one effective restoreCursorPosition() call after each
846** saveCursorPosition().
847*/
848static int btreeRestoreCursorPosition(BtCursor *pCur){
849 int rc;
850 int skipNext = 0;
851 assert( cursorOwnsBtShared(pCur) );
852 assert( pCur->eState>=CURSOR_REQUIRESEEK );
853 if( pCur->eState==CURSOR_FAULT ){
854 return pCur->skipNext;
855 }
856 pCur->eState = CURSOR_INVALID;
857 if( sqlite3FaultSim(410) ){
858 rc = SQLITE_IOERR;
859 }else{
860 rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext);
861 }
862 if( rc==SQLITE_OK ){
863 sqlite3_free(pCur->pKey);
864 pCur->pKey = 0;
865 assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
866 if( skipNext ) pCur->skipNext = skipNext;
867 if( pCur->skipNext && pCur->eState==CURSOR_VALID ){
868 pCur->eState = CURSOR_SKIPNEXT;
869 }
870 }
871 return rc;
872}
873
874#define restoreCursorPosition(p) \
875 (p->eState>=CURSOR_REQUIRESEEK ? \
876 btreeRestoreCursorPosition(p) : \
877 SQLITE_OK)
878
879/*
880** Determine whether or not a cursor has moved from the position where
881** it was last placed, or has been invalidated for any other reason.
882** Cursors can move when the row they are pointing at is deleted out
** from under them, for example. Cursors might also move if a btree
** is rebalanced.
885**
886** Calling this routine with a NULL cursor pointer returns false.
887**
888** Use the separate sqlite3BtreeCursorRestore() routine to restore a cursor
889** back to where it ought to be if this routine returns true.
890*/
891int sqlite3BtreeCursorHasMoved(BtCursor *pCur){
892 assert( EIGHT_BYTE_ALIGNMENT(pCur)
893 || pCur==sqlite3BtreeFakeValidCursor() );
894 assert( offsetof(BtCursor, eState)==0 );
895 assert( sizeof(pCur->eState)==1 );
896 return CURSOR_VALID != *(u8*)pCur;
897}
898
899/*
900** Return a pointer to a fake BtCursor object that will always answer
901** false to the sqlite3BtreeCursorHasMoved() routine above. The fake
902** cursor returned must not be used with any other Btree interface.
903*/
904BtCursor *sqlite3BtreeFakeValidCursor(void){
905 static u8 fakeCursor = CURSOR_VALID;
906 assert( offsetof(BtCursor, eState)==0 );
907 return (BtCursor*)&fakeCursor;
908}
909
910/*
911** This routine restores a cursor back to its original position after it
912** has been moved by some outside activity (such as a btree rebalance or
913** a row having been deleted out from under the cursor).
914**
915** On success, the *pDifferentRow parameter is false if the cursor is left
** pointing at exactly the same row. *pDifferentRow is set to true if the
** row the cursor was pointing to has been deleted, forcing the cursor to
** point to some nearby row.
919**
920** This routine should only be called for a cursor that just returned
921** TRUE from sqlite3BtreeCursorHasMoved().
922*/
923int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){
924 int rc;
925
926 assert( pCur!=0 );
927 assert( pCur->eState!=CURSOR_VALID );
928 rc = restoreCursorPosition(pCur);
929 if( rc ){
930 *pDifferentRow = 1;
931 return rc;
932 }
933 if( pCur->eState!=CURSOR_VALID ){
934 *pDifferentRow = 1;
935 }else{
936 *pDifferentRow = 0;
937 }
938 return SQLITE_OK;
939}
940
941#ifdef SQLITE_ENABLE_CURSOR_HINTS
942/*
943** Provide hints to the cursor. The particular hint given (and the type
944** and number of the varargs parameters) is determined by the eHintType
945** parameter. See the definitions of the BTREE_HINT_* macros for details.
946*/
947void sqlite3BtreeCursorHint(BtCursor *pCur, int eHintType, ...){
  /* Used only by systems that substitute their own storage engine */
949}
950#endif
951
952/*
953** Provide flag hints to the cursor.
954*/
955void sqlite3BtreeCursorHintFlags(BtCursor *pCur, unsigned x){
956 assert( x==BTREE_SEEK_EQ || x==BTREE_BULKLOAD || x==0 );
957 pCur->hints = x;
958}
959
960
961#ifndef SQLITE_OMIT_AUTOVACUUM
962/*
963** Given a page number of a regular database page, return the page
964** number for the pointer-map page that contains the entry for the
965** input page number.
966**
967** Return 0 (not a valid page) for pgno==1 since there is
968** no pointer map associated with page 1. The integrity_check logic
969** requires that ptrmapPageno(*,1)!=1.
970*/
971static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
972 int nPagesPerMapPage;
973 Pgno iPtrMap, ret;
974 assert( sqlite3_mutex_held(pBt->mutex) );
975 if( pgno<2 ) return 0;
976 nPagesPerMapPage = (pBt->usableSize/5)+1;
977 iPtrMap = (pgno-2)/nPagesPerMapPage;
978 ret = (iPtrMap*nPagesPerMapPage) + 2;
979 if( ret==PENDING_BYTE_PAGE(pBt) ){
980 ret++;
981 }
982 return ret;
983}
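
/*
** A worked example (not compiled into the build) of the arithmetic in
** ptrmapPageno(), assuming a hypothetical BtShared with usableSize==1024.
** Each pointer-map page holds 1024/5 == 204 five-byte entries, so
** nPagesPerMapPage is 205 (the map page itself plus the 204 pages it
** describes). The PENDING_BYTE_PAGE() adjustment does not come into play
** for page numbers this small. Note that ptrmapPageno(pBt, pgno)==pgno
** exactly when pgno is itself a pointer-map page.
*/
#if 0
static void ptrmapPagenoExample(BtShared *pBt){
  assert( pBt->usableSize==1024 );
  assert( ptrmapPageno(pBt, 1)==0 );     /* page 1 has no ptrmap entry    */
  assert( ptrmapPageno(pBt, 2)==2 );     /* page 2 is the first map page  */
  assert( ptrmapPageno(pBt, 206)==2 );   /* pages 3..206 described by 2   */
  assert( ptrmapPageno(pBt, 207)==207 ); /* the next map page             */
  assert( ptrmapPageno(pBt, 208)==207 ); /* pages 208..411 described here */
}
#endif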
984
985/*
986** Write an entry into the pointer map.
987**
988** This routine updates the pointer map entry for page number 'key'
989** so that it maps to type 'eType' and parent page number 'pgno'.
990**
991** If *pRC is initially non-zero (non-SQLITE_OK) then this routine is
992** a no-op. If an error occurs, the appropriate error code is written
993** into *pRC.
994*/
995static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){
996 DbPage *pDbPage; /* The pointer map page */
997 u8 *pPtrmap; /* The pointer map data */
998 Pgno iPtrmap; /* The pointer map page number */
999 int offset; /* Offset in pointer map page */
1000 int rc; /* Return code from subfunctions */
1001
1002 if( *pRC ) return;
1003
1004 assert( sqlite3_mutex_held(pBt->mutex) );
1005 /* The super-journal page number must never be used as a pointer map page */
1006 assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
1007
1008 assert( pBt->autoVacuum );
1009 if( key==0 ){
1010 *pRC = SQLITE_CORRUPT_BKPT;
1011 return;
1012 }
1013 iPtrmap = PTRMAP_PAGENO(pBt, key);
1014 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0);
1015 if( rc!=SQLITE_OK ){
1016 *pRC = rc;
1017 return;
1018 }
1019 if( ((char*)sqlite3PagerGetExtra(pDbPage))[0]!=0 ){
1020 /* The first byte of the extra data is the MemPage.isInit byte.
1021 ** If that byte is set, it means this page is also being used
1022 ** as a btree page. */
1023 *pRC = SQLITE_CORRUPT_BKPT;
1024 goto ptrmap_exit;
1025 }
1026 offset = PTRMAP_PTROFFSET(iPtrmap, key);
1027 if( offset<0 ){
1028 *pRC = SQLITE_CORRUPT_BKPT;
1029 goto ptrmap_exit;
1030 }
1031 assert( offset <= (int)pBt->usableSize-5 );
1032 pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
1033
1034 if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
1035 TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
1036 *pRC= rc = sqlite3PagerWrite(pDbPage);
1037 if( rc==SQLITE_OK ){
1038 pPtrmap[offset] = eType;
1039 put4byte(&pPtrmap[offset+1], parent);
1040 }
1041 }
1042
1043ptrmap_exit:
1044 sqlite3PagerUnref(pDbPage);
1045}
1046
1047/*
1048** Read an entry from the pointer map.
1049**
1050** This routine retrieves the pointer map entry for page 'key', writing
1051** the type and parent page number to *pEType and *pPgno respectively.
1052** An error code is returned if something goes wrong, otherwise SQLITE_OK.
1053*/
1054static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
1055 DbPage *pDbPage; /* The pointer map page */
1056 int iPtrmap; /* Pointer map page index */
1057 u8 *pPtrmap; /* Pointer map page data */
1058 int offset; /* Offset of entry in pointer map */
1059 int rc;
1060
1061 assert( sqlite3_mutex_held(pBt->mutex) );
1062
1063 iPtrmap = PTRMAP_PAGENO(pBt, key);
1064 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0);
1065 if( rc!=0 ){
1066 return rc;
1067 }
1068 pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
1069
1070 offset = PTRMAP_PTROFFSET(iPtrmap, key);
1071 if( offset<0 ){
1072 sqlite3PagerUnref(pDbPage);
1073 return SQLITE_CORRUPT_BKPT;
1074 }
1075 assert( offset <= (int)pBt->usableSize-5 );
1076 assert( pEType!=0 );
1077 *pEType = pPtrmap[offset];
1078 if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]);
1079
1080 sqlite3PagerUnref(pDbPage);
1081 if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_PGNO(iPtrmap);
1082 return SQLITE_OK;
1083}
1084
1085#else /* if defined SQLITE_OMIT_AUTOVACUUM */
1086 #define ptrmapPut(w,x,y,z,rc)
1087 #define ptrmapGet(w,x,y,z) SQLITE_OK
1088 #define ptrmapPutOvflPtr(x, y, z, rc)
1089#endif
1090
1091/*
1092** Given a btree page and a cell index (0 means the first cell on
1093** the page, 1 means the second cell, and so forth) return a pointer
1094** to the cell content.
1095**
1096** findCellPastPtr() does the same except it skips past the initial
1097** 4-byte child pointer found on interior pages, if there is one.
1098**
1099** This routine works only for pages that do not contain overflow cells.
1100*/
1101#define findCell(P,I) \
1102 ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
1103#define findCellPastPtr(P,I) \
1104 ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
1105
1106
1107/*
1108** This is common tail processing for btreeParseCellPtr() and
1109** btreeParseCellPtrIndex() for the case when the cell does not fit entirely
1110** on a single B-tree page. Make necessary adjustments to the CellInfo
1111** structure.
1112*/
1113static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow(
1114 MemPage *pPage, /* Page containing the cell */
1115 u8 *pCell, /* Pointer to the cell text. */
1116 CellInfo *pInfo /* Fill in this structure */
1117){
1118 /* If the payload will not fit completely on the local page, we have
1119 ** to decide how much to store locally and how much to spill onto
1120 ** overflow pages. The strategy is to minimize the amount of unused
1121 ** space on overflow pages while keeping the amount of local storage
1122 ** in between minLocal and maxLocal.
1123 **
1124 ** Warning: changing the way overflow payload is distributed in any
1125 ** way will result in an incompatible file format.
1126 */
1127 int minLocal; /* Minimum amount of payload held locally */
1128 int maxLocal; /* Maximum amount of payload held locally */
1129 int surplus; /* Overflow payload available for local storage */
1130
1131 minLocal = pPage->minLocal;
1132 maxLocal = pPage->maxLocal;
1133 surplus = minLocal + (pInfo->nPayload - minLocal)%(pPage->pBt->usableSize-4);
1134 testcase( surplus==maxLocal );
1135 testcase( surplus==maxLocal+1 );
1136 if( surplus <= maxLocal ){
1137 pInfo->nLocal = (u16)surplus;
1138 }else{
1139 pInfo->nLocal = (u16)minLocal;
1140 }
1141 pInfo->nSize = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell) + 4;
1142}
1143
1144/*
1145** Given a record with nPayload bytes of payload stored within btree
1146** page pPage, return the number of bytes of payload stored locally.
1147*/
1148static int btreePayloadToLocal(MemPage *pPage, i64 nPayload){
1149 int maxLocal; /* Maximum amount of payload held locally */
1150 maxLocal = pPage->maxLocal;
1151 if( nPayload<=maxLocal ){
1152 return nPayload;
1153 }else{
1154 int minLocal; /* Minimum amount of payload held locally */
1155 int surplus; /* Overflow payload available for local storage */
1156 minLocal = pPage->minLocal;
1157 surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize-4);
1158 return ( surplus <= maxLocal ) ? surplus : minLocal;
1159 }
1160}
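
/*
** A worked example (not compiled into the build) of the local/overflow
** split computed by btreePayloadToLocal() and
** btreeParseCellAdjustSizeForOverflow(). The numbers are hypothetical:
** assume minLocal==100, maxLocal==500 and usableSize==1024, so each
** overflow page carries usableSize-4 == 1020 payload bytes after its
** 4-byte next-page pointer.
*/
#if 0
static void payloadSplitExample(MemPage *pPage){
  assert( pPage->minLocal==100 && pPage->maxLocal==500 );
  assert( pPage->pBt->usableSize==1024 );

  /* 1300 bytes of payload: surplus = 100 + (1200 % 1020) = 280, which is
  ** <= maxLocal, so 280 bytes stay local and exactly one completely full
  ** overflow page holds the remaining 1020. */
  assert( btreePayloadToLocal(pPage, 1300)==280 );

  /* 2000 bytes of payload: surplus = 100 + (1900 % 1020) = 980, which
  ** exceeds maxLocal, so only minLocal bytes stay local and the remaining
  ** 1900 bytes spill onto two overflow pages (1020 + 880). */
  assert( btreePayloadToLocal(pPage, 2000)==100 );
}
#endif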
1161
1162/*
1163** The following routines are implementations of the MemPage.xParseCell()
1164** method.
1165**
1166** Parse a cell content block and fill in the CellInfo structure.
1167**
1168** btreeParseCellPtr() => table btree leaf nodes
** btreeParseCellPtrNoPayload() => table btree internal nodes
1170** btreeParseCellPtrIndex() => index btree nodes
1171**
1172** There is also a wrapper function btreeParseCell() that works for
1173** all MemPage types and that references the cell by index rather than
1174** by pointer.
1175*/
1176static void btreeParseCellPtrNoPayload(
1177 MemPage *pPage, /* Page containing the cell */
1178 u8 *pCell, /* Pointer to the cell text. */
1179 CellInfo *pInfo /* Fill in this structure */
1180){
1181 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1182 assert( pPage->leaf==0 );
1183 assert( pPage->childPtrSize==4 );
1184#ifndef SQLITE_DEBUG
1185 UNUSED_PARAMETER(pPage);
1186#endif
1187 pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey);
1188 pInfo->nPayload = 0;
1189 pInfo->nLocal = 0;
1190 pInfo->pPayload = 0;
1191 return;
1192}
1193static void btreeParseCellPtr(
1194 MemPage *pPage, /* Page containing the cell */
1195 u8 *pCell, /* Pointer to the cell text. */
1196 CellInfo *pInfo /* Fill in this structure */
1197){
1198 u8 *pIter; /* For scanning through pCell */
1199 u32 nPayload; /* Number of bytes of cell payload */
1200 u64 iKey; /* Extracted Key value */
1201
1202 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1203 assert( pPage->leaf==0 || pPage->leaf==1 );
1204 assert( pPage->intKeyLeaf );
1205 assert( pPage->childPtrSize==0 );
1206 pIter = pCell;
1207
1208 /* The next block of code is equivalent to:
1209 **
1210 ** pIter += getVarint32(pIter, nPayload);
1211 **
1212 ** The code is inlined to avoid a function call.
1213 */
1214 nPayload = *pIter;
1215 if( nPayload>=0x80 ){
1216 u8 *pEnd = &pIter[8];
1217 nPayload &= 0x7f;
1218 do{
1219 nPayload = (nPayload<<7) | (*++pIter & 0x7f);
1220 }while( (*pIter)>=0x80 && pIter<pEnd );
1221 }
1222 pIter++;
1223
1224 /* The next block of code is equivalent to:
1225 **
1226 ** pIter += getVarint(pIter, (u64*)&pInfo->nKey);
1227 **
1228 ** The code is inlined and the loop is unrolled for performance.
1229 ** This routine is a high-runner.
1230 */
1231 iKey = *pIter;
1232 if( iKey>=0x80 ){
1233 u8 x;
1234 iKey = ((iKey&0x7f)<<7) | ((x = *++pIter) & 0x7f);
1235 if( x>=0x80 ){
1236 iKey = (iKey<<7) | ((x =*++pIter) & 0x7f);
1237 if( x>=0x80 ){
1238 iKey = (iKey<<7) | ((x = *++pIter) & 0x7f);
1239 if( x>=0x80 ){
1240 iKey = (iKey<<7) | ((x = *++pIter) & 0x7f);
1241 if( x>=0x80 ){
1242 iKey = (iKey<<7) | ((x = *++pIter) & 0x7f);
1243 if( x>=0x80 ){
1244 iKey = (iKey<<7) | ((x = *++pIter) & 0x7f);
1245 if( x>=0x80 ){
1246 iKey = (iKey<<7) | ((x = *++pIter) & 0x7f);
1247 if( x>=0x80 ){
1248 iKey = (iKey<<8) | (*++pIter);
1249 }
1250 }
1251 }
1252 }
1253 }
1254 }
1255 }
1256 }
1257 pIter++;
1258
1259 pInfo->nKey = *(i64*)&iKey;
1260 pInfo->nPayload = nPayload;
1261 pInfo->pPayload = pIter;
1262 testcase( nPayload==pPage->maxLocal );
1263 testcase( nPayload==(u32)pPage->maxLocal+1 );
1264 if( nPayload<=pPage->maxLocal ){
1265 /* This is the (easy) common case where the entire payload fits
1266 ** on the local page. No overflow is required.
1267 */
1268 pInfo->nSize = nPayload + (u16)(pIter - pCell);
1269 if( pInfo->nSize<4 ) pInfo->nSize = 4;
1270 pInfo->nLocal = (u16)nPayload;
1271 }else{
1272 btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo);
1273 }
1274}
1275static void btreeParseCellPtrIndex(
1276 MemPage *pPage, /* Page containing the cell */
1277 u8 *pCell, /* Pointer to the cell text. */
1278 CellInfo *pInfo /* Fill in this structure */
1279){
1280 u8 *pIter; /* For scanning through pCell */
1281 u32 nPayload; /* Number of bytes of cell payload */
1282
1283 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1284 assert( pPage->leaf==0 || pPage->leaf==1 );
1285 assert( pPage->intKeyLeaf==0 );
1286 pIter = pCell + pPage->childPtrSize;
1287 nPayload = *pIter;
1288 if( nPayload>=0x80 ){
1289 u8 *pEnd = &pIter[8];
1290 nPayload &= 0x7f;
1291 do{
1292 nPayload = (nPayload<<7) | (*++pIter & 0x7f);
1293 }while( *(pIter)>=0x80 && pIter<pEnd );
1294 }
1295 pIter++;
1296 pInfo->nKey = nPayload;
1297 pInfo->nPayload = nPayload;
1298 pInfo->pPayload = pIter;
1299 testcase( nPayload==pPage->maxLocal );
1300 testcase( nPayload==(u32)pPage->maxLocal+1 );
1301 if( nPayload<=pPage->maxLocal ){
1302 /* This is the (easy) common case where the entire payload fits
1303 ** on the local page. No overflow is required.
1304 */
1305 pInfo->nSize = nPayload + (u16)(pIter - pCell);
1306 if( pInfo->nSize<4 ) pInfo->nSize = 4;
1307 pInfo->nLocal = (u16)nPayload;
1308 }else{
1309 btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo);
1310 }
1311}
1312static void btreeParseCell(
1313 MemPage *pPage, /* Page containing the cell */
1314 int iCell, /* The cell index. First cell is 0 */
1315 CellInfo *pInfo /* Fill in this structure */
1316){
1317 pPage->xParseCell(pPage, findCell(pPage, iCell), pInfo);
1318}
1319
1320/*
1321** The following routines are implementations of the MemPage.xCellSize
1322** method.
1323**
1324** Compute the total number of bytes that a Cell needs in the cell
1325** data area of the btree-page. The return number includes the cell
1326** data header and the local payload, but not any overflow page or
1327** the space used by the cell pointer.
1328**
1329** cellSizePtrNoPayload() => table internal nodes
1330** cellSizePtrTableLeaf() => table leaf nodes
1331** cellSizePtr() => all index nodes & table leaf nodes
1332*/
1333static u16 cellSizePtr(MemPage *pPage, u8 *pCell){
1334 u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */
1335 u8 *pEnd; /* End mark for a varint */
1336 u32 nSize; /* Size value to return */
1337
1338#ifdef SQLITE_DEBUG
1339 /* The value returned by this function should always be the same as
1340 ** the (CellInfo.nSize) value found by doing a full parse of the
1341 ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
1342 ** this function verifies that this invariant is not violated. */
1343 CellInfo debuginfo;
1344 pPage->xParseCell(pPage, pCell, &debuginfo);
1345#endif
1346
1347 nSize = *pIter;
1348 if( nSize>=0x80 ){
1349 pEnd = &pIter[8];
1350 nSize &= 0x7f;
1351 do{
1352 nSize = (nSize<<7) | (*++pIter & 0x7f);
1353 }while( *(pIter)>=0x80 && pIter<pEnd );
1354 }
1355 pIter++;
1356 testcase( nSize==pPage->maxLocal );
1357 testcase( nSize==(u32)pPage->maxLocal+1 );
1358 if( nSize<=pPage->maxLocal ){
1359 nSize += (u32)(pIter - pCell);
1360 if( nSize<4 ) nSize = 4;
1361 }else{
1362 int minLocal = pPage->minLocal;
1363 nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4);
1364 testcase( nSize==pPage->maxLocal );
1365 testcase( nSize==(u32)pPage->maxLocal+1 );
1366 if( nSize>pPage->maxLocal ){
1367 nSize = minLocal;
1368 }
1369 nSize += 4 + (u16)(pIter - pCell);
1370 }
1371 assert( nSize==debuginfo.nSize || CORRUPT_DB );
1372 return (u16)nSize;
1373}
1374static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){
1375 u8 *pIter = pCell + 4; /* For looping over bytes of pCell */
1376 u8 *pEnd; /* End mark for a varint */
1377
1378#ifdef SQLITE_DEBUG
1379 /* The value returned by this function should always be the same as
1380 ** the (CellInfo.nSize) value found by doing a full parse of the
1381 ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
1382 ** this function verifies that this invariant is not violated. */
1383 CellInfo debuginfo;
1384 pPage->xParseCell(pPage, pCell, &debuginfo);
1385#else
1386 UNUSED_PARAMETER(pPage);
1387#endif
1388
1389 assert( pPage->childPtrSize==4 );
1390 pEnd = pIter + 9;
1391 while( (*pIter++)&0x80 && pIter<pEnd );
1392 assert( debuginfo.nSize==(u16)(pIter - pCell) || CORRUPT_DB );
1393 return (u16)(pIter - pCell);
1394}
1395static u16 cellSizePtrTableLeaf(MemPage *pPage, u8 *pCell){
1396 u8 *pIter = pCell; /* For looping over bytes of pCell */
1397 u8 *pEnd; /* End mark for a varint */
1398 u32 nSize; /* Size value to return */
1399
1400#ifdef SQLITE_DEBUG
1401 /* The value returned by this function should always be the same as
1402 ** the (CellInfo.nSize) value found by doing a full parse of the
1403 ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
1404 ** this function verifies that this invariant is not violated. */
1405 CellInfo debuginfo;
1406 pPage->xParseCell(pPage, pCell, &debuginfo);
1407#endif
1408
1409 nSize = *pIter;
1410 if( nSize>=0x80 ){
1411 pEnd = &pIter[8];
1412 nSize &= 0x7f;
1413 do{
1414 nSize = (nSize<<7) | (*++pIter & 0x7f);
1415 }while( *(pIter)>=0x80 && pIter<pEnd );
1416 }
1417 pIter++;
1418 /* pIter now points at the 64-bit integer key value, a variable length
1419 ** integer. The following block moves pIter to point at the first byte
1420 ** past the end of the key value. */
1421 if( (*pIter++)&0x80
1422 && (*pIter++)&0x80
1423 && (*pIter++)&0x80
1424 && (*pIter++)&0x80
1425 && (*pIter++)&0x80
1426 && (*pIter++)&0x80
1427 && (*pIter++)&0x80
1428 && (*pIter++)&0x80 ){ pIter++; }
1429 testcase( nSize==pPage->maxLocal );
1430 testcase( nSize==(u32)pPage->maxLocal+1 );
1431 if( nSize<=pPage->maxLocal ){
1432 nSize += (u32)(pIter - pCell);
1433 if( nSize<4 ) nSize = 4;
1434 }else{
1435 int minLocal = pPage->minLocal;
1436 nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4);
1437 testcase( nSize==pPage->maxLocal );
1438 testcase( nSize==(u32)pPage->maxLocal+1 );
1439 if( nSize>pPage->maxLocal ){
1440 nSize = minLocal;
1441 }
1442 nSize += 4 + (u16)(pIter - pCell);
1443 }
1444 assert( nSize==debuginfo.nSize || CORRUPT_DB );
1445 return (u16)nSize;
1446}
1447
1448
1449#ifdef SQLITE_DEBUG
1450/* This variation on cellSizePtr() is used inside of assert() statements
1451** only. */
1452static u16 cellSize(MemPage *pPage, int iCell){
1453 return pPage->xCellSize(pPage, findCell(pPage, iCell));
1454}
1455#endif
1456
1457#ifndef SQLITE_OMIT_AUTOVACUUM
1458/*
1459** The cell pCell is currently part of page pSrc but will ultimately be part
1460** of pPage. (pSrc and pPage are often the same.) If pCell contains a
1461** pointer to an overflow page, insert an entry into the pointer-map for
1462** the overflow page that will be valid after pCell has been moved to pPage.
1463*/
1464static void ptrmapPutOvflPtr(MemPage *pPage, MemPage *pSrc, u8 *pCell,int *pRC){
1465 CellInfo info;
1466 if( *pRC ) return;
1467 assert( pCell!=0 );
1468 pPage->xParseCell(pPage, pCell, &info);
1469 if( info.nLocal<info.nPayload ){
1470 Pgno ovfl;
1471 if( SQLITE_WITHIN(pSrc->aDataEnd, pCell, pCell+info.nLocal) ){
1472 testcase( pSrc!=pPage );
1473 *pRC = SQLITE_CORRUPT_BKPT;
1474 return;
1475 }
1476 ovfl = get4byte(&pCell[info.nSize-4]);
1477 ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC);
1478 }
1479}
1480#endif
1481
1482
1483/*
1484** Defragment the page given. This routine reorganizes cells within the
1485** page so that there are no free-blocks on the free-block list.
1486**
1487** Parameter nMaxFrag is the maximum amount of fragmented space that may be
1488** present in the page after this routine returns.
1489**
1490** EVIDENCE-OF: R-44582-60138 SQLite may from time to time reorganize a
1491** b-tree page so that there are no freeblocks or fragment bytes, all
1492** unused bytes are contained in the unallocated space region, and all
1493** cells are packed tightly at the end of the page.
1494*/
1495static int defragmentPage(MemPage *pPage, int nMaxFrag){
1496 int i; /* Loop counter */
1497 int pc; /* Address of the i-th cell */
1498 int hdr; /* Offset to the page header */
1499 int size; /* Size of a cell */
1500 int usableSize; /* Number of usable bytes on a page */
1501 int cellOffset; /* Offset to the cell pointer array */
1502 int cbrk; /* Offset to the cell content area */
1503 int nCell; /* Number of cells on the page */
1504 unsigned char *data; /* The page data */
1505 unsigned char *temp; /* Temp area for cell content */
1506 unsigned char *src; /* Source of content */
1507 int iCellFirst; /* First allowable cell index */
1508 int iCellLast; /* Last possible cell index */
1509 int iCellStart; /* First cell offset in input */
1510
1511 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1512 assert( pPage->pBt!=0 );
1513 assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
1514 assert( pPage->nOverflow==0 );
1515 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1516 data = pPage->aData;
1517 hdr = pPage->hdrOffset;
1518 cellOffset = pPage->cellOffset;
1519 nCell = pPage->nCell;
1520 assert( nCell==get2byte(&data[hdr+3]) || CORRUPT_DB );
1521 iCellFirst = cellOffset + 2*nCell;
1522 usableSize = pPage->pBt->usableSize;
1523
1524 /* This block handles pages with two or fewer free blocks and nMaxFrag
1525 ** or fewer fragmented bytes. In this case it is faster to move the
1526 ** two (or one) blocks of cells using memmove() and add the required
1527 ** offsets to each pointer in the cell-pointer array than it is to
1528 ** reconstruct the entire page. */
1529 if( (int)data[hdr+7]<=nMaxFrag ){
1530 int iFree = get2byte(&data[hdr+1]);
1531 if( iFree>usableSize-4 ) return SQLITE_CORRUPT_PAGE(pPage);
1532 if( iFree ){
1533 int iFree2 = get2byte(&data[iFree]);
1534 if( iFree2>usableSize-4 ) return SQLITE_CORRUPT_PAGE(pPage);
1535 if( 0==iFree2 || (data[iFree2]==0 && data[iFree2+1]==0) ){
1536 u8 *pEnd = &data[cellOffset + nCell*2];
1537 u8 *pAddr;
1538 int sz2 = 0;
1539 int sz = get2byte(&data[iFree+2]);
1540 int top = get2byte(&data[hdr+5]);
1541 if( top>=iFree ){
1542 return SQLITE_CORRUPT_PAGE(pPage);
1543 }
1544 if( iFree2 ){
1545 if( iFree+sz>iFree2 ) return SQLITE_CORRUPT_PAGE(pPage);
1546 sz2 = get2byte(&data[iFree2+2]);
1547 if( iFree2+sz2 > usableSize ) return SQLITE_CORRUPT_PAGE(pPage);
1548 memmove(&data[iFree+sz+sz2], &data[iFree+sz], iFree2-(iFree+sz));
1549 sz += sz2;
1550 }else if( iFree+sz>usableSize ){
1551 return SQLITE_CORRUPT_PAGE(pPage);
1552 }
1553
1554 cbrk = top+sz;
1555 assert( cbrk+(iFree-top) <= usableSize );
1556 memmove(&data[cbrk], &data[top], iFree-top);
1557 for(pAddr=&data[cellOffset]; pAddr<pEnd; pAddr+=2){
1558 pc = get2byte(pAddr);
1559 if( pc<iFree ){ put2byte(pAddr, pc+sz); }
1560 else if( pc<iFree2 ){ put2byte(pAddr, pc+sz2); }
1561 }
1562 goto defragment_out;
1563 }
1564 }
1565 }
1566
1567 cbrk = usableSize;
1568 iCellLast = usableSize - 4;
1569 iCellStart = get2byte(&data[hdr+5]);
1570 if( nCell>0 ){
1571 temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
1572 memcpy(&temp[iCellStart], &data[iCellStart], usableSize - iCellStart);
1573 src = temp;
1574 for(i=0; i<nCell; i++){
1575 u8 *pAddr; /* The i-th cell pointer */
1576 pAddr = &data[cellOffset + i*2];
1577 pc = get2byte(pAddr);
1578 testcase( pc==iCellFirst );
1579 testcase( pc==iCellLast );
1580 /* These conditions have already been verified in btreeInitPage()
1581 ** if PRAGMA cell_size_check=ON.
1582 */
1583 if( pc<iCellStart || pc>iCellLast ){
1584 return SQLITE_CORRUPT_PAGE(pPage);
1585 }
1586 assert( pc>=iCellStart && pc<=iCellLast );
1587 size = pPage->xCellSize(pPage, &src[pc]);
1588 cbrk -= size;
1589 if( cbrk<iCellStart || pc+size>usableSize ){
1590 return SQLITE_CORRUPT_PAGE(pPage);
1591 }
1592 assert( cbrk+size<=usableSize && cbrk>=iCellStart );
1593 testcase( cbrk+size==usableSize );
1594 testcase( pc+size==usableSize );
1595 put2byte(pAddr, cbrk);
1596 memcpy(&data[cbrk], &src[pc], size);
1597 }
1598 }
1599 data[hdr+7] = 0;
1600
1601defragment_out:
1602 assert( pPage->nFree>=0 );
1603 if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){
1604 return SQLITE_CORRUPT_PAGE(pPage);
1605 }
1606 assert( cbrk>=iCellFirst );
1607 put2byte(&data[hdr+5], cbrk);
1608 data[hdr+1] = 0;
1609 data[hdr+2] = 0;
1610 memset(&data[iCellFirst], 0, cbrk-iCellFirst);
1611 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1612 return SQLITE_OK;
1613}
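/*
** Illustrative sketch (disabled, not part of the build): the general path
** above snapshots the cell content into the pager temp buffer and then
** repacks every cell against the top of the page, rewriting each 2-byte
** cell pointer as it goes.  The sketch shows that shape on a toy page of
** fixed-size cells.  The function name, the fixed cell size, and the
** absence of corruption checks are all simplifications, not SQLite code.
*/
#if 0
#include <string.h>
static void exampleRepack(unsigned char *aPage, int cellOffset, int nCell,
                          int szCell, int usableSize){
  unsigned char aTemp[65536];                  /* Snapshot of the old content */
  int i, cbrk = usableSize;
  memcpy(aTemp, aPage, usableSize);
  for(i=0; i<nCell; i++){
    unsigned char *pAddr = &aPage[cellOffset + i*2];
    int pc = (pAddr[0]<<8) | pAddr[1];         /* Old location of the cell */
    cbrk -= szCell;                            /* New location, packed at top */
    memcpy(&aPage[cbrk], &aTemp[pc], szCell);
    pAddr[0] = (unsigned char)(cbrk>>8);       /* Rewrite the cell pointer */
    pAddr[1] = (unsigned char)(cbrk&0xff);
  }
}
#endif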
1614
1615/*
1616** Search the free-list on page pPg for space to store a cell nByte bytes in
1617** size. If one can be found, return a pointer to the space and remove it
1618** from the free-list.
1619**
1620** If no suitable space can be found on the free-list, return NULL.
1621**
1622** This function may detect corruption within pPg. If corruption is
1623** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned.
1624**
1625** Slots on the free list that are between 1 and 3 bytes larger than nByte
1626** will be ignored if adding the extra space to the fragmentation count
1627** causes the fragmentation count to exceed 60.
1628*/
1629static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){
1630 const int hdr = pPg->hdrOffset; /* Offset to page header */
1631 u8 * const aData = pPg->aData; /* Page data */
1632 int iAddr = hdr + 1; /* Address of ptr to pc */
1633 u8 *pTmp = &aData[iAddr]; /* Temporary ptr into aData[] */
1634 int pc = get2byte(pTmp); /* Address of a free slot */
1635 int x; /* Excess size of the slot */
1636 int maxPC = pPg->pBt->usableSize - nByte; /* Max address for a usable slot */
1637 int size; /* Size of the free slot */
1638
1639 assert( pc>0 );
1640 while( pc<=maxPC ){
1641 /* EVIDENCE-OF: R-22710-53328 The third and fourth bytes of each
1642 ** freeblock form a big-endian integer which is the size of the freeblock
1643 ** in bytes, including the 4-byte header. */
1644 pTmp = &aData[pc+2];
1645 size = get2byte(pTmp);
1646 if( (x = size - nByte)>=0 ){
1647 testcase( x==4 );
1648 testcase( x==3 );
1649 if( x<4 ){
1650 /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total
1651 ** number of bytes in fragments may not exceed 60. */
1652 if( aData[hdr+7]>57 ) return 0;
1653
1654 /* Remove the slot from the free-list. Update the number of
1655 ** fragmented bytes within the page. */
1656 memcpy(&aData[iAddr], &aData[pc], 2);
1657 aData[hdr+7] += (u8)x;
1658 return &aData[pc];
1659 }else if( x+pc > maxPC ){
1660 /* This slot extends off the end of the usable part of the page */
1661 *pRc = SQLITE_CORRUPT_PAGE(pPg);
1662 return 0;
1663 }else{
1664 /* The slot remains on the free-list. Reduce its size to account
1665 ** for the portion used by the new allocation. */
1666 put2byte(&aData[pc+2], x);
1667 }
1668 return &aData[pc + x];
1669 }
1670 iAddr = pc;
1671 pTmp = &aData[pc];
1672 pc = get2byte(pTmp);
1673 if( pc<=iAddr ){
1674 if( pc ){
1675 /* The next slot in the chain comes before the current slot */
1676 *pRc = SQLITE_CORRUPT_PAGE(pPg);
1677 }
1678 return 0;
1679 }
1680 }
1681 if( pc>maxPC+nByte-4 ){
1682 /* The free slot chain extends off the end of the page */
1683 *pRc = SQLITE_CORRUPT_PAGE(pPg);
1684 }
1685 return 0;
1686}
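/*
** Illustrative sketch (disabled, not part of the build): the freeblock
** format consumed above is simple enough to demonstrate in isolation.
** This hypothetical helper walks a freeblock chain in a raw page image and
** sums the free bytes, reading the big-endian "next" field at bytes 0-1 and
** the "size" field at bytes 2-3 exactly as pageFindSlot() does.
*/
#if 0
static int exampleSumFreeblocks(const unsigned char *aPage, int iFirst,
                                int usableSize){
  int nFree = 0;
  int pc = iFirst;                          /* Offset of the current freeblock */
  while( pc>0 && pc<=usableSize-4 ){
    int next = (aPage[pc]<<8) | aPage[pc+1];    /* Bytes 0-1: next freeblock */
    int size = (aPage[pc+2]<<8) | aPage[pc+3];  /* Bytes 2-3: size incl. header */
    nFree += size;
    if( next!=0 && next<=pc ) break;        /* Chain must be strictly ascending */
    pc = next;
  }
  return nFree;
}
#endif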
1687
1688/*
1689** Allocate nByte bytes of space from within the B-Tree page passed
1690** as the first argument. Write into *pIdx the index into pPage->aData[]
1691** of the first byte of allocated space. Return either SQLITE_OK or
1692** an error code (usually SQLITE_CORRUPT).
1693**
1694** The caller guarantees that there is sufficient space to make the
1695** allocation. This routine might need to defragment in order to bring
1696** all the space together, however. This routine will avoid using
1697** the first two bytes past the cell pointer area since presumably this
1698** allocation is being made in order to insert a new cell, so we will
1699** also end up needing a new cell pointer.
1700*/
1701static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
1702 const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */
1703 u8 * const data = pPage->aData; /* Local cache of pPage->aData */
1704 int top; /* First byte of cell content area */
1705 int rc = SQLITE_OK; /* Integer return code */
1706 u8 *pTmp; /* Temp ptr into data[] */
1707 int gap; /* First byte of gap between cell pointers and cell content */
1708
1709 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1710 assert( pPage->pBt );
1711 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1712 assert( nByte>=0 ); /* Minimum cell size is 4 */
1713 assert( pPage->nFree>=nByte );
1714 assert( pPage->nOverflow==0 );
1715 assert( nByte < (int)(pPage->pBt->usableSize-8) );
1716
1717 assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf );
1718 gap = pPage->cellOffset + 2*pPage->nCell;
1719 assert( gap<=65536 );
1720 /* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size
1721 ** and the reserved space is zero (the usual value for reserved space)
1722 ** then the cell content offset of an empty page wants to be 65536.
1723 ** However, that integer is too large to be stored in a 2-byte unsigned
1724 ** integer, so a value of 0 is used in its place. */
1725 pTmp = &data[hdr+5];
1726 top = get2byte(pTmp);
1727 assert( top<=(int)pPage->pBt->usableSize ); /* by btreeComputeFreeSpace() */
1728 if( gap>top ){
1729 if( top==0 && pPage->pBt->usableSize==65536 ){
1730 top = 65536;
1731 }else{
1732 return SQLITE_CORRUPT_PAGE(pPage);
1733 }
1734 }
1735
1736 /* If there is enough space between gap and top for one more cell pointer,
1737 ** and if the freelist is not empty, then search the
1738 ** freelist looking for a slot big enough to satisfy the request.
1739 */
1740 testcase( gap+2==top );
1741 testcase( gap+1==top );
1742 testcase( gap==top );
1743 if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){
1744 u8 *pSpace = pageFindSlot(pPage, nByte, &rc);
1745 if( pSpace ){
1746 int g2;
1747 assert( pSpace+nByte<=data+pPage->pBt->usableSize );
1748 *pIdx = g2 = (int)(pSpace-data);
1749 if( g2<=gap ){
1750 return SQLITE_CORRUPT_PAGE(pPage);
1751 }else{
1752 return SQLITE_OK;
1753 }
1754 }else if( rc ){
1755 return rc;
1756 }
1757 }
1758
1759 /* The request could not be fulfilled using a freelist slot. Check
1760 ** to see if defragmentation is necessary.
1761 */
1762 testcase( gap+2+nByte==top );
1763 if( gap+2+nByte>top ){
1764 assert( pPage->nCell>0 || CORRUPT_DB );
1765 assert( pPage->nFree>=0 );
1766 rc = defragmentPage(pPage, MIN(4, pPage->nFree - (2+nByte)));
1767 if( rc ) return rc;
1768 top = get2byteNotZero(&data[hdr+5]);
1769 assert( gap+2+nByte<=top );
1770 }
1771
1772
1773 /* Allocate memory from the gap in between the cell pointer array
1774 ** and the cell content area. The btreeComputeFreeSpace() call has already
1775 ** validated the freelist. Given that the freelist is valid, there
1776 ** is no way that the allocation can extend off the end of the page.
1777 ** The assert() below verifies the previous sentence.
1778 */
1779 top -= nByte;
1780 put2byte(&data[hdr+5], top);
1781 assert( top+nByte <= (int)pPage->pBt->usableSize );
1782 *pIdx = top;
1783 return SQLITE_OK;
1784}
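/*
** Illustrative sketch (disabled, not part of the build): a reduced model of
** the gap allocation performed above.  When no freelist slot fits, space is
** carved off the low end of the cell content area simply by moving the
** header offset-5 value down by nByte.  The helper name and the bare
** byte-array interface are hypothetical.
*/
#if 0
static int exampleAllocFromGap(unsigned char *aPage, int hdr, int gap, int nByte){
  int top = (aPage[hdr+5]<<8) | aPage[hdr+6];   /* Start of cell content area */
  if( top==0 ) top = 65536;                     /* Stored 0 means 65536 */
  if( gap+2+nByte>top ) return -1;              /* Caller must defragment first */
  top -= nByte;                                 /* Carve nByte off the low end */
  aPage[hdr+5] = (unsigned char)(top>>8);
  aPage[hdr+6] = (unsigned char)(top&0xff);
  return top;                                   /* Offset of the new allocation */
}
#endif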
1785
1786/*
1787** Return a section of the pPage->aData to the freelist.
1788** The first byte of the new free block is pPage->aData[iStart]
1789** and the size of the block is iSize bytes.
1790**
1791** Adjacent freeblocks are coalesced.
1792**
1793** Even though the freeblock list was checked by btreeComputeFreeSpace(),
1794** that routine will not detect overlap between cells or freeblocks. Nor
1795** does it detect cells or freeblocks that encroach into the reserved bytes
1796** at the end of the page. So do additional corruption checks inside this
1797** routine and return SQLITE_CORRUPT if any problems are found.
1798*/
1799static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
1800 u16 iPtr; /* Address of ptr to next freeblock */
1801 u16 iFreeBlk; /* Address of the next freeblock */
1802 u8 hdr; /* Offset of the page header. 0 or 100 */
1803 u8 nFrag = 0; /* Reduction in fragmentation */
1804 u16 iOrigSize = iSize; /* Original value of iSize */
1805 u16 x; /* Offset to cell content area */
1806 u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */
1807 unsigned char *data = pPage->aData; /* Page content */
1808 u8 *pTmp; /* Temporary ptr into data[] */
1809
1810 assert( pPage->pBt!=0 );
1811 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1812 assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize );
1813 assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize );
1814 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1815 assert( iSize>=4 ); /* Minimum cell size is 4 */
1816 assert( iStart<=pPage->pBt->usableSize-4 );
1817
1818 /* The list of freeblocks must be in ascending order. Find the
1819 ** spot on the list where iStart should be inserted.
1820 */
1821 hdr = pPage->hdrOffset;
1822 iPtr = hdr + 1;
1823 if( data[iPtr+1]==0 && data[iPtr]==0 ){
1824 iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */
1825 }else{
1826 while( (iFreeBlk = get2byte(&data[iPtr]))<iStart ){
1827 if( iFreeBlk<=iPtr ){
1828 if( iFreeBlk==0 ) break; /* TH3: corrupt082.100 */
1829 return SQLITE_CORRUPT_PAGE(pPage);
1830 }
1831 iPtr = iFreeBlk;
1832 }
1833 if( iFreeBlk>pPage->pBt->usableSize-4 ){ /* TH3: corrupt081.100 */
1834 return SQLITE_CORRUPT_PAGE(pPage);
1835 }
1836 assert( iFreeBlk>iPtr || iFreeBlk==0 || CORRUPT_DB );
1837
1838 /* At this point:
1839 ** iFreeBlk: First freeblock after iStart, or zero if none
1840 ** iPtr: The address of a pointer to iFreeBlk
1841 **
1842 ** Check to see if iFreeBlk should be coalesced onto the end of iStart.
1843 */
1844 if( iFreeBlk && iEnd+3>=iFreeBlk ){
1845 nFrag = iFreeBlk - iEnd;
1846 if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_PAGE(pPage);
1847 iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]);
1848 if( iEnd > pPage->pBt->usableSize ){
1849 return SQLITE_CORRUPT_PAGE(pPage);
1850 }
1851 iSize = iEnd - iStart;
1852 iFreeBlk = get2byte(&data[iFreeBlk]);
1853 }
1854
1855 /* If iPtr is another freeblock (that is, if iPtr is not the freelist
1856 ** pointer in the page header) then check to see if iStart should be
1857 ** coalesced onto the end of iPtr.
1858 */
1859 if( iPtr>hdr+1 ){
1860 int iPtrEnd = iPtr + get2byte(&data[iPtr+2]);
1861 if( iPtrEnd+3>=iStart ){
1862 if( iPtrEnd>iStart ) return SQLITE_CORRUPT_PAGE(pPage);
1863 nFrag += iStart - iPtrEnd;
1864 iSize = iEnd - iPtr;
1865 iStart = iPtr;
1866 }
1867 }
1868 if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_PAGE(pPage);
1869 data[hdr+7] -= nFrag;
1870 }
1871 pTmp = &data[hdr+5];
1872 x = get2byte(pTmp);
1873 if( iStart<=x ){
1874 /* The new freeblock is at the beginning of the cell content area,
1875 ** so just extend the cell content area rather than create another
1876 ** freelist entry */
1877 if( iStart<x ) return SQLITE_CORRUPT_PAGE(pPage);
1878 if( iPtr!=hdr+1 ) return SQLITE_CORRUPT_PAGE(pPage);
1879 put2byte(&data[hdr+1], iFreeBlk);
1880 put2byte(&data[hdr+5], iEnd);
1881 }else{
1882 /* Insert the new freeblock into the freelist */
1883 put2byte(&data[iPtr], iStart);
1884 }
1885 if( pPage->pBt->btsFlags & BTS_FAST_SECURE ){
1886 /* Overwrite deleted information with zeros when the secure_delete
1887 ** option is enabled */
1888 memset(&data[iStart], 0, iSize);
1889 }
1890 put2byte(&data[iStart], iFreeBlk);
1891 put2byte(&data[iStart+2], iSize);
1892 pPage->nFree += iOrigSize;
1893 return SQLITE_OK;
1894}
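/*
** Illustrative sketch (disabled, not part of the build): the coalescing rule
** used above is that two blocks merge whenever fewer than 4 bytes separate
** them, and any byte or two left in between is charged to the fragment
** counter at header offset 7.  The struct and helper below are hypothetical
** stand-ins that operate on plain offsets rather than page data.
*/
#if 0
typedef struct ExampleBlk { int iStart; int sz; } ExampleBlk;
/* Merge b into a if they are adjacent or separated by at most 3 bytes.
** Return the number of fragment bytes absorbed, or -1 if no merge occurs. */
static int exampleCoalesce(ExampleBlk *a, const ExampleBlk *b){
  int iEndA = a->iStart + a->sz;
  if( b->iStart < iEndA ) return -1;        /* Overlap implies corruption */
  if( b->iStart - iEndA > 3 ) return -1;    /* Too far apart to merge */
  a->sz = (b->iStart + b->sz) - a->iStart;  /* a now covers both blocks */
  return b->iStart - iEndA;                 /* Fragment bytes absorbed */
}
#endif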
1895
1896/*
1897** Decode the flags byte (the first byte of the header) for a page
1898** and initialize fields of the MemPage structure accordingly.
1899**
1900** Only the following combinations are supported. Anything different
1901** indicates a corrupt database file:
1902**
1903** PTF_ZERODATA
1904** PTF_ZERODATA | PTF_LEAF
1905** PTF_LEAFDATA | PTF_INTKEY
1906** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF
1907*/
1908static int decodeFlags(MemPage *pPage, int flagByte){
1909 BtShared *pBt; /* A copy of pPage->pBt */
1910
1911 assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
1912 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1913 pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 );
1914 flagByte &= ~PTF_LEAF;
1915 pPage->childPtrSize = 4-4*pPage->leaf;
1916 pBt = pPage->pBt;
1917 if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
1918 /* EVIDENCE-OF: R-07291-35328 A value of 5 (0x05) means the page is an
1919 ** interior table b-tree page. */
1920 assert( (PTF_LEAFDATA|PTF_INTKEY)==5 );
1921 /* EVIDENCE-OF: R-26900-09176 A value of 13 (0x0d) means the page is a
1922 ** leaf table b-tree page. */
1923 assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 );
1924 pPage->intKey = 1;
1925 if( pPage->leaf ){
1926 pPage->intKeyLeaf = 1;
1927 pPage->xCellSize = cellSizePtrTableLeaf;
1928 pPage->xParseCell = btreeParseCellPtr;
1929 }else{
1930 pPage->intKeyLeaf = 0;
1931 pPage->xCellSize = cellSizePtrNoPayload;
1932 pPage->xParseCell = btreeParseCellPtrNoPayload;
1933 }
1934 pPage->maxLocal = pBt->maxLeaf;
1935 pPage->minLocal = pBt->minLeaf;
1936 }else if( flagByte==PTF_ZERODATA ){
1937 /* EVIDENCE-OF: R-43316-37308 A value of 2 (0x02) means the page is an
1938 ** interior index b-tree page. */
1939 assert( (PTF_ZERODATA)==2 );
1940 /* EVIDENCE-OF: R-59615-42828 A value of 10 (0x0a) means the page is a
1941 ** leaf index b-tree page. */
1942 assert( (PTF_ZERODATA|PTF_LEAF)==10 );
1943 pPage->intKey = 0;
1944 pPage->intKeyLeaf = 0;
1945 pPage->xCellSize = cellSizePtr;
1946 pPage->xParseCell = btreeParseCellPtrIndex;
1947 pPage->maxLocal = pBt->maxLocal;
1948 pPage->minLocal = pBt->minLocal;
1949 }else{
1950 /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is
1951 ** an error. */
1952 pPage->intKey = 0;
1953 pPage->intKeyLeaf = 0;
1954 pPage->xCellSize = cellSizePtr;
1955 pPage->xParseCell = btreeParseCellPtrIndex;
1956 return SQLITE_CORRUPT_PAGE(pPage);
1957 }
1958 pPage->max1bytePayload = pBt->max1bytePayload;
1959 return SQLITE_OK;
1960}
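/*
** Illustrative sketch (disabled, not part of the build): the four legal flag
** bytes decoded above are 2, 5, 10, and 13.  This hypothetical helper maps a
** flag byte to a descriptive name, mirroring the EVIDENCE-OF comments.
*/
#if 0
static const char *examplePageTypeName(int flagByte){
  switch( flagByte ){
    case 2:  return "interior index b-tree page"; /* PTF_ZERODATA */
    case 5:  return "interior table b-tree page"; /* PTF_LEAFDATA|PTF_INTKEY */
    case 10: return "leaf index b-tree page";     /* PTF_ZERODATA|PTF_LEAF */
    case 13: return "leaf table b-tree page";     /* PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF */
    default: return "corrupt";                    /* Any other value is an error */
  }
}
#endif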
1961
1962/*
1963** Compute the amount of freespace on the page. In other words, fill
1964** in the pPage->nFree field.
1965*/
1966static int btreeComputeFreeSpace(MemPage *pPage){
1967 int pc; /* Address of a freeblock within pPage->aData[] */
1968 u8 hdr; /* Offset to beginning of page header */
1969 u8 *data; /* Equal to pPage->aData */
1970 int usableSize; /* Amount of usable space on each page */
1971 int nFree; /* Number of unused bytes on the page */
1972 int top; /* First byte of the cell content area */
1973 int iCellFirst; /* First allowable cell or freeblock offset */
1974 int iCellLast; /* Last possible cell or freeblock offset */
1975
1976 assert( pPage->pBt!=0 );
1977 assert( pPage->pBt->db!=0 );
1978 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1979 assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
1980 assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
1981 assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
1982 assert( pPage->isInit==1 );
1983 assert( pPage->nFree<0 );
1984
1985 usableSize = pPage->pBt->usableSize;
1986 hdr = pPage->hdrOffset;
1987 data = pPage->aData;
1988 /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates
1989 ** the start of the cell content area. A zero value for this integer is
1990 ** interpreted as 65536. */
1991 top = get2byteNotZero(&data[hdr+5]);
1992 iCellFirst = hdr + 8 + pPage->childPtrSize + 2*pPage->nCell;
1993 iCellLast = usableSize - 4;
1994
1995 /* Compute the total free space on the page
1996 ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the
1997 ** start of the first freeblock on the page, or is zero if there are no
1998 ** freeblocks. */
1999 pc = get2byte(&data[hdr+1]);
2000 nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */
2001 if( pc>0 ){
2002 u32 next, size;
2003 if( pc<top ){
2004 /* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will
2005 ** always be at least one cell before the first freeblock.
2006 */
2007 return SQLITE_CORRUPT_PAGE(pPage);
2008 }
2009 while( 1 ){
2010 if( pc>iCellLast ){
2011 /* Freeblock off the end of the page */
2012 return SQLITE_CORRUPT_PAGE(pPage);
2013 }
2014 next = get2byte(&data[pc]);
2015 size = get2byte(&data[pc+2]);
2016 nFree = nFree + size;
2017 if( next<=pc+size+3 ) break;
2018 pc = next;
2019 }
2020 if( next>0 ){
2021 /* Freeblock not in ascending order */
2022 return SQLITE_CORRUPT_PAGE(pPage);
2023 }
2024 if( pc+size>(unsigned int)usableSize ){
2025 /* Last freeblock extends past page end */
2026 return SQLITE_CORRUPT_PAGE(pPage);
2027 }
2028 }
2029
2030 /* At this point, nFree contains the sum of the offset to the start
2031 ** of the cell-content area plus the number of free bytes within
2032 ** the cell-content area. If this is greater than the usable-size
2033 ** of the page, then the page must be corrupted. This check also
2034 ** serves to verify that the offset to the start of the cell-content
2035 ** area, according to the page header, lies within the page.
2036 */
2037 if( nFree>usableSize || nFree<iCellFirst ){
2038 return SQLITE_CORRUPT_PAGE(pPage);
2039 }
2040 pPage->nFree = (u16)(nFree - iCellFirst);
2041 return SQLITE_OK;
2042}
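/*
** Illustrative sketch (disabled, not part of the build): a worked instance
** of the computation above for an empty leaf page with no freeblocks and no
** fragmented bytes.  The helper name and parameters are hypothetical.
*/
#if 0
static int exampleEmptyLeafFreeSpace(int hdr, int usableSize){
  int iCellFirst = hdr + 8;    /* Leaf page: 8-byte header, no child pointer,
                               ** zero cell pointers */
  int top = usableSize;        /* Offset-5 value on an empty page */
  int nFree = 0 + top;         /* Zero fragment bytes plus the content offset */
  return nFree - iCellFirst;   /* e.g. 4096-8==4088, or 3988 when hdr==100 */
}
#endif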
2043
2044/*
2045** Do additional sanity check after btreeInitPage() if
2046** PRAGMA cell_size_check=ON
2047*/
2048static SQLITE_NOINLINE int btreeCellSizeCheck(MemPage *pPage){
2049 int iCellFirst; /* First allowable cell or freeblock offset */
2050 int iCellLast; /* Last possible cell or freeblock offset */
2051 int i; /* Index into the cell pointer array */
2052 int sz; /* Size of a cell */
2053 int pc; /* Address of a freeblock within pPage->aData[] */
2054 u8 *data; /* Equal to pPage->aData */
2055 int usableSize; /* Maximum usable space on the page */
2056 int cellOffset; /* Start of the cell pointer array */
2057
2058 iCellFirst = pPage->cellOffset + 2*pPage->nCell;
2059 usableSize = pPage->pBt->usableSize;
2060 iCellLast = usableSize - 4;
2061 data = pPage->aData;
2062 cellOffset = pPage->cellOffset;
2063 if( !pPage->leaf ) iCellLast--;
2064 for(i=0; i<pPage->nCell; i++){
2065 pc = get2byteAligned(&data[cellOffset+i*2]);
2066 testcase( pc==iCellFirst );
2067 testcase( pc==iCellLast );
2068 if( pc<iCellFirst || pc>iCellLast ){
2069 return SQLITE_CORRUPT_PAGE(pPage);
2070 }
2071 sz = pPage->xCellSize(pPage, &data[pc]);
2072 testcase( pc+sz==usableSize );
2073 if( pc+sz>usableSize ){
2074 return SQLITE_CORRUPT_PAGE(pPage);
2075 }
2076 }
2077 return SQLITE_OK;
2078}
2079
2080/*
2081** Initialize the auxiliary information for a disk block.
2082**
2083** Return SQLITE_OK on success. If we see that the page does
2084** not contain a well-formed database page, then return
2085** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
2086** guarantee that the page is well-formed. It only shows that
2087** we failed to detect any corruption.
2088*/
2089static int btreeInitPage(MemPage *pPage){
2090 u8 *data; /* Equal to pPage->aData */
2091 BtShared *pBt; /* The main btree structure */
2092
2093 assert( pPage->pBt!=0 );
2094 assert( pPage->pBt->db!=0 );
2095 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
2096 assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
2097 assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
2098 assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
2099 assert( pPage->isInit==0 );
2100
2101 pBt = pPage->pBt;
2102 data = pPage->aData + pPage->hdrOffset;
2103 /* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating
2104 ** the b-tree page type. */
2105 if( decodeFlags(pPage, data[0]) ){
2106 return SQLITE_CORRUPT_PAGE(pPage);
2107 }
2108 assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
2109 pPage->maskPage = (u16)(pBt->pageSize - 1);
2110 pPage->nOverflow = 0;
2111 pPage->cellOffset = pPage->hdrOffset + 8 + pPage->childPtrSize;
2112 pPage->aCellIdx = data + pPage->childPtrSize + 8;
2113 pPage->aDataEnd = pPage->aData + pBt->pageSize;
2114 pPage->aDataOfst = pPage->aData + pPage->childPtrSize;
2115 /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
2116 ** number of cells on the page. */
2117 pPage->nCell = get2byte(&data[3]);
2118 if( pPage->nCell>MX_CELL(pBt) ){
2119 /* Too many cells for a single page. The page must be corrupt */
2120 return SQLITE_CORRUPT_PAGE(pPage);
2121 }
2122 testcase( pPage->nCell==MX_CELL(pBt) );
2123 /* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only
2124 ** possible for a root page of a table that contains no rows) then the
2125 ** offset to the cell content area will equal the page size minus the
2126 ** bytes of reserved space. */
2127 assert( pPage->nCell>0
2128 || get2byteNotZero(&data[5])==(int)pBt->usableSize
2129 || CORRUPT_DB );
2130 pPage->nFree = -1; /* Indicate that this value is yet uncomputed */
2131 pPage->isInit = 1;
2132 if( pBt->db->flags & SQLITE_CellSizeCk ){
2133 return btreeCellSizeCheck(pPage);
2134 }
2135 return SQLITE_OK;
2136}
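/*
** Illustrative sketch (disabled, not part of the build): the header fields
** read above live at fixed offsets - flag byte at 0, first freeblock at 1,
** cell count at 3, content-area start at 5, fragment count at 7.  The struct
** and helper below are hypothetical and parse those fields from a raw page.
*/
#if 0
typedef struct ExampleHdr {
  int flags;        /* Byte 0: page type flags */
  int firstFree;    /* Bytes 1-2: first freeblock, 0 if none */
  int nCell;        /* Bytes 3-4: number of cells */
  int top;          /* Bytes 5-6: start of cell content area (0 => 65536) */
  int nFrag;        /* Byte 7: fragmented free bytes */
} ExampleHdr;
static void exampleReadHdr(const unsigned char *a, ExampleHdr *p){
  p->flags     = a[0];
  p->firstFree = (a[1]<<8) | a[2];
  p->nCell     = (a[3]<<8) | a[4];
  p->top       = (a[5]<<8) | a[6];
  if( p->top==0 ) p->top = 65536;
  p->nFrag     = a[7];
}
#endif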
2137
2138/*
2139** Set up a raw page so that it looks like a database page holding
2140** no entries.
2141*/
2142static void zeroPage(MemPage *pPage, int flags){
2143 unsigned char *data = pPage->aData;
2144 BtShared *pBt = pPage->pBt;
2145 u8 hdr = pPage->hdrOffset;
2146 u16 first;
2147
2148 assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno || CORRUPT_DB );
2149 assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
2150 assert( sqlite3PagerGetData(pPage->pDbPage) == data );
2151 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
2152 assert( sqlite3_mutex_held(pBt->mutex) );
2153 if( pBt->btsFlags & BTS_FAST_SECURE ){
2154 memset(&data[hdr], 0, pBt->usableSize - hdr);
2155 }
2156 data[hdr] = (char)flags;
2157 first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8);
2158 memset(&data[hdr+1], 0, 4);
2159 data[hdr+7] = 0;
2160 put2byte(&data[hdr+5], pBt->usableSize);
2161 pPage->nFree = (u16)(pBt->usableSize - first);
2162 decodeFlags(pPage, flags);
2163 pPage->cellOffset = first;
2164 pPage->aDataEnd = &data[pBt->pageSize];
2165 pPage->aCellIdx = &data[first];
2166 pPage->aDataOfst = &data[pPage->childPtrSize];
2167 pPage->nOverflow = 0;
2168 assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
2169 pPage->maskPage = (u16)(pBt->pageSize - 1);
2170 pPage->nCell = 0;
2171 pPage->isInit = 1;
2172}
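/*
** Illustrative sketch (disabled, not part of the build): the inverse of the
** header parse - writing the 8-byte header of an empty page the way
** zeroPage() does.  The helper name is hypothetical.
*/
#if 0
static void exampleZeroHdr(unsigned char *a, int flags, int usableSize){
  a[0] = (unsigned char)flags;              /* Page type */
  a[1] = a[2] = 0;                          /* No freeblocks */
  a[3] = a[4] = 0;                          /* Zero cells */
  a[5] = (unsigned char)(usableSize>>8);    /* Content area starts at the end */
  a[6] = (unsigned char)(usableSize&0xff);  /* (65536 wraps to 0 as required) */
  a[7] = 0;                                 /* No fragmented bytes */
}
#endif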
2173
2174
2175/*
2176** Convert a DbPage obtained from the pager into a MemPage used by
2177** the btree layer.
2178*/
2179static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){
2180 MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
2181 if( pgno!=pPage->pgno ){
2182 pPage->aData = sqlite3PagerGetData(pDbPage);
2183 pPage->pDbPage = pDbPage;
2184 pPage->pBt = pBt;
2185 pPage->pgno = pgno;
2186 pPage->hdrOffset = pgno==1 ? 100 : 0;
2187 }
2188 assert( pPage->aData==sqlite3PagerGetData(pDbPage) );
2189 return pPage;
2190}
2191
2192/*
2193** Get a page from the pager. Initialize the MemPage.pBt and
2194** MemPage.aData elements if needed. See also: btreeGetUnusedPage().
2195**
2196** If the PAGER_GET_NOCONTENT flag is set, it means that we do not care
2197** about the content of the page at this time. So do not go to the disk
2198** to fetch the content. Just fill in the content with zeros for now.
2199** If in the future we call sqlite3PagerWrite() on this page, that
2200** means we have started to be concerned about content and the disk
2201** read should occur at that point.
2202*/
2203static int btreeGetPage(
2204 BtShared *pBt, /* The btree */
2205 Pgno pgno, /* Number of the page to fetch */
2206 MemPage **ppPage, /* Return the page in this parameter */
2207 int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */
2208){
2209 int rc;
2210 DbPage *pDbPage;
2211
2212 assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY );
2213 assert( sqlite3_mutex_held(pBt->mutex) );
2214 rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, flags);
2215 if( rc ) return rc;
2216 *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt);
2217 return SQLITE_OK;
2218}
2219
2220/*
2221** Retrieve a page from the pager cache. If the requested page is not
2222** already in the pager cache return NULL. Initialize the MemPage.pBt and
2223** MemPage.aData elements if needed.
2224*/
2225static MemPage *btreePageLookup(BtShared *pBt, Pgno pgno){
2226 DbPage *pDbPage;
2227 assert( sqlite3_mutex_held(pBt->mutex) );
2228 pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
2229 if( pDbPage ){
2230 return btreePageFromDbPage(pDbPage, pgno, pBt);
2231 }
2232 return 0;
2233}
2234
2235/*
2236** Return the size of the database file in pages. If there is any kind of
2237** error, return ((unsigned int)-1).
2238*/
2239static Pgno btreePagecount(BtShared *pBt){
2240 return pBt->nPage;
2241}
2242Pgno sqlite3BtreeLastPage(Btree *p){
2243 assert( sqlite3BtreeHoldsMutex(p) );
2244 return btreePagecount(p->pBt);
2245}
2246
2247/*
2248** Get a page from the pager and initialize it.
2249**
2250** If pCur!=0 then the page is being fetched as part of a moveToChild()
2251** call. Do additional sanity checking on the page in this case.
2252** And if the fetch fails, this routine must decrement pCur->iPage.
2253**
2254** The page is fetched as read-write unless pCur is not NULL and is
2255** a read-only cursor.
2256**
2257** If an error occurs, then *ppPage is undefined. It
2258** may remain unchanged, or it may be set to an invalid value.
2259*/
2260static int getAndInitPage(
2261 BtShared *pBt, /* The database file */
2262 Pgno pgno, /* Number of the page to get */
2263 MemPage **ppPage, /* Write the page pointer here */
2264 BtCursor *pCur, /* Cursor to receive the page, or NULL */
2265 int bReadOnly /* True for a read-only page */
2266){
2267 int rc;
2268 DbPage *pDbPage;
2269 assert( sqlite3_mutex_held(pBt->mutex) );
2270 assert( pCur==0 || ppPage==&pCur->pPage );
2271 assert( pCur==0 || bReadOnly==pCur->curPagerFlags );
2272 assert( pCur==0 || pCur->iPage>0 );
2273
2274 if( pgno>btreePagecount(pBt) ){
2275 rc = SQLITE_CORRUPT_BKPT;
2276 goto getAndInitPage_error1;
2277 }
2278 rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly);
2279 if( rc ){
2280 goto getAndInitPage_error1;
2281 }
2282 *ppPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
2283 if( (*ppPage)->isInit==0 ){
2284 btreePageFromDbPage(pDbPage, pgno, pBt);
2285 rc = btreeInitPage(*ppPage);
2286 if( rc!=SQLITE_OK ){
2287 goto getAndInitPage_error2;
2288 }
2289 }
2290 assert( (*ppPage)->pgno==pgno || CORRUPT_DB );
2291 assert( (*ppPage)->aData==sqlite3PagerGetData(pDbPage) );
2292
2293 /* If obtaining a child page for a cursor, we must verify that the page is
2294 ** compatible with the root page. */
2295 if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){
2296 rc = SQLITE_CORRUPT_PGNO(pgno);
2297 goto getAndInitPage_error2;
2298 }
2299 return SQLITE_OK;
2300
2301getAndInitPage_error2:
2302 releasePage(*ppPage);
2303getAndInitPage_error1:
2304 if( pCur ){
2305 pCur->iPage--;
2306 pCur->pPage = pCur->apPage[pCur->iPage];
2307 }
2308 testcase( pgno==0 );
2309 assert( pgno!=0 || rc!=SQLITE_OK );
2310 return rc;
2311}
2312
2313/*
2314** Release a MemPage. This should be called once for each prior
2315** call to btreeGetPage.
2316**
2317** Page1 is a special case and must be released using releasePageOne().
2318*/
2319static void releasePageNotNull(MemPage *pPage){
2320 assert( pPage->aData );
2321 assert( pPage->pBt );
2322 assert( pPage->pDbPage!=0 );
2323 assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
2324 assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
2325 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
2326 sqlite3PagerUnrefNotNull(pPage->pDbPage);
2327}
2328static void releasePage(MemPage *pPage){
2329 if( pPage ) releasePageNotNull(pPage);
2330}
2331static void releasePageOne(MemPage *pPage){
2332 assert( pPage!=0 );
2333 assert( pPage->aData );
2334 assert( pPage->pBt );
2335 assert( pPage->pDbPage!=0 );
2336 assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
2337 assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
2338 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
2339 sqlite3PagerUnrefPageOne(pPage->pDbPage);
2340}
2341
2342/*
2343** Get an unused page.
2344**
2345** This works just like btreeGetPage() with the addition:
2346**
2347** * If the page is already in use for some other purpose, immediately
2348** release it and return an SQLITE_CORRUPT error.
2349** * Make sure the isInit flag is clear
2350*/
2351static int btreeGetUnusedPage(
2352 BtShared *pBt, /* The btree */
2353 Pgno pgno, /* Number of the page to fetch */
2354 MemPage **ppPage, /* Return the page in this parameter */
2355 int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */
2356){
2357 int rc = btreeGetPage(pBt, pgno, ppPage, flags);
2358 if( rc==SQLITE_OK ){
2359 if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){
2360 releasePage(*ppPage);
2361 *ppPage = 0;
2362 return SQLITE_CORRUPT_BKPT;
2363 }
2364 (*ppPage)->isInit = 0;
2365 }else{
2366 *ppPage = 0;
2367 }
2368 return rc;
2369}
2370
2371
2372/*
2373** During a rollback, when the pager reloads information into the cache
2374** so that the cache is restored to its original state at the start of
2375** the transaction, for each page restored this routine is called.
2376**
2377** This routine needs to reset the extra data section at the end of the
2378** page to agree with the restored data.
2379*/
2380static void pageReinit(DbPage *pData){
2381 MemPage *pPage;
2382 pPage = (MemPage *)sqlite3PagerGetExtra(pData);
2383 assert( sqlite3PagerPageRefcount(pData)>0 );
2384 if( pPage->isInit ){
2385 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
2386 pPage->isInit = 0;
2387 if( sqlite3PagerPageRefcount(pData)>1 ){
2388 /* pPage might not be a btree page; it might be an overflow page
2389 ** or ptrmap page or a free page. In those cases, the following
2390 ** call to btreeInitPage() will likely return SQLITE_CORRUPT.
2391 ** But no harm is done by this. And it is very important that
2392 ** btreeInitPage() be called on every btree page so we make
2393 ** the call for every page that comes in for re-initing. */
2394 btreeInitPage(pPage);
2395 }
2396 }
2397}
2398
2399/*
2400** Invoke the busy handler for a btree.
2401*/
2402static int btreeInvokeBusyHandler(void *pArg){
2403 BtShared *pBt = (BtShared*)pArg;
2404 assert( pBt->db );
2405 assert( sqlite3_mutex_held(pBt->db->mutex) );
2406 return sqlite3InvokeBusyHandler(&pBt->db->busyHandler);
2407}
2408
2409/*
2410** Open a database file.
2411**
2412** zFilename is the name of the database file. If zFilename is NULL
2413** then an ephemeral database is created. The ephemeral database might
2414** be exclusively in memory, or it might use a disk-based memory cache.
2415** Either way, the ephemeral database will be automatically deleted
2416** when sqlite3BtreeClose() is called.
2417**
2418** If zFilename is ":memory:" then an in-memory database is created
2419** that is automatically destroyed when it is closed.
2420**
2421** The "flags" parameter is a bitmask that might contain bits like
2422** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY.
2423**
2424** If the database is already opened in the same database connection
2425** and we are in shared cache mode, then the open will fail with an
2426** SQLITE_CONSTRAINT error. We cannot allow two or more BtShared
2427** objects in the same database connection since doing so will lead
2428** to problems with locking.
2429*/
2430int sqlite3BtreeOpen(
2431 sqlite3_vfs *pVfs, /* VFS to use for this b-tree */
2432 const char *zFilename, /* Name of the file containing the BTree database */
2433 sqlite3 *db, /* Associated database handle */
2434 Btree **ppBtree, /* Pointer to new Btree object written here */
2435 int flags, /* Options */
2436 int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */
2437){
2438 BtShared *pBt = 0; /* Shared part of btree structure */
2439 Btree *p; /* Handle to return */
2440 sqlite3_mutex *mutexOpen = 0; /* Prevents a race condition. Ticket #3537 */
2441 int rc = SQLITE_OK; /* Result code from this function */
2442 u8 nReserve; /* Bytes of unused space on each page */
2443 unsigned char zDbHeader[100]; /* Database header content */
2444
2445 /* True if opening an ephemeral, temporary database */
2446 const int isTempDb = zFilename==0 || zFilename[0]==0;
2447
2448 /* Set the variable isMemdb to true for an in-memory database, or
2449 ** false for a file-based database.
2450 */
2451#ifdef SQLITE_OMIT_MEMORYDB
2452 const int isMemdb = 0;
2453#else
2454 const int isMemdb = (zFilename && strcmp(zFilename, ":memory:")==0)
2455 || (isTempDb && sqlite3TempInMemory(db))
2456 || (vfsFlags & SQLITE_OPEN_MEMORY)!=0;
2457#endif
2458
2459 assert( db!=0 );
2460 assert( pVfs!=0 );
2461 assert( sqlite3_mutex_held(db->mutex) );
2462 assert( (flags&0xff)==flags ); /* flags fit in 8 bits */
2463
2464 /* Only a BTREE_SINGLE database can be BTREE_UNORDERED */
2465 assert( (flags & BTREE_UNORDERED)==0 || (flags & BTREE_SINGLE)!=0 );
2466
2467 /* A BTREE_SINGLE database is always a temporary and/or ephemeral */
2468 assert( (flags & BTREE_SINGLE)==0 || isTempDb );
2469
2470 if( isMemdb ){
2471 flags |= BTREE_MEMORY;
2472 }
2473 if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){
2474 vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB;
2475 }
2476 p = sqlite3MallocZero(sizeof(Btree));
2477 if( !p ){
2478 return SQLITE_NOMEM_BKPT;
2479 }
2480 p->inTrans = TRANS_NONE;
2481 p->db = db;
2482#ifndef SQLITE_OMIT_SHARED_CACHE
2483 p->lock.pBtree = p;
2484 p->lock.iTable = 1;
2485#endif
2486
2487#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
2488 /*
2489 ** If this Btree is a candidate for shared cache, try to find an
2490 ** existing BtShared object that we can share with
2491 */
2492 if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){
2493 if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){
2494 int nFilename = sqlite3Strlen30(zFilename)+1;
2495 int nFullPathname = pVfs->mxPathname+1;
2496 char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename));
2497 MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
2498
2499 p->sharable = 1;
2500 if( !zFullPathname ){
2501 sqlite3_free(p);
2502 return SQLITE_NOMEM_BKPT;
2503 }
2504 if( isMemdb ){
2505 memcpy(zFullPathname, zFilename, nFilename);
2506 }else{
2507 rc = sqlite3OsFullPathname(pVfs, zFilename,
2508 nFullPathname, zFullPathname);
2509 if( rc ){
2510 if( rc==SQLITE_OK_SYMLINK ){
2511 rc = SQLITE_OK;
2512 }else{
2513 sqlite3_free(zFullPathname);
2514 sqlite3_free(p);
2515 return rc;
2516 }
2517 }
2518 }
2519#if SQLITE_THREADSAFE
2520 mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN);
2521 sqlite3_mutex_enter(mutexOpen);
2522 mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN);
2523 sqlite3_mutex_enter(mutexShared);
2524#endif
2525 for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){
2526 assert( pBt->nRef>0 );
2527 if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0))
2528 && sqlite3PagerVfs(pBt->pPager)==pVfs ){
2529 int iDb;
2530 for(iDb=db->nDb-1; iDb>=0; iDb--){
2531 Btree *pExisting = db->aDb[iDb].pBt;
2532 if( pExisting && pExisting->pBt==pBt ){
2533 sqlite3_mutex_leave(mutexShared);
2534 sqlite3_mutex_leave(mutexOpen);
2535 sqlite3_free(zFullPathname);
2536 sqlite3_free(p);
2537 return SQLITE_CONSTRAINT;
2538 }
2539 }
2540 p->pBt = pBt;
2541 pBt->nRef++;
2542 break;
2543 }
2544 }
2545 sqlite3_mutex_leave(mutexShared);
2546 sqlite3_free(zFullPathname);
2547 }
2548#ifdef SQLITE_DEBUG
2549 else{
2550 /* In debug mode, we mark all persistent databases as sharable
2551 ** even when they are not. This exercises the locking code and
2552 ** gives more opportunity for asserts(sqlite3_mutex_held())
2553 ** statements to find locking problems.
2554 */
2555 p->sharable = 1;
2556 }
2557#endif
2558 }
2559#endif
2560 if( pBt==0 ){
2561 /*
2562 ** The following asserts make sure that structures used by the btree are
2563 ** the right size. This is to guard against size changes that result
2564 ** when compiling on a different architecture.
2565 */
2566 assert( sizeof(i64)==8 );
2567 assert( sizeof(u64)==8 );
2568 assert( sizeof(u32)==4 );
2569 assert( sizeof(u16)==2 );
2570 assert( sizeof(Pgno)==4 );
2571
2572 pBt = sqlite3MallocZero( sizeof(*pBt) );
2573 if( pBt==0 ){
2574 rc = SQLITE_NOMEM_BKPT;
2575 goto btree_open_out;
2576 }
2577 rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
2578 sizeof(MemPage), flags, vfsFlags, pageReinit);
2579 if( rc==SQLITE_OK ){
2580 sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap);
2581 rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
2582 }
2583 if( rc!=SQLITE_OK ){
2584 goto btree_open_out;
2585 }
2586 pBt->openFlags = (u8)flags;
2587 pBt->db = db;
2588 sqlite3PagerSetBusyHandler(pBt->pPager, btreeInvokeBusyHandler, pBt);
2589 p->pBt = pBt;
2590
2591 pBt->pCursor = 0;
2592 pBt->pPage1 = 0;
2593 if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY;
2594#if defined(SQLITE_SECURE_DELETE)
2595 pBt->btsFlags |= BTS_SECURE_DELETE;
2596#elif defined(SQLITE_FAST_SECURE_DELETE)
2597 pBt->btsFlags |= BTS_OVERWRITE;
2598#endif
2599 /* EVIDENCE-OF: R-51873-39618 The page size for a database file is
2600 ** determined by the 2-byte integer located at an offset of 16 bytes from
2601 ** the beginning of the database file. */
2602 pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16);
2603 if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
2604 || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
2605 pBt->pageSize = 0;
2606#ifndef SQLITE_OMIT_AUTOVACUUM
2607 /* If the magic name ":memory:" will create an in-memory database, then
2608 ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if
2609 ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if
2610 ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a
2611 ** regular file-name. In this case the auto-vacuum applies as per normal.
2612 */
2613 if( zFilename && !isMemdb ){
2614 pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0);
2615 pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0);
2616 }
2617#endif
2618 nReserve = 0;
2619 }else{
2620 /* EVIDENCE-OF: R-37497-42412 The size of the reserved region is
2621 ** determined by the one-byte unsigned integer found at an offset of 20
2622 ** into the database file header. */
2623 nReserve = zDbHeader[20];
2624 pBt->btsFlags |= BTS_PAGESIZE_FIXED;
2625#ifndef SQLITE_OMIT_AUTOVACUUM
2626 pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
2627 pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0);
2628#endif
2629 }
2630 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve);
2631 if( rc ) goto btree_open_out;
2632 pBt->usableSize = pBt->pageSize - nReserve;
2633 assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */
2634
2635#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
2636 /* Add the new BtShared object to the linked list of sharable BtShareds.
2637 */
2638 pBt->nRef = 1;
2639 if( p->sharable ){
2640 MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
2641 MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN);)
2642 if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){
2643 pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST);
2644 if( pBt->mutex==0 ){
2645 rc = SQLITE_NOMEM_BKPT;
2646 goto btree_open_out;
2647 }
2648 }
2649 sqlite3_mutex_enter(mutexShared);
2650 pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList);
2651 GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt;
2652 sqlite3_mutex_leave(mutexShared);
2653 }
2654#endif
2655 }
2656
2657#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
2658 /* If the new Btree uses a sharable pBtShared, then link the new
2659 ** Btree into the list of all sharable Btrees for the same connection.
2660 ** The list is kept in ascending order by pBt address.
2661 */
2662 if( p->sharable ){
2663 int i;
2664 Btree *pSib;
2665 for(i=0; i<db->nDb; i++){
2666 if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){
2667 while( pSib->pPrev ){ pSib = pSib->pPrev; }
2668 if( (uptr)p->pBt<(uptr)pSib->pBt ){
2669 p->pNext = pSib;
2670 p->pPrev = 0;
2671 pSib->pPrev = p;
2672 }else{
2673 while( pSib->pNext && (uptr)pSib->pNext->pBt<(uptr)p->pBt ){
2674 pSib = pSib->pNext;
2675 }
2676 p->pNext = pSib->pNext;
2677 p->pPrev = pSib;
2678 if( p->pNext ){
2679 p->pNext->pPrev = p;
2680 }
2681 pSib->pNext = p;
2682 }
2683 break;
2684 }
2685 }
2686 }
2687#endif
2688 *ppBtree = p;
2689
2690btree_open_out:
2691 if( rc!=SQLITE_OK ){
2692 if( pBt && pBt->pPager ){
2693 sqlite3PagerClose(pBt->pPager, 0);
2694 }
2695 sqlite3_free(pBt);
2696 sqlite3_free(p);
2697 *ppBtree = 0;
2698 }else{
2699 sqlite3_file *pFile;
2700
2701 /* If the B-Tree was successfully opened, set the pager-cache size to the
2702 ** default value. Except, when opening on an existing shared pager-cache,
2703 ** do not change the pager-cache size.
2704 */
2705 if( sqlite3BtreeSchema(p, 0, 0)==0 ){
2706 sqlite3BtreeSetCacheSize(p, SQLITE_DEFAULT_CACHE_SIZE);
2707 }
2708
2709 pFile = sqlite3PagerFile(pBt->pPager);
2710 if( pFile->pMethods ){
2711 sqlite3OsFileControlHint(pFile, SQLITE_FCNTL_PDB, (void*)&pBt->db);
2712 }
2713 }
2714 if( mutexOpen ){
2715 assert( sqlite3_mutex_held(mutexOpen) );
2716 sqlite3_mutex_leave(mutexOpen);
2717 }
2718 assert( rc!=SQLITE_OK || sqlite3BtreeConnectionCount(*ppBtree)>0 );
2719 return rc;
2720}
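/*
** Illustrative sketch (disabled, not part of the build): the two database
** header fields consumed above can be decoded in isolation - the big-endian
** page size at byte 16, where the stored value 1 denotes 65536, and the
** reserved-bytes count at byte 20.  The helper name and its interface are
** hypothetical; only the decoding matches the code above.
*/
#if 0
static int exampleHeaderSizes(const unsigned char *zDbHeader,
                              int *pPageSize, int *pUsable){
  int pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16); /* 0x00 0x01 => 65536 */
  int nReserve = zDbHeader[20];
  if( pageSize<512 || pageSize>65536 || (pageSize&(pageSize-1))!=0 ){
    return 1;                          /* Not a power of two in [512,65536] */
  }
  *pPageSize = pageSize;
  *pUsable = pageSize - nReserve;      /* Usable bytes per page */
  return 0;
}
#endif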
2721
2722/*
2723** Decrement the BtShared.nRef counter. When it reaches zero,
2724** remove the BtShared structure from the sharing list. Return
2725** true if the BtShared.nRef counter reaches zero and return
2726** false if it is still positive.
2727*/
2728static int removeFromSharingList(BtShared *pBt){
2729#ifndef SQLITE_OMIT_SHARED_CACHE
2730 MUTEX_LOGIC( sqlite3_mutex *pMainMtx; )
2731 BtShared *pList;
2732 int removed = 0;
2733
2734 assert( sqlite3_mutex_notheld(pBt->mutex) );
2735 MUTEX_LOGIC( pMainMtx = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MAIN); )
2736 sqlite3_mutex_enter(pMainMtx);
2737 pBt->nRef--;
2738 if( pBt->nRef<=0 ){
2739 if( GLOBAL(BtShared*,sqlite3SharedCacheList)==pBt ){
2740 GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt->pNext;
2741 }else{
2742 pList = GLOBAL(BtShared*,sqlite3SharedCacheList);
2743 while( ALWAYS(pList) && pList->pNext!=pBt ){
2744 pList=pList->pNext;
2745 }
2746 if( ALWAYS(pList) ){
2747 pList->pNext = pBt->pNext;
2748 }
2749 }
2750 if( SQLITE_THREADSAFE ){
2751 sqlite3_mutex_free(pBt->mutex);
2752 }
2753 removed = 1;
2754 }
2755 sqlite3_mutex_leave(pMainMtx);
2756 return removed;
2757#else
2758 return 1;
2759#endif
2760}
2761
2762/*
2763** Make sure pBt->pTmpSpace points to an allocation of
2764** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child
2765** pointer.
2766*/
2767static SQLITE_NOINLINE int allocateTempSpace(BtShared *pBt){
2768 assert( pBt!=0 );
2769 assert( pBt->pTmpSpace==0 );
2770 /* This routine is called only by btreeCursor() when allocating the
2771 ** first write cursor for the BtShared object */
2772 assert( pBt->pCursor!=0 && (pBt->pCursor->curFlags & BTCF_WriteFlag)!=0 );
2773 pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize );
2774 if( pBt->pTmpSpace==0 ){
2775 BtCursor *pCur = pBt->pCursor;
2776 pBt->pCursor = pCur->pNext; /* Unlink the cursor */
2777 memset(pCur, 0, sizeof(*pCur));
2778 return SQLITE_NOMEM_BKPT;
2779 }
2780
2781 /* One of the uses of pBt->pTmpSpace is to format cells before
2782 ** inserting them into a leaf page (function fillInCell()). If
2783 ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes
2784 ** by the various routines that manipulate binary cells. Which
2785 ** can mean that fillInCell() only initializes the first 2 or 3
2786 ** bytes of pTmpSpace, but that the first 4 bytes are copied from
2787 ** it into a database page. This is not actually a problem, but it
2788** does cause a valgrind error when the 1 or 2 bytes of uninitialized
2789 ** data is passed to system call write(). So to avoid this error,
2790 ** zero the first 4 bytes of temp space here.
2791 **
2792 ** Also: Provide four bytes of initialized space before the
2793 ** beginning of pTmpSpace as an area available to prepend the
2794 ** left-child pointer to the beginning of a cell.
2795 */
2796 memset(pBt->pTmpSpace, 0, 8);
2797 pBt->pTmpSpace += 4;
2798 return SQLITE_OK;
2799}
2800
2801/*
2802** Free the pBt->pTmpSpace allocation
2803*/
2804static void freeTempSpace(BtShared *pBt){
2805 if( pBt->pTmpSpace ){
2806 pBt->pTmpSpace -= 4;
2807 sqlite3PageFree(pBt->pTmpSpace);
2808 pBt->pTmpSpace = 0;
2809 }
2810}
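/*
** Illustrative sketch (disabled, not part of the build): pTmpSpace is handed
** out 4 bytes past the start of its allocation so that callers can prepend a
** 4-byte left-child pointer without a second buffer, and freeTempSpace()
** must back the pointer up before freeing.  The same idiom with plain
** malloc/free, using hypothetical helper names:
*/
#if 0
#include <stdlib.h>
#include <string.h>
static unsigned char *exampleAllocWithPrefix(int nByte){  /* assumes nByte>=4 */
  unsigned char *p = malloc(nByte + 4);
  if( p==0 ) return 0;
  memset(p, 0, 8);          /* Keep the prefix and first bytes initialized */
  return p + 4;             /* Hand out a pointer 4 bytes into the buffer */
}
static void exampleFreeWithPrefix(unsigned char *p){
  if( p ) free(p - 4);      /* Back up to the true start before freeing */
}
#endif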
2811
2812/*
2813** Close an open database and invalidate all cursors.
2814*/
2815int sqlite3BtreeClose(Btree *p){
2816 BtShared *pBt = p->pBt;
2817
2818 /* Close all cursors opened via this handle. */
2819 assert( sqlite3_mutex_held(p->db->mutex) );
2820 sqlite3BtreeEnter(p);
2821
2822 /* Verify that no other cursors have this Btree open */
2823#ifdef SQLITE_DEBUG
2824 {
2825 BtCursor *pCur = pBt->pCursor;
2826 while( pCur ){
2827 BtCursor *pTmp = pCur;
2828 pCur = pCur->pNext;
2829 assert( pTmp->pBtree!=p );
2830
2831 }
2832 }
2833#endif
2834
2835 /* Rollback any active transaction and free the handle structure.
2836 ** The call to sqlite3BtreeRollback() drops any table-locks held by
2837 ** this handle.
2838 */
2839 sqlite3BtreeRollback(p, SQLITE_OK, 0);
2840 sqlite3BtreeLeave(p);
2841
2842 /* If there are still other outstanding references to the shared-btree
2843 ** structure, return now. The remainder of this procedure cleans
2844 ** up the shared-btree.
2845 */
2846 assert( p->wantToLock==0 && p->locked==0 );
2847 if( !p->sharable || removeFromSharingList(pBt) ){
2848 /* The pBt is no longer on the sharing list, so we can access
2849 ** it without having to hold the mutex.
2850 **
2851 ** Clean out and delete the BtShared object.
2852 */
2853 assert( !pBt->pCursor );
2854 sqlite3PagerClose(pBt->pPager, p->db);
2855 if( pBt->xFreeSchema && pBt->pSchema ){
2856 pBt->xFreeSchema(pBt->pSchema);
2857 }
2858 sqlite3DbFree(0, pBt->pSchema);
2859 freeTempSpace(pBt);
2860 sqlite3_free(pBt);
2861 }
2862
2863#ifndef SQLITE_OMIT_SHARED_CACHE
2864 assert( p->wantToLock==0 );
2865 assert( p->locked==0 );
2866 if( p->pPrev ) p->pPrev->pNext = p->pNext;
2867 if( p->pNext ) p->pNext->pPrev = p->pPrev;
2868#endif
2869
2870 sqlite3_free(p);
2871 return SQLITE_OK;
2872}
2873
2874/*
2875** Change the "soft" limit on the number of pages in the cache.
2876** Unused and unmodified pages will be recycled when the number of
2877** pages in the cache exceeds this soft limit. But the size of the
2878** cache is allowed to grow larger than this limit if it contains
2879** dirty pages or pages still in active use.
2880*/
2881int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
2882 BtShared *pBt = p->pBt;
2883 assert( sqlite3_mutex_held(p->db->mutex) );
2884 sqlite3BtreeEnter(p);
2885 sqlite3PagerSetCachesize(pBt->pPager, mxPage);
2886 sqlite3BtreeLeave(p);
2887 return SQLITE_OK;
2888}
2889
2890/*
2891** Change the "spill" limit on the number of pages in the cache.
2892** If the number of pages exceeds this limit during a write transaction,
2893** the pager might attempt to "spill" pages to the journal early in
2894** order to free up memory.
2895**
2896** The value returned is the current spill size. If zero is passed
2897** as an argument, no changes are made to the spill size setting, so
2898** using mxPage of 0 is a way to query the current spill size.
2899*/
2900int sqlite3BtreeSetSpillSize(Btree *p, int mxPage){
2901 BtShared *pBt = p->pBt;
2902 int res;
2903 assert( sqlite3_mutex_held(p->db->mutex) );
2904 sqlite3BtreeEnter(p);
2905 res = sqlite3PagerSetSpillsize(pBt->pPager, mxPage);
2906 sqlite3BtreeLeave(p);
2907 return res;
2908}
2909
2910#if SQLITE_MAX_MMAP_SIZE>0
2911/*
2912** Change the limit on the amount of the database file that may be
2913** memory mapped.
2914*/
2915int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){
2916 BtShared *pBt = p->pBt;
2917 assert( sqlite3_mutex_held(p->db->mutex) );
2918 sqlite3BtreeEnter(p);
2919 sqlite3PagerSetMmapLimit(pBt->pPager, szMmap);
2920 sqlite3BtreeLeave(p);
2921 return SQLITE_OK;
2922}
2923#endif /* SQLITE_MAX_MMAP_SIZE>0 */
2924
2925/*
2926** Change the way data is synced to disk in order to increase or decrease
2927** how well the database resists damage due to OS crashes and power
2928** failures. Level 1 is the same as asynchronous (no syncs() occur and
2929** there is a high probability of damage) Level 2 is the default. There
2930** is a very low but non-zero probability of damage. Level 3 reduces the
2931** probability of damage to near zero but with a write performance reduction.
2932*/
2933#ifndef SQLITE_OMIT_PAGER_PRAGMAS
2934int sqlite3BtreeSetPagerFlags(
2935 Btree *p, /* The btree to set the safety level on */
2936 unsigned pgFlags /* Various PAGER_* flags */
2937){
2938 BtShared *pBt = p->pBt;
2939 assert( sqlite3_mutex_held(p->db->mutex) );
2940 sqlite3BtreeEnter(p);
2941 sqlite3PagerSetFlags(pBt->pPager, pgFlags);
2942 sqlite3BtreeLeave(p);
2943 return SQLITE_OK;
2944}
2945#endif
2946
2947/*
2948** Change the default page size and the number of reserved bytes per page.
2949** Or, if the page size has already been fixed, return SQLITE_READONLY
2950** without changing anything.
2951**
2952** The page size must be a power of 2 between 512 and 65536. If the page
2953** size supplied does not meet this constraint then the page size is not
2954** changed.
2955**
2956** Page sizes are constrained to be a power of two so that the region
2957** of the database file used for locking (beginning at PENDING_BYTE,
2958** the first byte past the 1GB boundary, 0x40000000) needs to occur
2959** at the beginning of a page.
2960**
2961** If parameter nReserve is less than zero, then the number of reserved
2962** bytes per page is left unchanged.
2963**
2964** If the iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size
2965** and autovacuum mode can no longer be changed.
2966*/
2967int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){
2968 int rc = SQLITE_OK;
2969 int x;
2970 BtShared *pBt = p->pBt;
2971 assert( nReserve>=0 && nReserve<=255 );
2972 sqlite3BtreeEnter(p);
2973 pBt->nReserveWanted = nReserve;
2974 x = pBt->pageSize - pBt->usableSize;
2975 if( nReserve<x ) nReserve = x;
2976 if( pBt->btsFlags & BTS_PAGESIZE_FIXED ){
2977 sqlite3BtreeLeave(p);
2978 return SQLITE_READONLY;
2979 }
2980 assert( nReserve>=0 && nReserve<=255 );
2981 if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE &&
2982 ((pageSize-1)&pageSize)==0 ){
2983 assert( (pageSize & 7)==0 );
2984 assert( !pBt->pCursor );
2985 if( nReserve>32 && pageSize==512 ) pageSize = 1024;
2986 pBt->pageSize = (u32)pageSize;
2987 freeTempSpace(pBt);
2988 }
2989 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve);
2990 pBt->usableSize = pBt->pageSize - (u16)nReserve;
2991 if( iFix ) pBt->btsFlags |= BTS_PAGESIZE_FIXED;
2992 sqlite3BtreeLeave(p);
2993 return rc;
2994}
2995
2996/*
2997** Return the currently defined page size
2998*/
2999int sqlite3BtreeGetPageSize(Btree *p){
3000 return p->pBt->pageSize;
3001}
3002
3003/*
3004** This function is similar to sqlite3BtreeGetReserve(), except that it
3005** may only be called if it is guaranteed that the b-tree mutex is already
3006** held.
3007**
3008** This is useful in one special case in the backup API code where it is
3009** known that the shared b-tree mutex is held, but the mutex on the
3010** database handle that owns *p is not. In this case if sqlite3BtreeEnter()
3011** were to be called, it might collide with some other operation on the
3012** database handle that owns *p, causing undefined behavior.
3013*/
3014int sqlite3BtreeGetReserveNoMutex(Btree *p){
3015 int n;
3016 assert( sqlite3_mutex_held(p->pBt->mutex) );
3017 n = p->pBt->pageSize - p->pBt->usableSize;
3018 return n;
3019}
3020
3021/*
3022** Return the number of bytes of space at the end of every page that
3023** are intentionally left unused. This is the "reserved" space that is
3024** sometimes used by extensions.
3025**
3026** The value returned is the larger of the current reserve size and
3027** the latest reserve size requested by SQLITE_FILECTRL_RESERVE_BYTES.
3028** The amount of reserve can only grow - never shrink.
3029*/
3030int sqlite3BtreeGetRequestedReserve(Btree *p){
3031 int n1, n2;
3032 sqlite3BtreeEnter(p);
3033 n1 = (int)p->pBt->nReserveWanted;
3034 n2 = sqlite3BtreeGetReserveNoMutex(p);
3035 sqlite3BtreeLeave(p);
3036 return n1>n2 ? n1 : n2;
3037}
3038
3039
3040/*
3041** Set the maximum page count for a database if mxPage is positive.
3042** No changes are made if mxPage is 0 or negative.
3043** Regardless of the value of mxPage, return the maximum page count.
3044*/
3045Pgno sqlite3BtreeMaxPageCount(Btree *p, Pgno mxPage){
3046 Pgno n;
3047 sqlite3BtreeEnter(p);
3048 n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage);
3049 sqlite3BtreeLeave(p);
3050 return n;
3051}
3052
3053/*
3054** Change the values for the BTS_SECURE_DELETE and BTS_OVERWRITE flags:
3055**
3056** newFlag==0 Both BTS_SECURE_DELETE and BTS_OVERWRITE are cleared
3057** newFlag==1 BTS_SECURE_DELETE set and BTS_OVERWRITE is cleared
3058** newFlag==2 BTS_SECURE_DELETE cleared and BTS_OVERWRITE is set
3059** newFlag==(-1) No changes
3060**
3061** This routine acts as a query if newFlag is less than zero
3062**
3063** With BTS_OVERWRITE set, deleted content is overwritten by zeros, but
3064** freelist leaf pages are not written back to the database. Thus in-page
3065** deleted content is cleared, but freelist deleted content is not.
3066**
3067** With BTS_SECURE_DELETE, operation is like BTS_OVERWRITE with the addition
3068** that freelist leaf pages are written back into the database, increasing
3069** the amount of disk I/O.
3070*/
3071int sqlite3BtreeSecureDelete(Btree *p, int newFlag){
3072 int b;
3073 if( p==0 ) return 0;
3074 sqlite3BtreeEnter(p);
3075 assert( BTS_OVERWRITE==BTS_SECURE_DELETE*2 );
3076 assert( BTS_FAST_SECURE==(BTS_OVERWRITE|BTS_SECURE_DELETE) );
3077 if( newFlag>=0 ){
3078 p->pBt->btsFlags &= ~BTS_FAST_SECURE;
3079 p->pBt->btsFlags |= BTS_SECURE_DELETE*newFlag;
3080 }
3081 b = (p->pBt->btsFlags & BTS_FAST_SECURE)/BTS_SECURE_DELETE;
3082 sqlite3BtreeLeave(p);
3083 return b;
3084}
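/*
** Illustrative sketch (disabled, not part of the build): the encoding above
** relies on BTS_OVERWRITE being exactly twice BTS_SECURE_DELETE (see the
** asserts), so multiplying newFlag (0, 1, or 2) by BTS_SECURE_DELETE selects
** the right bit pattern and dividing maps it back.  EX_SECURE and
** EX_OVERWRITE below are hypothetical stand-in constants, not the real
** BTS_* values.
*/
#if 0
#define EX_SECURE    0x0004
#define EX_OVERWRITE 0x0008              /* Must equal EX_SECURE*2 */
#define EX_FAST      (EX_SECURE|EX_OVERWRITE)
static int exampleSetSecureDelete(unsigned *pFlags, int newFlag){
  if( newFlag>=0 ){                      /* newFlag in 0..2; negative queries */
    *pFlags &= ~EX_FAST;                 /* Clear both bits */
    *pFlags |= EX_SECURE*newFlag;        /* 0, 1, or 2 selects the pattern */
  }
  return (*pFlags & EX_FAST)/EX_SECURE;  /* Map back to 0, 1, or 2 */
}
#endif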
3085
3086/*
3087** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
3088** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
3089** is disabled. The default value for the auto-vacuum property is
3090** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
3091*/
3092int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
3093#ifdef SQLITE_OMIT_AUTOVACUUM
3094 return SQLITE_READONLY;
3095#else
3096 BtShared *pBt = p->pBt;
3097 int rc = SQLITE_OK;
3098 u8 av = (u8)autoVacuum;
3099
3100 sqlite3BtreeEnter(p);
3101 if( (pBt->btsFlags & BTS_PAGESIZE_FIXED)!=0 && (av ?1:0)!=pBt->autoVacuum ){
3102 rc = SQLITE_READONLY;
3103 }else{
3104 pBt->autoVacuum = av ?1:0;
3105 pBt->incrVacuum = av==2 ?1:0;
3106 }
3107 sqlite3BtreeLeave(p);
3108 return rc;
3109#endif
3110}
3111
3112/*
3113** Return the value of the 'auto-vacuum' property. If auto-vacuum is
3114** enabled 1 is returned. Otherwise 0.
3115*/
3116int sqlite3BtreeGetAutoVacuum(Btree *p){
3117#ifdef SQLITE_OMIT_AUTOVACUUM
3118 return BTREE_AUTOVACUUM_NONE;
3119#else
3120 int rc;
3121 sqlite3BtreeEnter(p);
3122 rc = (
3123 (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE:
3124 (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL:
3125 BTREE_AUTOVACUUM_INCR
3126 );
3127 sqlite3BtreeLeave(p);
3128 return rc;
3129#endif
3130}
3131
3132/*
3133** If the user has not set the safety-level for this database connection
3134** using "PRAGMA synchronous", and if the safety-level is not already
3135** set to the value passed to this function as the second parameter,
3136** set it so.
3137*/
3138#if SQLITE_DEFAULT_SYNCHRONOUS!=SQLITE_DEFAULT_WAL_SYNCHRONOUS \
3139 && !defined(SQLITE_OMIT_WAL)
3140static void setDefaultSyncFlag(BtShared *pBt, u8 safety_level){
3141 sqlite3 *db;
3142 Db *pDb;
3143 if( (db=pBt->db)!=0 && (pDb=db->aDb)!=0 ){
3144 while( pDb->pBt==0 || pDb->pBt->pBt!=pBt ){ pDb++; }
3145 if( pDb->bSyncSet==0
3146 && pDb->safety_level!=safety_level
3147 && pDb!=&db->aDb[1]
3148 ){
3149 pDb->safety_level = safety_level;
3150 sqlite3PagerSetFlags(pBt->pPager,
3151 pDb->safety_level | (db->flags & PAGER_FLAGS_MASK));
3152 }
3153 }
3154}
3155#else
3156# define setDefaultSyncFlag(pBt,safety_level)
3157#endif
3158
3159/* Forward declaration */
3160static int newDatabase(BtShared*);
3161
3162
3163/*
3164** Get a reference to pPage1 of the database file. This will
3165** also acquire a readlock on that file.
3166**
3167** SQLITE_OK is returned on success. If the file is not a
3168** well-formed database file, then SQLITE_CORRUPT is returned.
3169** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
3170** is returned if we run out of memory.
3171*/
3172static int lockBtree(BtShared *pBt){
3173 int rc; /* Result code from subfunctions */
3174 MemPage *pPage1; /* Page 1 of the database file */
3175 u32 nPage; /* Number of pages in the database */
3176 u32 nPageFile = 0; /* Number of pages in the database file */
3177
3178 assert( sqlite3_mutex_held(pBt->mutex) );
3179 assert( pBt->pPage1==0 );
3180 rc = sqlite3PagerSharedLock(pBt->pPager);
3181 if( rc!=SQLITE_OK ) return rc;
3182 rc = btreeGetPage(pBt, 1, &pPage1, 0);
3183 if( rc!=SQLITE_OK ) return rc;
3184
3185  /* Do some checking to help ensure the file we opened really is
3186 ** a valid database file.
3187 */
3188 nPage = get4byte(28+(u8*)pPage1->aData);
3189 sqlite3PagerPagecount(pBt->pPager, (int*)&nPageFile);
3190 if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){
3191 nPage = nPageFile;
3192 }
3193 if( (pBt->db->flags & SQLITE_ResetDatabase)!=0 ){
3194 nPage = 0;
3195 }
3196 if( nPage>0 ){
3197 u32 pageSize;
3198 u32 usableSize;
3199 u8 *page1 = pPage1->aData;
3200 rc = SQLITE_NOTADB;
3201 /* EVIDENCE-OF: R-43737-39999 Every valid SQLite database file begins
3202 ** with the following 16 bytes (in hex): 53 51 4c 69 74 65 20 66 6f 72 6d
3203 ** 61 74 20 33 00. */
3204 if( memcmp(page1, zMagicHeader, 16)!=0 ){
3205 goto page1_init_failed;
3206 }
3207
3208#ifdef SQLITE_OMIT_WAL
3209 if( page1[18]>1 ){
3210 pBt->btsFlags |= BTS_READ_ONLY;
3211 }
3212 if( page1[19]>1 ){
3213 goto page1_init_failed;
3214 }
3215#else
3216 if( page1[18]>2 ){
3217 pBt->btsFlags |= BTS_READ_ONLY;
3218 }
3219 if( page1[19]>2 ){
3220 goto page1_init_failed;
3221 }
3222
3223 /* If the read version is set to 2, this database should be accessed
3224 ** in WAL mode. If the log is not already open, open it now. Then
3225    ** return SQLITE_OK without populating BtShared.pPage1.
3226 ** The caller detects this and calls this function again. This is
3227 ** required as the version of page 1 currently in the page1 buffer
3228 ** may not be the latest version - there may be a newer one in the log
3229 ** file.
3230 */
3231 if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){
3232 int isOpen = 0;
3233 rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen);
3234 if( rc!=SQLITE_OK ){
3235 goto page1_init_failed;
3236 }else{
3237 setDefaultSyncFlag(pBt, SQLITE_DEFAULT_WAL_SYNCHRONOUS+1);
3238 if( isOpen==0 ){
3239 releasePageOne(pPage1);
3240 return SQLITE_OK;
3241 }
3242 }
3243 rc = SQLITE_NOTADB;
3244 }else{
3245 setDefaultSyncFlag(pBt, SQLITE_DEFAULT_SYNCHRONOUS+1);
3246 }
3247#endif
3248
3249 /* EVIDENCE-OF: R-15465-20813 The maximum and minimum embedded payload
3250 ** fractions and the leaf payload fraction values must be 64, 32, and 32.
3251 **
3252 ** The original design allowed these amounts to vary, but as of
3253 ** version 3.6.0, we require them to be fixed.
3254 */
3255 if( memcmp(&page1[21], "\100\040\040",3)!=0 ){
3256 goto page1_init_failed;
3257 }
3258 /* EVIDENCE-OF: R-51873-39618 The page size for a database file is
3259 ** determined by the 2-byte integer located at an offset of 16 bytes from
3260 ** the beginning of the database file. */
3261 pageSize = (page1[16]<<8) | (page1[17]<<16);
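    /* Note on the expression above: the page size is stored at offset 16
    ** as a 2-byte big-endian value, with the value 1 meaning 65536.  Because
    ** every legal page size is a power of two between 512 and 65536, the
    ** low byte (page1[17]) is zero for all sizes up to 32768, so shifting it
    ** left by 16 contributes nothing; for 65536 the stored bytes are
    ** 0x00 0x01, and (1<<16) yields 65536.  For example, 4096 is stored as
    ** 0x10 0x00 and decodes to (0x10<<8)==4096.  Any other bit pattern
    ** fails the power-of-two and range checks below. */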
3262 /* EVIDENCE-OF: R-25008-21688 The size of a page is a power of two
3263 ** between 512 and 65536 inclusive. */
3264 if( ((pageSize-1)&pageSize)!=0
3265 || pageSize>SQLITE_MAX_PAGE_SIZE
3266 || pageSize<=256
3267 ){
3268 goto page1_init_failed;
3269 }
3270 pBt->btsFlags |= BTS_PAGESIZE_FIXED;
3271 assert( (pageSize & 7)==0 );
3272 /* EVIDENCE-OF: R-59310-51205 The "reserved space" size in the 1-byte
3273 ** integer at offset 20 is the number of bytes of space at the end of
3274 ** each page to reserve for extensions.
3275 **
3276 ** EVIDENCE-OF: R-37497-42412 The size of the reserved region is
3277 ** determined by the one-byte unsigned integer found at an offset of 20
3278 ** into the database file header. */
3279 usableSize = pageSize - page1[20];
3280 if( (u32)pageSize!=pBt->pageSize ){
3281 /* After reading the first page of the database assuming a page size
3282 ** of BtShared.pageSize, we have discovered that the page-size is
3283 ** actually pageSize. Unlock the database, leave pBt->pPage1 at
3284 ** zero and return SQLITE_OK. The caller will call this function
3285 ** again with the correct page-size.
3286 */
3287 releasePageOne(pPage1);
3288 pBt->usableSize = usableSize;
3289 pBt->pageSize = pageSize;
3290 freeTempSpace(pBt);
3291 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize,
3292 pageSize-usableSize);
3293 return rc;
3294 }
3295 if( nPage>nPageFile ){
3296 if( sqlite3WritableSchema(pBt->db)==0 ){
3297 rc = SQLITE_CORRUPT_BKPT;
3298 goto page1_init_failed;
3299 }else{
3300 nPage = nPageFile;
3301 }
3302 }
3303 /* EVIDENCE-OF: R-28312-64704 However, the usable size is not allowed to
3304 ** be less than 480. In other words, if the page size is 512, then the
3305 ** reserved space size cannot exceed 32. */
3306 if( usableSize<480 ){
3307 goto page1_init_failed;
3308 }
3309 pBt->pageSize = pageSize;
3310 pBt->usableSize = usableSize;
3311#ifndef SQLITE_OMIT_AUTOVACUUM
3312 pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0);
3313 pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0);
3314#endif
3315 }
3316
3317 /* maxLocal is the maximum amount of payload to store locally for
3318 ** a cell. Make sure it is small enough so that at least minFanout
3319  ** cells will fit on one page. We assume a 10-byte page header.
3320 ** Besides the payload, the cell must store:
3321 ** 2-byte pointer to the cell
3322 ** 4-byte child pointer
3323 ** 9-byte nKey value
3324 ** 4-byte nData value
3325 ** 4-byte overflow page pointer
3326 ** So a cell consists of a 2-byte pointer, a header which is as much as
3327 ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow
3328 ** page pointer.
3329 */
3330 pBt->maxLocal = (u16)((pBt->usableSize-12)*64/255 - 23);
3331 pBt->minLocal = (u16)((pBt->usableSize-12)*32/255 - 23);
3332 pBt->maxLeaf = (u16)(pBt->usableSize - 35);
3333 pBt->minLeaf = (u16)((pBt->usableSize-12)*32/255 - 23);
3334 if( pBt->maxLocal>127 ){
3335 pBt->max1bytePayload = 127;
3336 }else{
3337 pBt->max1bytePayload = (u8)pBt->maxLocal;
3338 }
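  /* A worked example (assuming a 4096-byte page with no reserved space,
  ** so usableSize==4096): maxLocal = (4096-12)*64/255 - 23 = 1002,
  ** minLocal = minLeaf = (4096-12)*32/255 - 23 = 489, and
  ** maxLeaf = 4096-35 = 4061.  Since maxLocal exceeds 127, the
  ** max1bytePayload value is capped at 127. */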
3339 assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) );
3340 pBt->pPage1 = pPage1;
3341 pBt->nPage = nPage;
3342 return SQLITE_OK;
3343
3344page1_init_failed:
3345 releasePageOne(pPage1);
3346 pBt->pPage1 = 0;
3347 return rc;
3348}
3349
3350#ifndef NDEBUG
3351/*
3352** Return the number of cursors open on pBt. This is for use
3353** in assert() expressions, so it is only compiled if NDEBUG is not
3354** defined.
3355**
3356** Only write cursors are counted if wrOnly is true. If wrOnly is
3357** false then all cursors are counted.
3358**
3359** For the purposes of this routine, a cursor is any cursor that
3360** is capable of reading or writing to the database. Cursors that
3361** have been tripped into the CURSOR_FAULT state are not counted.
3362*/
3363static int countValidCursors(BtShared *pBt, int wrOnly){
3364 BtCursor *pCur;
3365 int r = 0;
3366 for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
3367 if( (wrOnly==0 || (pCur->curFlags & BTCF_WriteFlag)!=0)
3368 && pCur->eState!=CURSOR_FAULT ) r++;
3369 }
3370 return r;
3371}
3372#endif
3373
3374/*
3375** If there are no outstanding cursors and we are not in the middle
3376** of a transaction but there is a read lock on the database, then
3377** this routine unrefs the first page of the database file which
3378** has the effect of releasing the read lock.
3379**
3380** If there is a transaction in progress, this routine is a no-op.
3381*/
3382static void unlockBtreeIfUnused(BtShared *pBt){
3383 assert( sqlite3_mutex_held(pBt->mutex) );
3384 assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE );
3385 if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){
3386 MemPage *pPage1 = pBt->pPage1;
3387 assert( pPage1->aData );
3388 assert( sqlite3PagerRefcount(pBt->pPager)==1 );
3389 pBt->pPage1 = 0;
3390 releasePageOne(pPage1);
3391 }
3392}
3393
3394/*
3395** If pBt points to an empty file then convert that empty file
3396** into a new empty database by initializing the first page of
3397** the database.
3398*/
3399static int newDatabase(BtShared *pBt){
3400 MemPage *pP1;
3401 unsigned char *data;
3402 int rc;
3403
3404 assert( sqlite3_mutex_held(pBt->mutex) );
3405 if( pBt->nPage>0 ){
3406 return SQLITE_OK;
3407 }
3408 pP1 = pBt->pPage1;
3409 assert( pP1!=0 );
3410 data = pP1->aData;
3411 rc = sqlite3PagerWrite(pP1->pDbPage);
3412 if( rc ) return rc;
3413 memcpy(data, zMagicHeader, sizeof(zMagicHeader));
3414 assert( sizeof(zMagicHeader)==16 );
3415 data[16] = (u8)((pBt->pageSize>>8)&0xff);
3416 data[17] = (u8)((pBt->pageSize>>16)&0xff);
3417 data[18] = 1;
3418 data[19] = 1;
3419 assert( pBt->usableSize<=pBt->pageSize && pBt->usableSize+255>=pBt->pageSize);
3420 data[20] = (u8)(pBt->pageSize - pBt->usableSize);
3421 data[21] = 64;
3422 data[22] = 32;
3423 data[23] = 32;
3424 memset(&data[24], 0, 100-24);
3425 zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA );
3426 pBt->btsFlags |= BTS_PAGESIZE_FIXED;
3427#ifndef SQLITE_OMIT_AUTOVACUUM
3428 assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 );
3429 assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 );
3430 put4byte(&data[36 + 4*4], pBt->autoVacuum);
3431 put4byte(&data[36 + 7*4], pBt->incrVacuum);
3432#endif
3433 pBt->nPage = 1;
3434 data[31] = 1;
3435 return SQLITE_OK;
3436}
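/* For illustration (assuming a 4096-byte page size with no reserved space
** and auto-vacuum disabled), the header written above contains:
** bytes 16-17 = 0x10 0x00 (page size), bytes 18-19 = 1 1 (read/write
** format versions), byte 20 = 0 (reserved space), bytes 21-23 = 64 32 32
** (payload fractions), and bytes 28-31 = 0 0 0 1 (a one-page database).
** Bytes 24 through 99 are otherwise zero.
*/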
3437
3438/*
3439** Initialize the first page of the database file (creating a database
3440** consisting of a single page and no schema objects). Return SQLITE_OK
3441** if successful, or an SQLite error code otherwise.
3442*/
3443int sqlite3BtreeNewDb(Btree *p){
3444 int rc;
3445 sqlite3BtreeEnter(p);
3446 p->pBt->nPage = 0;
3447 rc = newDatabase(p->pBt);
3448 sqlite3BtreeLeave(p);
3449 return rc;
3450}
3451
3452/*
3453** Attempt to start a new transaction. A write-transaction
3454** is started if the second argument is nonzero, otherwise a read-
3455** transaction. If the second argument is 2 or more, an exclusive
3456** transaction is started, meaning that no other process is allowed
3457** to access the database. A preexisting transaction may not be
3458** upgraded to exclusive by calling this routine a second time - the
3459** exclusivity flag only works for a new transaction.
3460**
3461** A write-transaction must be started before attempting any
3462** changes to the database. None of the following routines
3463** will work unless a transaction is started first:
3464**
3465** sqlite3BtreeCreateTable()
3466** sqlite3BtreeCreateIndex()
3467** sqlite3BtreeClearTable()
3468** sqlite3BtreeDropTable()
3469** sqlite3BtreeInsert()
3470** sqlite3BtreeDelete()
3471** sqlite3BtreeUpdateMeta()
3472**
3473** If an initial attempt to acquire the lock fails because of lock contention
3474** and the database was previously unlocked, then invoke the busy handler
3475** if there is one. But if there was previously a read-lock, do not
3476** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is
3477** returned when there is already a read-lock in order to avoid a deadlock.
3478**
3479** Suppose there are two processes A and B. A has a read lock and B has
3480** a reserved lock. B tries to promote to exclusive but is blocked because
3481** of A's read lock. A tries to promote to reserved but is blocked by B.
3482** One or the other of the two processes must give way or there can be
3483** no progress. By returning SQLITE_BUSY and not invoking the busy callback
3484** when A already has a read lock, we encourage A to give up and let B
3485** proceed.
3486*/
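/* An illustrative call sequence (a sketch only; in practice the VDBE layer
** is the caller and supplies its own error handling):
**
**     rc = sqlite3BtreeBeginTrans(p, 0, 0);                (read transaction)
**     rc = sqlite3BtreeBeginTrans(p, 1, &iSchemaVersion);  (write transaction)
**     rc = sqlite3BtreeBeginTrans(p, 2, 0);                (exclusive)
**
** where iSchemaVersion is an int that receives the schema cookie stored at
** byte offset 40 of page 1.
*/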
3487int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){
3488 BtShared *pBt = p->pBt;
3489 Pager *pPager = pBt->pPager;
3490 int rc = SQLITE_OK;
3491
3492 sqlite3BtreeEnter(p);
3493 btreeIntegrity(p);
3494
3495 /* If the btree is already in a write-transaction, or it
3496 ** is already in a read-transaction and a read-transaction
3497 ** is requested, this is a no-op.
3498 */
3499 if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
3500 goto trans_begun;
3501 }
3502 assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 );
3503
3504 if( (p->db->flags & SQLITE_ResetDatabase)
3505 && sqlite3PagerIsreadonly(pPager)==0
3506 ){
3507 pBt->btsFlags &= ~BTS_READ_ONLY;
3508 }
3509
3510 /* Write transactions are not possible on a read-only database */
3511 if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){
3512 rc = SQLITE_READONLY;
3513 goto trans_begun;
3514 }
3515
3516#ifndef SQLITE_OMIT_SHARED_CACHE
3517 {
3518 sqlite3 *pBlock = 0;
3519 /* If another database handle has already opened a write transaction
3520 ** on this shared-btree structure and a second write transaction is
3521 ** requested, return SQLITE_LOCKED.
3522 */
3523 if( (wrflag && pBt->inTransaction==TRANS_WRITE)
3524 || (pBt->btsFlags & BTS_PENDING)!=0
3525 ){
3526 pBlock = pBt->pWriter->db;
3527 }else if( wrflag>1 ){
3528 BtLock *pIter;
3529 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
3530 if( pIter->pBtree!=p ){
3531 pBlock = pIter->pBtree->db;
3532 break;
3533 }
3534 }
3535 }
3536 if( pBlock ){
3537 sqlite3ConnectionBlocked(p->db, pBlock);
3538 rc = SQLITE_LOCKED_SHAREDCACHE;
3539 goto trans_begun;
3540 }
3541 }
3542#endif
3543
3544 /* Any read-only or read-write transaction implies a read-lock on
3545 ** page 1. So if some other shared-cache client already has a write-lock
3546 ** on page 1, the transaction cannot be opened. */
3547 rc = querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK);
3548 if( SQLITE_OK!=rc ) goto trans_begun;
3549
3550 pBt->btsFlags &= ~BTS_INITIALLY_EMPTY;
3551 if( pBt->nPage==0 ) pBt->btsFlags |= BTS_INITIALLY_EMPTY;
3552 do {
3553 sqlite3PagerWalDb(pPager, p->db);
3554
3555#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
3556 /* If transitioning from no transaction directly to a write transaction,
3557 ** block for the WRITER lock first if possible. */
3558 if( pBt->pPage1==0 && wrflag ){
3559 assert( pBt->inTransaction==TRANS_NONE );
3560 rc = sqlite3PagerWalWriteLock(pPager, 1);
3561 if( rc!=SQLITE_BUSY && rc!=SQLITE_OK ) break;
3562 }
3563#endif
3564
3565 /* Call lockBtree() until either pBt->pPage1 is populated or
3566 ** lockBtree() returns something other than SQLITE_OK. lockBtree()
3567 ** may return SQLITE_OK but leave pBt->pPage1 set to 0 if after
3568 ** reading page 1 it discovers that the page-size of the database
3569 ** file is not pBt->pageSize. In this case lockBtree() will update
3570 ** pBt->pageSize to the page-size of the file on disk.
3571 */
3572 while( pBt->pPage1==0 && SQLITE_OK==(rc = lockBtree(pBt)) );
3573
3574 if( rc==SQLITE_OK && wrflag ){
3575 if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){
3576 rc = SQLITE_READONLY;
3577 }else{
3578 rc = sqlite3PagerBegin(pPager, wrflag>1, sqlite3TempInMemory(p->db));
3579 if( rc==SQLITE_OK ){
3580 rc = newDatabase(pBt);
3581 }else if( rc==SQLITE_BUSY_SNAPSHOT && pBt->inTransaction==TRANS_NONE ){
3582 /* if there was no transaction opened when this function was
3583 ** called and SQLITE_BUSY_SNAPSHOT is returned, change the error
3584 ** code to SQLITE_BUSY. */
3585 rc = SQLITE_BUSY;
3586 }
3587 }
3588 }
3589
3590 if( rc!=SQLITE_OK ){
3591 (void)sqlite3PagerWalWriteLock(pPager, 0);
3592 unlockBtreeIfUnused(pBt);
3593 }
3594 }while( (rc&0xFF)==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
3595 btreeInvokeBusyHandler(pBt) );
3596 sqlite3PagerWalDb(pPager, 0);
3597#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
3598 if( rc==SQLITE_BUSY_TIMEOUT ) rc = SQLITE_BUSY;
3599#endif
3600
3601 if( rc==SQLITE_OK ){
3602 if( p->inTrans==TRANS_NONE ){
3603 pBt->nTransaction++;
3604#ifndef SQLITE_OMIT_SHARED_CACHE
3605 if( p->sharable ){
3606 assert( p->lock.pBtree==p && p->lock.iTable==1 );
3607 p->lock.eLock = READ_LOCK;
3608 p->lock.pNext = pBt->pLock;
3609 pBt->pLock = &p->lock;
3610 }
3611#endif
3612 }
3613 p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
3614 if( p->inTrans>pBt->inTransaction ){
3615 pBt->inTransaction = p->inTrans;
3616 }
3617 if( wrflag ){
3618 MemPage *pPage1 = pBt->pPage1;
3619#ifndef SQLITE_OMIT_SHARED_CACHE
3620 assert( !pBt->pWriter );
3621 pBt->pWriter = p;
3622 pBt->btsFlags &= ~BTS_EXCLUSIVE;
3623 if( wrflag>1 ) pBt->btsFlags |= BTS_EXCLUSIVE;
3624#endif
3625
3626 /* If the db-size header field is incorrect (as it may be if an old
3627 ** client has been writing the database file), update it now. Doing
3628      ** this sooner rather than later means that the database size can
3629      ** safely be re-read from page 1 if a savepoint or transaction
3630 ** rollback occurs within the transaction.
3631 */
3632 if( pBt->nPage!=get4byte(&pPage1->aData[28]) ){
3633 rc = sqlite3PagerWrite(pPage1->pDbPage);
3634 if( rc==SQLITE_OK ){
3635 put4byte(&pPage1->aData[28], pBt->nPage);
3636 }
3637 }
3638 }
3639 }
3640
3641trans_begun:
3642 if( rc==SQLITE_OK ){
3643 if( pSchemaVersion ){
3644 *pSchemaVersion = get4byte(&pBt->pPage1->aData[40]);
3645 }
3646 if( wrflag ){
3647 /* This call makes sure that the pager has the correct number of
3648 ** open savepoints. If the second parameter is greater than 0 and
3649 ** the sub-journal is not already open, then it will be opened here.
3650 */
3651 rc = sqlite3PagerOpenSavepoint(pPager, p->db->nSavepoint);
3652 }
3653 }
3654
3655 btreeIntegrity(p);
3656 sqlite3BtreeLeave(p);
3657 return rc;
3658}
3659
3660#ifndef SQLITE_OMIT_AUTOVACUUM
3661
3662/*
3663** Set the pointer-map entries for all children of page pPage. Also, if
3664** pPage contains cells that point to overflow pages, set the pointer
3665** map entries for the overflow pages as well.
3666*/
3667static int setChildPtrmaps(MemPage *pPage){
3668 int i; /* Counter variable */
3669 int nCell; /* Number of cells in page pPage */
3670 int rc; /* Return code */
3671 BtShared *pBt = pPage->pBt;
3672 Pgno pgno = pPage->pgno;
3673
3674 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
3675 rc = pPage->isInit ? SQLITE_OK : btreeInitPage(pPage);
3676 if( rc!=SQLITE_OK ) return rc;
3677 nCell = pPage->nCell;
3678
3679 for(i=0; i<nCell; i++){
3680 u8 *pCell = findCell(pPage, i);
3681
3682 ptrmapPutOvflPtr(pPage, pPage, pCell, &rc);
3683
3684 if( !pPage->leaf ){
3685 Pgno childPgno = get4byte(pCell);
3686 ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc);
3687 }
3688 }
3689
3690 if( !pPage->leaf ){
3691 Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
3692 ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc);
3693 }
3694
3695 return rc;
3696}
3697
3698/*
3699** Somewhere on pPage is a pointer to page iFrom. Modify this pointer so
3700** that it points to iTo. Parameter eType describes the type of pointer to
3701** be modified, as follows:
3702**
3703** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child
3704** page of pPage.
3705**
3706** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
3707** page pointed to by one of the cells on pPage.
3708**
3709** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next
3710** overflow page in the list.
3711*/
3712static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
3713 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
3714 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
3715 if( eType==PTRMAP_OVERFLOW2 ){
3716 /* The pointer is always the first 4 bytes of the page in this case. */
3717 if( get4byte(pPage->aData)!=iFrom ){
3718 return SQLITE_CORRUPT_PAGE(pPage);
3719 }
3720 put4byte(pPage->aData, iTo);
3721 }else{
3722 int i;
3723 int nCell;
3724 int rc;
3725
3726 rc = pPage->isInit ? SQLITE_OK : btreeInitPage(pPage);
3727 if( rc ) return rc;
3728 nCell = pPage->nCell;
3729
3730 for(i=0; i<nCell; i++){
3731 u8 *pCell = findCell(pPage, i);
3732 if( eType==PTRMAP_OVERFLOW1 ){
3733 CellInfo info;
3734 pPage->xParseCell(pPage, pCell, &info);
3735 if( info.nLocal<info.nPayload ){
3736 if( pCell+info.nSize > pPage->aData+pPage->pBt->usableSize ){
3737 return SQLITE_CORRUPT_PAGE(pPage);
3738 }
3739 if( iFrom==get4byte(pCell+info.nSize-4) ){
3740 put4byte(pCell+info.nSize-4, iTo);
3741 break;
3742 }
3743 }
3744 }else{
3745 if( pCell+4 > pPage->aData+pPage->pBt->usableSize ){
3746 return SQLITE_CORRUPT_PAGE(pPage);
3747 }
3748 if( get4byte(pCell)==iFrom ){
3749 put4byte(pCell, iTo);
3750 break;
3751 }
3752 }
3753 }
3754
3755 if( i==nCell ){
3756 if( eType!=PTRMAP_BTREE ||
3757 get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
3758 return SQLITE_CORRUPT_PAGE(pPage);
3759 }
3760 put4byte(&pPage->aData[pPage->hdrOffset+8], iTo);
3761 }
3762 }
3763 return SQLITE_OK;
3764}
3765
3766
3767/*
3768** Move the open database page pDbPage to location iFreePage in the
3769** database. The pDbPage reference remains valid.
3770**
3771** The isCommit flag indicates that there is no need to remember that
3772** the journal needs to be sync()ed before database page pDbPage->pgno
3773** can be written to. The caller has already promised not to write to that
3774** page.
3775*/
3776static int relocatePage(
3777 BtShared *pBt, /* Btree */
3778 MemPage *pDbPage, /* Open page to move */
3779 u8 eType, /* Pointer map 'type' entry for pDbPage */
3780 Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */
3781 Pgno iFreePage, /* The location to move pDbPage to */
3782 int isCommit /* isCommit flag passed to sqlite3PagerMovepage */
3783){
3784 MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */
3785 Pgno iDbPage = pDbPage->pgno;
3786 Pager *pPager = pBt->pPager;
3787 int rc;
3788
3789 assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 ||
3790 eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE );
3791 assert( sqlite3_mutex_held(pBt->mutex) );
3792 assert( pDbPage->pBt==pBt );
3793 if( iDbPage<3 ) return SQLITE_CORRUPT_BKPT;
3794
3795 /* Move page iDbPage from its current location to page number iFreePage */
3796 TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n",
3797 iDbPage, iFreePage, iPtrPage, eType));
3798 rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit);
3799 if( rc!=SQLITE_OK ){
3800 return rc;
3801 }
3802 pDbPage->pgno = iFreePage;
3803
3804 /* If pDbPage was a btree-page, then it may have child pages and/or cells
3805 ** that point to overflow pages. The pointer map entries for all these
3806 ** pages need to be changed.
3807 **
3808 ** If pDbPage is an overflow page, then the first 4 bytes may store a
3809 ** pointer to a subsequent overflow page. If this is the case, then
3810 ** the pointer map needs to be updated for the subsequent overflow page.
3811 */
3812 if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){
3813 rc = setChildPtrmaps(pDbPage);
3814 if( rc!=SQLITE_OK ){
3815 return rc;
3816 }
3817 }else{
3818 Pgno nextOvfl = get4byte(pDbPage->aData);
3819 if( nextOvfl!=0 ){
3820 ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage, &rc);
3821 if( rc!=SQLITE_OK ){
3822 return rc;
3823 }
3824 }
3825 }
3826
3827 /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
3828 ** that it points at iFreePage. Also fix the pointer map entry for
3829 ** iPtrPage.
3830 */
3831 if( eType!=PTRMAP_ROOTPAGE ){
3832 rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
3833 if( rc!=SQLITE_OK ){
3834 return rc;
3835 }
3836 rc = sqlite3PagerWrite(pPtrPage->pDbPage);
3837 if( rc!=SQLITE_OK ){
3838 releasePage(pPtrPage);
3839 return rc;
3840 }
3841 rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType);
3842 releasePage(pPtrPage);
3843 if( rc==SQLITE_OK ){
3844 ptrmapPut(pBt, iFreePage, eType, iPtrPage, &rc);
3845 }
3846 }
3847 return rc;
3848}
3849
3850/* Forward declaration required by incrVacuumStep(). */
3851static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
3852
3853/*
3854** Perform a single step of an incremental-vacuum. If successful, return
3855** SQLITE_OK. If there is no work to do (and therefore no point in
3856** calling this function again), return SQLITE_DONE. Or, if an error
3857** occurs, return some other error code.
3858**
3859** More specifically, this function attempts to re-organize the database so
3860** that the last page of the file currently in use is no longer in use.
3861**
3862** Parameter nFin is the number of pages that this database would contain
3863** were this function called until it returns SQLITE_DONE.
3864**
3865** If the bCommit parameter is non-zero, this function assumes that the
3866** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE
3867** or an error. bCommit is passed true for an auto-vacuum-on-commit
3868** operation, or false for an incremental vacuum.
3869*/
3870static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
3871 Pgno nFreeList; /* Number of pages still on the free-list */
3872 int rc;
3873
3874 assert( sqlite3_mutex_held(pBt->mutex) );
3875 assert( iLastPg>nFin );
3876
3877 if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){
3878 u8 eType;
3879 Pgno iPtrPage;
3880
3881 nFreeList = get4byte(&pBt->pPage1->aData[36]);
3882 if( nFreeList==0 ){
3883 return SQLITE_DONE;
3884 }
3885
3886 rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage);
3887 if( rc!=SQLITE_OK ){
3888 return rc;
3889 }
3890 if( eType==PTRMAP_ROOTPAGE ){
3891 return SQLITE_CORRUPT_BKPT;
3892 }
3893
3894 if( eType==PTRMAP_FREEPAGE ){
3895 if( bCommit==0 ){
3896          /* Remove the page from the file's free-list. This is not required
3897 ** if bCommit is non-zero. In that case, the free-list will be
3898 ** truncated to zero after this function returns, so it doesn't
3899 ** matter if it still contains some garbage entries.
3900 */
3901 Pgno iFreePg;
3902 MemPage *pFreePg;
3903 rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, BTALLOC_EXACT);
3904 if( rc!=SQLITE_OK ){
3905 return rc;
3906 }
3907 assert( iFreePg==iLastPg );
3908 releasePage(pFreePg);
3909 }
3910 } else {
3911 Pgno iFreePg; /* Index of free page to move pLastPg to */
3912 MemPage *pLastPg;
3913 u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */
3914 Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */
3915
3916 rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0);
3917 if( rc!=SQLITE_OK ){
3918 return rc;
3919 }
3920
3921 /* If bCommit is zero, this loop runs exactly once and page pLastPg
3922 ** is swapped with the first free page pulled off the free list.
3923 **
3924 ** On the other hand, if bCommit is greater than zero, then keep
3925 ** looping until a free-page located within the first nFin pages
3926 ** of the file is found.
3927 */
3928 if( bCommit==0 ){
3929 eMode = BTALLOC_LE;
3930 iNear = nFin;
3931 }
3932 do {
3933 MemPage *pFreePg;
3934 Pgno dbSize = btreePagecount(pBt);
3935 rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode);
3936 if( rc!=SQLITE_OK ){
3937 releasePage(pLastPg);
3938 return rc;
3939 }
3940 releasePage(pFreePg);
3941 if( iFreePg>dbSize ){
3942 releasePage(pLastPg);
3943 return SQLITE_CORRUPT_BKPT;
3944 }
3945 }while( bCommit && iFreePg>nFin );
3946 assert( iFreePg<iLastPg );
3947
3948 rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, bCommit);
3949 releasePage(pLastPg);
3950 if( rc!=SQLITE_OK ){
3951 return rc;
3952 }
3953 }
3954 }
3955
3956 if( bCommit==0 ){
3957 do {
3958 iLastPg--;
3959 }while( iLastPg==PENDING_BYTE_PAGE(pBt) || PTRMAP_ISPAGE(pBt, iLastPg) );
3960 pBt->bDoTruncate = 1;
3961 pBt->nPage = iLastPg;
3962 }
3963 return SQLITE_OK;
3964}
3965
3966/*
3967** The database opened by the first argument is an auto-vacuum database
3968** nOrig pages in size containing nFree free pages. Return the expected
3969** size of the database in pages following an auto-vacuum operation.
3970*/
3971static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){
3972 int nEntry; /* Number of entries on one ptrmap page */
3973 Pgno nPtrmap; /* Number of PtrMap pages to be freed */
3974 Pgno nFin; /* Return value */
3975
3976 nEntry = pBt->usableSize/5;
3977 nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry;
3978 nFin = nOrig - nFree - nPtrmap;
3979 if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<PENDING_BYTE_PAGE(pBt) ){
3980 nFin--;
3981 }
3982 while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
3983 nFin--;
3984 }
3985
3986 return nFin;
3987}
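/* A worked example with hypothetical numbers (usableSize==4096, so each
** pointer-map page holds nEntry==819 entries and the pointer-map pages
** are 2, 822, ...): for nOrig==1000 and nFree==300,
** PTRMAP_PAGENO(pBt,1000)==822, giving nPtrmap = (300-1000+822+819)/819
** == 1 and nFin = 1000-300-1 = 699.  That is, the 1000-page file needed
** two pointer-map pages while the vacuumed 699-page file needs only one.
*/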
3988
3989/*
3990** A write-transaction must be opened before calling this function.
3991** It performs a single unit of work towards an incremental vacuum.
3992**
3993** If the incremental vacuum is finished after this function has run,
3994** SQLITE_DONE is returned. If it is not finished, but no error occurred,
3995** SQLITE_OK is returned. Otherwise an SQLite error code.
3996*/
3997int sqlite3BtreeIncrVacuum(Btree *p){
3998 int rc;
3999 BtShared *pBt = p->pBt;
4000
4001 sqlite3BtreeEnter(p);
4002 assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE );
4003 if( !pBt->autoVacuum ){
4004 rc = SQLITE_DONE;
4005 }else{
4006 Pgno nOrig = btreePagecount(pBt);
4007 Pgno nFree = get4byte(&pBt->pPage1->aData[36]);
4008 Pgno nFin = finalDbSize(pBt, nOrig, nFree);
4009
4010 if( nOrig<nFin || nFree>=nOrig ){
4011 rc = SQLITE_CORRUPT_BKPT;
4012 }else if( nFree>0 ){
4013 rc = saveAllCursors(pBt, 0, 0);
4014 if( rc==SQLITE_OK ){
4015 invalidateAllOverflowCache(pBt);
4016 rc = incrVacuumStep(pBt, nFin, nOrig, 0);
4017 }
4018 if( rc==SQLITE_OK ){
4019 rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
4020 put4byte(&pBt->pPage1->aData[28], pBt->nPage);
4021 }
4022 }else{
4023 rc = SQLITE_DONE;
4024 }
4025 }
4026 sqlite3BtreeLeave(p);
4027 return rc;
4028}
4029
4030/*
4031** This routine is called prior to sqlite3PagerCommit when a transaction
4032** is committed for an auto-vacuum database.
4033*/
4034static int autoVacuumCommit(Btree *p){
4035 int rc = SQLITE_OK;
4036 Pager *pPager;
4037 BtShared *pBt;
4038 sqlite3 *db;
4039 VVA_ONLY( int nRef );
4040
4041 assert( p!=0 );
4042 pBt = p->pBt;
4043 pPager = pBt->pPager;
4044 VVA_ONLY( nRef = sqlite3PagerRefcount(pPager); )
4045
4046 assert( sqlite3_mutex_held(pBt->mutex) );
4047 invalidateAllOverflowCache(pBt);
4048 assert(pBt->autoVacuum);
4049 if( !pBt->incrVacuum ){
4050 Pgno nFin; /* Number of pages in database after autovacuuming */
4051 Pgno nFree; /* Number of pages on the freelist initially */
4052 Pgno nVac; /* Number of pages to vacuum */
4053 Pgno iFree; /* The next page to be freed */
4054 Pgno nOrig; /* Database size before freeing */
4055
4056 nOrig = btreePagecount(pBt);
4057 if( PTRMAP_ISPAGE(pBt, nOrig) || nOrig==PENDING_BYTE_PAGE(pBt) ){
4058 /* It is not possible to create a database for which the final page
4059 ** is either a pointer-map page or the pending-byte page. If one
4060 ** is encountered, this indicates corruption.
4061 */
4062 return SQLITE_CORRUPT_BKPT;
4063 }
4064
4065 nFree = get4byte(&pBt->pPage1->aData[36]);
4066 db = p->db;
4067 if( db->xAutovacPages ){
4068 int iDb;
4069 for(iDb=0; ALWAYS(iDb<db->nDb); iDb++){
4070 if( db->aDb[iDb].pBt==p ) break;
4071 }
4072 nVac = db->xAutovacPages(
4073 db->pAutovacPagesArg,
4074 db->aDb[iDb].zDbSName,
4075 nOrig,
4076 nFree,
4077 pBt->pageSize
4078 );
4079 if( nVac>nFree ){
4080 nVac = nFree;
4081 }
4082 if( nVac==0 ){
4083 return SQLITE_OK;
4084 }
4085 }else{
4086 nVac = nFree;
4087 }
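    /* At this point nVac is the number of free pages that will be
    ** reclaimed by this commit.  nVac==nFree means a complete auto-vacuum
    ** (the freelist header fields are zeroed below); nVac<nFree, which can
    ** only happen when an xAutovacPages callback is registered, leaves the
    ** remainder of the free-list in place. */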
4088 nFin = finalDbSize(pBt, nOrig, nVac);
4089 if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT;
4090 if( nFin<nOrig ){
4091 rc = saveAllCursors(pBt, 0, 0);
4092 }
4093 for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){
4094 rc = incrVacuumStep(pBt, nFin, iFree, nVac==nFree);
4095 }
4096 if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){
4097 rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
4098 if( nVac==nFree ){
4099 put4byte(&pBt->pPage1->aData[32], 0);
4100 put4byte(&pBt->pPage1->aData[36], 0);
4101 }
4102 put4byte(&pBt->pPage1->aData[28], nFin);
4103 pBt->bDoTruncate = 1;
4104 pBt->nPage = nFin;
4105 }
4106 if( rc!=SQLITE_OK ){
4107 sqlite3PagerRollback(pPager);
4108 }
4109 }
4110
4111 assert( nRef>=sqlite3PagerRefcount(pPager) );
4112 return rc;
4113}
4114
4115#else /* ifndef SQLITE_OMIT_AUTOVACUUM */
4116# define setChildPtrmaps(x) SQLITE_OK
4117#endif
4118
4119/*
4120** This routine does the first phase of a two-phase commit. This routine
4121** causes a rollback journal to be created (if it does not already exist)
4122** and populated with enough information so that if a power loss occurs
4123** the database can be restored to its original state by playing back
4124** the journal. Then the contents of the journal are flushed out to
4125** the disk. After the journal is safely on oxide, the changes to the
4126** database are written into the database file and flushed to oxide.
4127** At the end of this call, the rollback journal still exists on the
4128** disk and we are still holding all locks, so the transaction has not
4129** committed. See sqlite3BtreeCommitPhaseTwo() for the second phase of the
4130** commit process.
4131**
4132** This call is a no-op if no write-transaction is currently active on pBt.
4133**
4134** Otherwise, sync the database file for the btree pBt. zSuperJrnl points to
4135** the name of a super-journal file that should be written into the
4136** individual journal file, or is NULL, indicating no super-journal file
4137** (single database transaction).
4138**
4139** When this is called, the super-journal should already have been
4140** created, populated with this journal pointer and synced to disk.
4141**
4142** Once this routine has returned, the only thing required to commit
4143** the write-transaction for this database file is to delete the journal.
4144*/
4145int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){
4146 int rc = SQLITE_OK;
4147 if( p->inTrans==TRANS_WRITE ){
4148 BtShared *pBt = p->pBt;
4149 sqlite3BtreeEnter(p);
4150#ifndef SQLITE_OMIT_AUTOVACUUM
4151 if( pBt->autoVacuum ){
4152 rc = autoVacuumCommit(p);
4153 if( rc!=SQLITE_OK ){
4154 sqlite3BtreeLeave(p);
4155 return rc;
4156 }
4157 }
4158 if( pBt->bDoTruncate ){
4159 sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage);
4160 }
4161#endif
4162 rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zSuperJrnl, 0);
4163 sqlite3BtreeLeave(p);
4164 }
4165 return rc;
4166}
4167
4168/*
4169** This function is called from both BtreeCommitPhaseTwo() and BtreeRollback()
4170** at the conclusion of a transaction.
4171*/
4172static void btreeEndTransaction(Btree *p){
4173 BtShared *pBt = p->pBt;
4174 sqlite3 *db = p->db;
4175 assert( sqlite3BtreeHoldsMutex(p) );
4176
4177#ifndef SQLITE_OMIT_AUTOVACUUM
4178 pBt->bDoTruncate = 0;
4179#endif
4180 if( p->inTrans>TRANS_NONE && db->nVdbeRead>1 ){
4181 /* If there are other active statements that belong to this database
4182 ** handle, downgrade to a read-only transaction. The other statements
4183 ** may still be reading from the database. */
4184 downgradeAllSharedCacheTableLocks(p);
4185 p->inTrans = TRANS_READ;
4186 }else{
4187 /* If the handle had any kind of transaction open, decrement the
4188 ** transaction count of the shared btree. If the transaction count
4189 ** reaches 0, set the shared state to TRANS_NONE. The unlockBtreeIfUnused()
4190 ** call below will unlock the pager. */
4191 if( p->inTrans!=TRANS_NONE ){
4192 clearAllSharedCacheTableLocks(p);
4193 pBt->nTransaction--;
4194 if( 0==pBt->nTransaction ){
4195 pBt->inTransaction = TRANS_NONE;
4196 }
4197 }
4198
4199 /* Set the current transaction state to TRANS_NONE and unlock the
4200 ** pager if this call closed the only read or write transaction. */
4201 p->inTrans = TRANS_NONE;
4202 unlockBtreeIfUnused(pBt);
4203 }
4204
4205 btreeIntegrity(p);
4206}
4207
4208/*
4209** Commit the transaction currently in progress.
4210**
4211** This routine implements the second phase of a 2-phase commit. The
4212** sqlite3BtreeCommitPhaseOne() routine does the first phase and should
4213** be invoked prior to calling this routine. The sqlite3BtreeCommitPhaseOne()
4214** routine did all the work of writing information out to disk and flushing the
4215** contents so that they are written onto the disk platter. All this
4216** routine has to do is delete or truncate or zero the header in
4217** the rollback journal (which causes the transaction to commit) and
4218** drop locks.
4219**
4220** Normally, if an error occurs while the pager layer is attempting to
4221** finalize the underlying journal file, this function returns an error and
4222** the upper layer will attempt a rollback. However, if the second argument
4223** is non-zero then this b-tree transaction is part of a multi-file
4224** transaction. In this case, the transaction has already been committed
4225** (by deleting a super-journal file) and the caller will ignore this
4226** function's return code. So, even if an error occurs in the pager layer,
4227** reset the b-tree object's internal state to indicate that the write
4228** transaction has been closed. This is quite safe, as the pager will have
4229** transitioned to the error state.
4230**
4231** This will release the write lock on the database file. If there
4232** are no active cursors, it also releases the read lock.
4233*/
4234int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){
4235
4236 if( p->inTrans==TRANS_NONE ) return SQLITE_OK;
4237 sqlite3BtreeEnter(p);
4238 btreeIntegrity(p);
4239
4240 /* If the handle has a write-transaction open, commit the shared-btrees
4241 ** transaction and set the shared state to TRANS_READ.
4242 */
4243 if( p->inTrans==TRANS_WRITE ){
4244 int rc;
4245 BtShared *pBt = p->pBt;
4246 assert( pBt->inTransaction==TRANS_WRITE );
4247 assert( pBt->nTransaction>0 );
4248 rc = sqlite3PagerCommitPhaseTwo(pBt->pPager);
4249 if( rc!=SQLITE_OK && bCleanup==0 ){
4250 sqlite3BtreeLeave(p);
4251 return rc;
4252 }
4253 p->iBDataVersion--; /* Compensate for pPager->iDataVersion++; */
4254 pBt->inTransaction = TRANS_READ;
4255 btreeClearHasContent(pBt);
4256 }
4257
4258 btreeEndTransaction(p);
4259 sqlite3BtreeLeave(p);
4260 return SQLITE_OK;
4261}
4262
4263/*
4264** Do both phases of a commit.
4265*/
4266int sqlite3BtreeCommit(Btree *p){
4267 int rc;
4268 sqlite3BtreeEnter(p);
4269 rc = sqlite3BtreeCommitPhaseOne(p, 0);
4270 if( rc==SQLITE_OK ){
4271 rc = sqlite3BtreeCommitPhaseTwo(p, 0);
4272 }
4273 sqlite3BtreeLeave(p);
4274 return rc;
4275}
4276
4277/*
4278** This routine sets the state to CURSOR_FAULT and the error
4279** code to errCode for every cursor on any BtShared that pBtree
4280** references. Or if the writeOnly flag is set to 1, then only
4281** trip write cursors and leave read cursors unchanged.
4282**
4283** Every cursor is a candidate to be tripped, including cursors
4284** that belong to other database connections that happen to be
4285** sharing the cache with pBtree.
4286**
4287** This routine gets called when a rollback occurs. If the writeOnly
4288** flag is true, then only write-cursors need be tripped - read-only
4289** cursors save their current positions so that they may continue
4290** following the rollback. Or, if writeOnly is false, all cursors are
4291** tripped. In general, writeOnly is false if the transaction being
4292** rolled back modified the database schema. In this case b-tree root
4293** pages may be moved or deleted from the database altogether, making
4294** it unsafe for read cursors to continue.
4295**
4296** If the writeOnly flag is true and an error is encountered while
4297** saving the current position of a read-only cursor, all cursors,
4298** including all read-cursors are tripped.
4299**
4300** SQLITE_OK is returned if successful, or if an error occurs while
4301** saving a cursor position, an SQLite error code.
4302*/
4303int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){
4304 BtCursor *p;
4305 int rc = SQLITE_OK;
4306
4307 assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 );
4308 if( pBtree ){
4309 sqlite3BtreeEnter(pBtree);
4310 for(p=pBtree->pBt->pCursor; p; p=p->pNext){
4311 if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){
4312 if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){
4313 rc = saveCursorPosition(p);
4314 if( rc!=SQLITE_OK ){
4315 (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0);
4316 break;
4317 }
4318 }
4319 }else{
4320 sqlite3BtreeClearCursor(p);
4321 p->eState = CURSOR_FAULT;
4322 p->skipNext = errCode;
4323 }
4324 btreeReleaseAllCursorPages(p);
4325 }
4326 sqlite3BtreeLeave(pBtree);
4327 }
4328 return rc;
4329}
4330
4331/*
4332** Set the pBt->nPage field correctly, according to the current
4333** state of the database. Assume pBt->pPage1 is valid.
4334*/
4335static void btreeSetNPage(BtShared *pBt, MemPage *pPage1){
4336 int nPage = get4byte(&pPage1->aData[28]);
4337 testcase( nPage==0 );
4338 if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage);
4339 testcase( pBt->nPage!=(u32)nPage );
4340 pBt->nPage = nPage;
4341}
4342
4343/*
4344** Rollback the transaction in progress.
4345**
4346** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped).
4347** Only write cursors are tripped if writeOnly is true but all cursors are
4348** tripped if writeOnly is false. Any attempt to use
4349** a tripped cursor will result in an error.
4350**
4351** This will release the write lock on the database file. If there
4352** are no active cursors, it also releases the read lock.
4353*/
4354int sqlite3BtreeRollback(Btree *p, int tripCode, int writeOnly){
4355 int rc;
4356 BtShared *pBt = p->pBt;
4357 MemPage *pPage1;
4358
4359 assert( writeOnly==1 || writeOnly==0 );
4360 assert( tripCode==SQLITE_ABORT_ROLLBACK || tripCode==SQLITE_OK );
4361 sqlite3BtreeEnter(p);
4362 if( tripCode==SQLITE_OK ){
4363 rc = tripCode = saveAllCursors(pBt, 0, 0);
4364 if( rc ) writeOnly = 0;
4365 }else{
4366 rc = SQLITE_OK;
4367 }
4368 if( tripCode ){
4369 int rc2 = sqlite3BtreeTripAllCursors(p, tripCode, writeOnly);
4370 assert( rc==SQLITE_OK || (writeOnly==0 && rc2==SQLITE_OK) );
4371 if( rc2!=SQLITE_OK ) rc = rc2;
4372 }
4373 btreeIntegrity(p);
4374
4375 if( p->inTrans==TRANS_WRITE ){
4376 int rc2;
4377
4378 assert( TRANS_WRITE==pBt->inTransaction );
4379 rc2 = sqlite3PagerRollback(pBt->pPager);
4380 if( rc2!=SQLITE_OK ){
4381 rc = rc2;
4382 }
4383
4384 /* The rollback may have destroyed the pPage1->aData value. So
4385 ** call btreeGetPage() on page 1 again to make
4386 ** sure pPage1->aData is set correctly. */
4387 if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
4388 btreeSetNPage(pBt, pPage1);
4389 releasePageOne(pPage1);
4390 }
4391 assert( countValidCursors(pBt, 1)==0 );
4392 pBt->inTransaction = TRANS_READ;
4393 btreeClearHasContent(pBt);
4394 }
4395
4396 btreeEndTransaction(p);
4397 sqlite3BtreeLeave(p);
4398 return rc;
4399}
4400
4401/*
4402** Start a statement subtransaction. The subtransaction can be rolled
4403** back independently of the main transaction. You must start a transaction
4404** before starting a subtransaction. The subtransaction is ended automatically
4405** if the main transaction commits or rolls back.
4406**
4407** Statement subtransactions are used around individual SQL statements
4408** that are contained within a BEGIN...COMMIT block. If a constraint
4409** error occurs within the statement, the effect of that one statement
4410** can be rolled back without having to rollback the entire transaction.
4411**
4412** A statement sub-transaction is implemented as an anonymous savepoint. The
4413** value passed as the second parameter is the total number of savepoints,
4414** including the new anonymous savepoint, open on the B-Tree. i.e. if there
4415** are no active savepoints and no other statement-transactions open,
4416** iStatement is 1. This anonymous savepoint can be released or rolled back
4417** using the sqlite3BtreeSavepoint() function.
4418*/
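/* For example (an illustration of the numbering described above): with two
** named SAVEPOINTs already open and no other statement sub-transaction
** active, the new anonymous savepoint is opened with iStatement==3.
*/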
4419int sqlite3BtreeBeginStmt(Btree *p, int iStatement){
4420 int rc;
4421 BtShared *pBt = p->pBt;
4422 sqlite3BtreeEnter(p);
4423 assert( p->inTrans==TRANS_WRITE );
4424 assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
4425 assert( iStatement>0 );
4426 assert( iStatement>p->db->nSavepoint );
4427 assert( pBt->inTransaction==TRANS_WRITE );
4428 /* At the pager level, a statement transaction is a savepoint with
4429 ** an index greater than all savepoints created explicitly using
4430 ** SQL statements. It is illegal to open, release or rollback any
4431 ** such savepoints while the statement transaction savepoint is active.
4432 */
4433 rc = sqlite3PagerOpenSavepoint(pBt->pPager, iStatement);
4434 sqlite3BtreeLeave(p);
4435 return rc;
4436}
4437
4438/*
4439** The second argument to this function, op, is always SAVEPOINT_ROLLBACK
4440** or SAVEPOINT_RELEASE. This function either releases or rolls back the
4441** savepoint identified by parameter iSavepoint, depending on the value
4442** of op.
4443**
4444** Normally, iSavepoint is greater than or equal to zero. However, if op is
4445** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the
4446** contents of the entire transaction are rolled back. This is different
4447** from a normal transaction rollback, as no locks are released and the
4448** transaction remains open.
4449*/
4450int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){
4451 int rc = SQLITE_OK;
4452 if( p && p->inTrans==TRANS_WRITE ){
4453 BtShared *pBt = p->pBt;
4454 assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK );
4455 assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) );
4456 sqlite3BtreeEnter(p);
4457 if( op==SAVEPOINT_ROLLBACK ){
4458 rc = saveAllCursors(pBt, 0, 0);
4459 }
4460 if( rc==SQLITE_OK ){
4461 rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint);
4462 }
4463 if( rc==SQLITE_OK ){
4464 if( iSavepoint<0 && (pBt->btsFlags & BTS_INITIALLY_EMPTY)!=0 ){
4465 pBt->nPage = 0;
4466 }
4467 rc = newDatabase(pBt);
4468 btreeSetNPage(pBt, pBt->pPage1);
4469
4470 /* pBt->nPage might be zero if the database was corrupt when
4471 ** the transaction was started. Otherwise, it must be at least 1. */
4472 assert( CORRUPT_DB || pBt->nPage>0 );
4473 }
4474 sqlite3BtreeLeave(p);
4475 }
4476 return rc;
4477}
4478
4479/*
4480** Create a new cursor for the BTree whose root is on the page
4481** iTable. If a read-only cursor is requested, it is assumed that
4482** the caller already has at least a read-only transaction open
4483** on the database. If a write-cursor is requested, then
4484** the caller is assumed to have an open write transaction.
4485**
4486** If the BTREE_WRCSR bit of wrFlag is clear, then the cursor can only
4487** be used for reading. If the BTREE_WRCSR bit is set, then the cursor
4488** can be used for reading or for writing if other conditions for writing
4489** are also met. These are the conditions that must be met in order
4490** for writing to be allowed:
4491**
4492** 1: The cursor must have been opened with wrFlag containing BTREE_WRCSR
4493**
4494** 2: Other database connections that share the same pager cache
4495** but which are not in the READ_UNCOMMITTED state may not have
4496** cursors open with wrFlag==0 on the same table. Otherwise
4497** the changes made by this write cursor would be visible to
4498** the read cursors in the other database connection.
4499**
4500** 3: The database must be writable (not on read-only media)
4501**
4502** 4: There must be an active transaction.
4503**
4504** The BTREE_FORDELETE bit of wrFlag may optionally be set if BTREE_WRCSR
4505** is set. If FORDELETE is set, that is a hint to the implementation that
4506** this cursor will only be used to seek to and delete entries of an index
4507** as part of a larger DELETE statement. The FORDELETE hint is not used by
4508** this implementation. But in a hypothetical alternative storage engine
4509** in which index entries are automatically deleted when corresponding table
4510** rows are deleted, the FORDELETE flag is a hint that all SEEK and DELETE
4511** operations on this cursor can be no-ops and all READ operations can
4512** return a null row (2-bytes: 0x01 0x00).
4513**
4514** No checking is done to make sure that page iTable really is the
4515** root page of a b-tree. If it is not, then the cursor acquired
4516** will not work correctly.
4517**
4518** It is assumed that the sqlite3BtreeCursorZero() has been called
4519** on pCur to initialize the memory space prior to invoking this routine.
4520*/
4521static int btreeCursor(
4522 Btree *p, /* The btree */
4523 Pgno iTable, /* Root page of table to open */
4524 int wrFlag, /* 1 to write. 0 read-only */
4525 struct KeyInfo *pKeyInfo, /* First arg to comparison function */
4526 BtCursor *pCur /* Space for new cursor */
4527){
4528 BtShared *pBt = p->pBt; /* Shared b-tree handle */
4529 BtCursor *pX; /* Looping over other all cursors */
4530
4531 assert( sqlite3BtreeHoldsMutex(p) );
4532 assert( wrFlag==0
4533 || wrFlag==BTREE_WRCSR
4534 || wrFlag==(BTREE_WRCSR|BTREE_FORDELETE)
4535 );
4536
4537 /* The following assert statements verify that if this is a sharable
4538 ** b-tree database, the connection is holding the required table locks,
4539 ** and that no other connection has any open cursor that conflicts with
4540 ** this lock. The iTable<1 term disables the check for corrupt schemas. */
4541 assert( hasSharedCacheTableLock(p, iTable, pKeyInfo!=0, (wrFlag?2:1))
4542 || iTable<1 );
4543 assert( wrFlag==0 || !hasReadConflicts(p, iTable) );
4544
4545 /* Assert that the caller has opened the required transaction. */
4546 assert( p->inTrans>TRANS_NONE );
4547 assert( wrFlag==0 || p->inTrans==TRANS_WRITE );
4548 assert( pBt->pPage1 && pBt->pPage1->aData );
4549 assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 );
4550
4551 if( iTable<=1 ){
4552 if( iTable<1 ){
4553 return SQLITE_CORRUPT_BKPT;
4554 }else if( btreePagecount(pBt)==0 ){
4555 assert( wrFlag==0 );
4556 iTable = 0;
4557 }
4558 }
4559
4560 /* Now that no other errors can occur, finish filling in the BtCursor
4561 ** variables and link the cursor into the BtShared list. */
4562 pCur->pgnoRoot = iTable;
4563 pCur->iPage = -1;
4564 pCur->pKeyInfo = pKeyInfo;
4565 pCur->pBtree = p;
4566 pCur->pBt = pBt;
4567 pCur->curFlags = 0;
4568 /* If there are two or more cursors on the same btree, then all such
4569 ** cursors *must* have the BTCF_Multiple flag set. */
4570 for(pX=pBt->pCursor; pX; pX=pX->pNext){
4571 if( pX->pgnoRoot==iTable ){
4572 pX->curFlags |= BTCF_Multiple;
4573 pCur->curFlags = BTCF_Multiple;
4574 }
4575 }
4576 pCur->eState = CURSOR_INVALID;
4577 pCur->pNext = pBt->pCursor;
4578 pBt->pCursor = pCur;
4579 if( wrFlag ){
4580 pCur->curFlags |= BTCF_WriteFlag;
4581 pCur->curPagerFlags = 0;
4582 if( pBt->pTmpSpace==0 ) return allocateTempSpace(pBt);
4583 }else{
4584 pCur->curPagerFlags = PAGER_GET_READONLY;
4585 }
4586 return SQLITE_OK;
4587}
4588static int btreeCursorWithLock(
4589 Btree *p, /* The btree */
4590 Pgno iTable, /* Root page of table to open */
4591 int wrFlag, /* 1 to write. 0 read-only */
4592 struct KeyInfo *pKeyInfo, /* First arg to comparison function */
4593 BtCursor *pCur /* Space for new cursor */
4594){
4595 int rc;
4596 sqlite3BtreeEnter(p);
4597 rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur);
4598 sqlite3BtreeLeave(p);
4599 return rc;
4600}
4601int sqlite3BtreeCursor(
4602 Btree *p, /* The btree */
4603 Pgno iTable, /* Root page of table to open */
4604 int wrFlag, /* 1 to write. 0 read-only */
4605 struct KeyInfo *pKeyInfo, /* First arg to xCompare() */
4606 BtCursor *pCur /* Write new cursor here */
4607){
4608 if( p->sharable ){
4609 return btreeCursorWithLock(p, iTable, wrFlag, pKeyInfo, pCur);
4610 }else{
4611 return btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur);
4612 }
4613}
4614
4615/*
4616** Return the size of a BtCursor object in bytes.
4617**
4618** This interface is needed so that users of cursors can preallocate
4619** sufficient storage to hold a cursor. The BtCursor object is opaque
4620** to users so they cannot do the sizeof() themselves - they must call
4621** this routine.
4622*/
4623int sqlite3BtreeCursorSize(void){
4624 return ROUND8(sizeof(BtCursor));
4625}
4626
4627/*
4628** Initialize memory that will be converted into a BtCursor object.
4629**
4630** The simple approach here would be to memset() the entire object
4631** to zero. But it turns out that the apPage[] and aiIdx[] arrays
4632** do not need to be zeroed and they are large, so we can save a lot
4633** of run-time by skipping the initialization of those elements.
4634*/
4635void sqlite3BtreeCursorZero(BtCursor *p){
4636 memset(p, 0, offsetof(BtCursor, BTCURSOR_FIRST_UNINIT));
4637}
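/* An illustrative cursor lifecycle (a sketch only; the VDBE normally
** allocates and manages the cursor memory itself, and "p", "iRoot" and
** an open transaction are assumed to already exist).  A read-only cursor
** on a table b-tree passes wrFlag==0 and a NULL KeyInfo:
**
**     BtCursor *pCur = sqlite3_malloc( sqlite3BtreeCursorSize() );
**     if( pCur ){
**       sqlite3BtreeCursorZero(pCur);
**       rc = sqlite3BtreeCursor(p, iRoot, 0, 0, pCur);
**       ...
**       sqlite3BtreeCloseCursor(pCur);
**       sqlite3_free(pCur);
**     }
*/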
4638
4639/*
4640** Close a cursor. The read lock on the database file is released
4641** when the last cursor is closed.
4642*/
4643int sqlite3BtreeCloseCursor(BtCursor *pCur){
4644 Btree *pBtree = pCur->pBtree;
4645 if( pBtree ){
4646 BtShared *pBt = pCur->pBt;
4647 sqlite3BtreeEnter(pBtree);
4648 assert( pBt->pCursor!=0 );
4649 if( pBt->pCursor==pCur ){
4650 pBt->pCursor = pCur->pNext;
4651 }else{
4652 BtCursor *pPrev = pBt->pCursor;
4653 do{
4654 if( pPrev->pNext==pCur ){
4655 pPrev->pNext = pCur->pNext;
4656 break;
4657 }
4658 pPrev = pPrev->pNext;
4659 }while( ALWAYS(pPrev) );
4660 }
4661 btreeReleaseAllCursorPages(pCur);
4662 unlockBtreeIfUnused(pBt);
4663 sqlite3_free(pCur->aOverflow);
4664 sqlite3_free(pCur->pKey);
4665 if( (pBt->openFlags & BTREE_SINGLE) && pBt->pCursor==0 ){
4666 /* Since the BtShared is not sharable, there is no need to
4667 ** worry about the missing sqlite3BtreeLeave() call here. */
4668 assert( pBtree->sharable==0 );
4669 sqlite3BtreeClose(pBtree);
4670 }else{
4671 sqlite3BtreeLeave(pBtree);
4672 }
4673 pCur->pBtree = 0;
4674 }
4675 return SQLITE_OK;
4676}
4677
4678/*
4679** Make sure the BtCursor* given in the argument has a valid
4680** BtCursor.info structure. If it is not already valid, call
4681** btreeParseCell() to fill it in.
4682**
4683** BtCursor.info is a cache of the information in the current cell.
4684** Using this cache reduces the number of calls to btreeParseCell().
4685*/
4686#ifndef NDEBUG
4687 static int cellInfoEqual(CellInfo *a, CellInfo *b){
4688 if( a->nKey!=b->nKey ) return 0;
4689 if( a->pPayload!=b->pPayload ) return 0;
4690 if( a->nPayload!=b->nPayload ) return 0;
4691 if( a->nLocal!=b->nLocal ) return 0;
4692 if( a->nSize!=b->nSize ) return 0;
4693 return 1;
4694 }
4695 static void assertCellInfo(BtCursor *pCur){
4696 CellInfo info;
4697 memset(&info, 0, sizeof(info));
4698 btreeParseCell(pCur->pPage, pCur->ix, &info);
4699 assert( CORRUPT_DB || cellInfoEqual(&info, &pCur->info) );
4700 }
4701#else
4702 #define assertCellInfo(x)
4703#endif
4704static SQLITE_NOINLINE void getCellInfo(BtCursor *pCur){
4705 if( pCur->info.nSize==0 ){
4706 pCur->curFlags |= BTCF_ValidNKey;
4707 btreeParseCell(pCur->pPage,pCur->ix,&pCur->info);
4708 }else{
4709 assertCellInfo(pCur);
4710 }
4711}
4712
4713#ifndef NDEBUG /* The next routine used only within assert() statements */
4714/*
4715** Return true if the given BtCursor is valid. A valid cursor is one
4716** that is currently pointing to a row in a (non-empty) table.
4717** This verification routine is used only within assert() statements.
4718*/
4719int sqlite3BtreeCursorIsValid(BtCursor *pCur){
4720 return pCur && pCur->eState==CURSOR_VALID;
4721}
4722#endif /* NDEBUG */
4723int sqlite3BtreeCursorIsValidNN(BtCursor *pCur){
4724 assert( pCur!=0 );
4725 return pCur->eState==CURSOR_VALID;
4726}
4727
4728/*
4729** Return the value of the integer key or "rowid" for a table btree.
4730** This routine is only valid for a cursor that is pointing into an
4731** ordinary table btree. If the cursor points to an index btree or
4732** is invalid, the result of this routine is undefined.
4733*/
4734i64 sqlite3BtreeIntegerKey(BtCursor *pCur){
4735 assert( cursorHoldsMutex(pCur) );
4736 assert( pCur->eState==CURSOR_VALID );
4737 assert( pCur->curIntKey );
4738 getCellInfo(pCur);
4739 return pCur->info.nKey;
4740}
4741
4742/*
4743** Pin or unpin a cursor.
4744*/
4745void sqlite3BtreeCursorPin(BtCursor *pCur){
4746 assert( (pCur->curFlags & BTCF_Pinned)==0 );
4747 pCur->curFlags |= BTCF_Pinned;
4748}
4749void sqlite3BtreeCursorUnpin(BtCursor *pCur){
4750 assert( (pCur->curFlags & BTCF_Pinned)!=0 );
4751 pCur->curFlags &= ~BTCF_Pinned;
4752}
4753
4754#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC
4755/*
4756** Return the offset into the database file for the start of the
4757** payload to which the cursor is pointing.
4758*/
4759i64 sqlite3BtreeOffset(BtCursor *pCur){
4760 assert( cursorHoldsMutex(pCur) );
4761 assert( pCur->eState==CURSOR_VALID );
4762 getCellInfo(pCur);
4763 return (i64)pCur->pBt->pageSize*((i64)pCur->pPage->pgno - 1) +
4764 (i64)(pCur->info.pPayload - pCur->pPage->aData);
4765}
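/* For example (hypothetical numbers): with a 4096-byte page size, a cell
** whose payload begins 100 bytes into page 3 yields an offset of
** 4096*(3-1) + 100 == 8292 bytes from the start of the database file.
*/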
4766#endif /* SQLITE_ENABLE_OFFSET_SQL_FUNC */
4767
4768/*
4769** Return the number of bytes of payload for the entry that pCur is
4770** currently pointing to. For table btrees, this will be the amount
4771** of data. For index btrees, this will be the size of the key.
4772**
4773** The caller must guarantee that the cursor is pointing to a non-NULL
4774** valid entry. In other words, the calling procedure must guarantee
** that the cursor has BtCursor.eState==CURSOR_VALID.
4776*/
4777u32 sqlite3BtreePayloadSize(BtCursor *pCur){
4778 assert( cursorHoldsMutex(pCur) );
4779 assert( pCur->eState==CURSOR_VALID );
4780 getCellInfo(pCur);
4781 return pCur->info.nPayload;
4782}
4783
4784/*
4785** Return an upper bound on the size of any record for the table
4786** that the cursor is pointing into.
4787**
4788** This is an optimization. Everything will still work if this
4789** routine always returns 2147483647 (which is the largest record
4790** that SQLite can handle) or more. But returning a smaller value might
4791** prevent large memory allocations when trying to interpret a
** corrupt database.
4793**
4794** The current implementation merely returns the size of the underlying
4795** database file.
4796*/
4797sqlite3_int64 sqlite3BtreeMaxRecordSize(BtCursor *pCur){
4798 assert( cursorHoldsMutex(pCur) );
4799 assert( pCur->eState==CURSOR_VALID );
4800 return pCur->pBt->pageSize * (sqlite3_int64)pCur->pBt->nPage;
4801}
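
/*
** Example of the bound computed above (hypothetical sizes, illustration
** only): with a 4096-byte page size and a 1000-page database image the
** routine returns 4096*1000 = 4096000, since no single record can be
** larger than the file that contains it.
*/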
4802
4803/*
4804** Given the page number of an overflow page in the database (parameter
4805** ovfl), this function finds the page number of the next page in the
4806** linked list of overflow pages. If possible, it uses the auto-vacuum
4807** pointer-map data instead of reading the content of page ovfl to do so.
4808**
4809** If an error occurs an SQLite error code is returned. Otherwise:
4810**
4811** The page number of the next overflow page in the linked list is
4812** written to *pPgnoNext. If page ovfl is the last page in its linked
4813** list, *pPgnoNext is set to zero.
4814**
4815** If ppPage is not NULL, and a reference to the MemPage object corresponding
** to page number ovfl was obtained, then *ppPage is set to point to that
4817** reference. It is the responsibility of the caller to call releasePage()
** on *ppPage to free the reference. If no reference was obtained (because
4819** the pointer-map was used to obtain the value for *pPgnoNext), then
4820** *ppPage is set to zero.
4821*/
4822static int getOverflowPage(
4823 BtShared *pBt, /* The database file */
4824 Pgno ovfl, /* Current overflow page number */
4825 MemPage **ppPage, /* OUT: MemPage handle (may be NULL) */
4826 Pgno *pPgnoNext /* OUT: Next overflow page number */
4827){
4828 Pgno next = 0;
4829 MemPage *pPage = 0;
4830 int rc = SQLITE_OK;
4831
4832 assert( sqlite3_mutex_held(pBt->mutex) );
4833 assert(pPgnoNext);
4834
4835#ifndef SQLITE_OMIT_AUTOVACUUM
4836 /* Try to find the next page in the overflow list using the
4837 ** autovacuum pointer-map pages. Guess that the next page in
4838 ** the overflow list is page number (ovfl+1). If that guess turns
4839 ** out to be wrong, fall back to loading the data of page
4840 ** number ovfl to determine the next page number.
4841 */
4842 if( pBt->autoVacuum ){
4843 Pgno pgno;
4844 Pgno iGuess = ovfl+1;
4845 u8 eType;
4846
4847 while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){
4848 iGuess++;
4849 }
4850
4851 if( iGuess<=btreePagecount(pBt) ){
4852 rc = ptrmapGet(pBt, iGuess, &eType, &pgno);
4853 if( rc==SQLITE_OK && eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){
4854 next = iGuess;
4855 rc = SQLITE_DONE;
4856 }
4857 }
4858 }
4859#endif
4860
4861 assert( next==0 || rc==SQLITE_DONE );
4862 if( rc==SQLITE_OK ){
4863 rc = btreeGetPage(pBt, ovfl, &pPage, (ppPage==0) ? PAGER_GET_READONLY : 0);
4864 assert( rc==SQLITE_OK || pPage==0 );
4865 if( rc==SQLITE_OK ){
4866 next = get4byte(pPage->aData);
4867 }
4868 }
4869
4870 *pPgnoNext = next;
4871 if( ppPage ){
4872 *ppPage = pPage;
4873 }else{
4874 releasePage(pPage);
4875 }
4876 return (rc==SQLITE_DONE ? SQLITE_OK : rc);
4877}
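
/*
** Usage sketch (illustrative only; pgnoFirst is a hypothetical first
** overflow page number): walking an entire overflow chain without
** retaining any page references, by passing 0 for the ppPage argument:
**
**     Pgno pgno = pgnoFirst;
**     int rc = SQLITE_OK;
**     while( pgno && rc==SQLITE_OK ){
**       rc = getOverflowPage(pBt, pgno, 0, &pgno);
**     }
**
** Because ppPage is 0, any MemPage loaded internally is released before
** returning, so the caller has nothing to releasePage().
*/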
4878
4879/*
4880** Copy data from a buffer to a page, or from a page to a buffer.
4881**
4882** pPayload is a pointer to data stored on database page pDbPage.
4883** If argument eOp is false, then nByte bytes of data are copied
4884** from pPayload to the buffer pointed at by pBuf. If eOp is true,
4885** then sqlite3PagerWrite() is called on pDbPage and nByte bytes
4886** of data are copied from the buffer pBuf to pPayload.
4887**
4888** SQLITE_OK is returned on success, otherwise an error code.
4889*/
4890static int copyPayload(
4891 void *pPayload, /* Pointer to page data */
4892 void *pBuf, /* Pointer to buffer */
4893 int nByte, /* Number of bytes to copy */
4894 int eOp, /* 0 -> copy from page, 1 -> copy to page */
4895 DbPage *pDbPage /* Page containing pPayload */
4896){
4897 if( eOp ){
4898 /* Copy data from buffer to page (a write operation) */
4899 int rc = sqlite3PagerWrite(pDbPage);
4900 if( rc!=SQLITE_OK ){
4901 return rc;
4902 }
4903 memcpy(pPayload, pBuf, nByte);
4904 }else{
4905 /* Copy data from page to buffer (a read operation) */
4906 memcpy(pBuf, pPayload, nByte);
4907 }
4908 return SQLITE_OK;
4909}
4910
4911/*
4912** This function is used to read or overwrite payload information
4913** for the entry that the pCur cursor is pointing to. The eOp
4914** argument is interpreted as follows:
4915**
4916** 0: The operation is a read. Populate the overflow cache.
4917** 1: The operation is a write. Populate the overflow cache.
4918**
4919** A total of "amt" bytes are read or written beginning at "offset".
4920** Data is read to or from the buffer pBuf.
4921**
4922** The content being read or written might appear on the main page
4923** or be scattered out on multiple overflow pages.
4924**
4925** If the current cursor entry uses one or more overflow pages
4926** this function may allocate space for and lazily populate
4927** the overflow page-list cache array (BtCursor.aOverflow).
4928** Subsequent calls use this cache to make seeking to the supplied offset
4929** more efficient.
4930**
4931** Once an overflow page-list cache has been allocated, it must be
4932** invalidated if some other cursor writes to the same table, or if
4933** the cursor is moved to a different row. Additionally, in auto-vacuum
4934** mode, the following events may invalidate an overflow page-list cache.
4935**
4936** * An incremental vacuum,
4937** * A commit in auto_vacuum="full" mode,
4938** * Creating a table (may require moving an overflow page).
4939*/
4940static int accessPayload(
4941 BtCursor *pCur, /* Cursor pointing to entry to read from */
4942 u32 offset, /* Begin reading this far into payload */
4943 u32 amt, /* Read this many bytes */
4944 unsigned char *pBuf, /* Write the bytes into this buffer */
4945 int eOp /* zero to read. non-zero to write. */
4946){
4947 unsigned char *aPayload;
4948 int rc = SQLITE_OK;
4949 int iIdx = 0;
4950 MemPage *pPage = pCur->pPage; /* Btree page of current entry */
4951 BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */
4952#ifdef SQLITE_DIRECT_OVERFLOW_READ
4953 unsigned char * const pBufStart = pBuf; /* Start of original out buffer */
4954#endif
4955
4956 assert( pPage );
4957 assert( eOp==0 || eOp==1 );
4958 assert( pCur->eState==CURSOR_VALID );
4959 if( pCur->ix>=pPage->nCell ){
4960 return SQLITE_CORRUPT_PAGE(pPage);
4961 }
4962 assert( cursorHoldsMutex(pCur) );
4963
4964 getCellInfo(pCur);
4965 aPayload = pCur->info.pPayload;
4966 assert( offset+amt <= pCur->info.nPayload );
4967
4968 assert( aPayload > pPage->aData );
4969 if( (uptr)(aPayload - pPage->aData) > (pBt->usableSize - pCur->info.nLocal) ){
4970 /* Trying to read or write past the end of the data is an error. The
4971 ** conditional above is really:
4972 ** &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize]
4973 ** but is recast into its current form to avoid integer overflow problems
4974 */
4975 return SQLITE_CORRUPT_PAGE(pPage);
4976 }
4977
4978 /* Check if data must be read/written to/from the btree page itself. */
4979 if( offset<pCur->info.nLocal ){
4980 int a = amt;
4981 if( a+offset>pCur->info.nLocal ){
4982 a = pCur->info.nLocal - offset;
4983 }
4984 rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage);
4985 offset = 0;
4986 pBuf += a;
4987 amt -= a;
4988 }else{
4989 offset -= pCur->info.nLocal;
4990 }
4991
4992
4993 if( rc==SQLITE_OK && amt>0 ){
4994 const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */
4995 Pgno nextPage;
4996
4997 nextPage = get4byte(&aPayload[pCur->info.nLocal]);
4998
4999 /* If the BtCursor.aOverflow[] has not been allocated, allocate it now.
5000 **
5001 ** The aOverflow[] array is sized at one entry for each overflow page
5002 ** in the overflow chain. The page number of the first overflow page is
5003 ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array
5004 ** means "not yet known" (the cache is lazily populated).
5005 */
5006 if( (pCur->curFlags & BTCF_ValidOvfl)==0 ){
5007 int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
5008 if( pCur->aOverflow==0
5009 || nOvfl*(int)sizeof(Pgno) > sqlite3MallocSize(pCur->aOverflow)
5010 ){
5011 Pgno *aNew = (Pgno*)sqlite3Realloc(
5012 pCur->aOverflow, nOvfl*2*sizeof(Pgno)
5013 );
5014 if( aNew==0 ){
5015 return SQLITE_NOMEM_BKPT;
5016 }else{
5017 pCur->aOverflow = aNew;
5018 }
5019 }
5020 memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno));
5021 pCur->curFlags |= BTCF_ValidOvfl;
5022 }else{
5023 /* If the overflow page-list cache has been allocated and the
5024 ** entry for the first required overflow page is valid, skip
5025 ** directly to it.
5026 */
5027 if( pCur->aOverflow[offset/ovflSize] ){
5028 iIdx = (offset/ovflSize);
5029 nextPage = pCur->aOverflow[iIdx];
5030 offset = (offset%ovflSize);
5031 }
5032 }
5033
5034 assert( rc==SQLITE_OK && amt>0 );
5035 while( nextPage ){
5036 /* If required, populate the overflow page-list cache. */
5037 if( nextPage > pBt->nPage ) return SQLITE_CORRUPT_BKPT;
5038 assert( pCur->aOverflow[iIdx]==0
5039 || pCur->aOverflow[iIdx]==nextPage
5040 || CORRUPT_DB );
5041 pCur->aOverflow[iIdx] = nextPage;
5042
5043 if( offset>=ovflSize ){
5044 /* The only reason to read this page is to obtain the page
5045 ** number for the next page in the overflow chain. The page
5046 ** data is not required. So first try to lookup the overflow
5047 ** page-list cache, if any, then fall back to the getOverflowPage()
5048 ** function.
5049 */
5050 assert( pCur->curFlags & BTCF_ValidOvfl );
5051 assert( pCur->pBtree->db==pBt->db );
5052 if( pCur->aOverflow[iIdx+1] ){
5053 nextPage = pCur->aOverflow[iIdx+1];
5054 }else{
5055 rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
5056 }
5057 offset -= ovflSize;
5058 }else{
5059 /* Need to read this page properly. It contains some of the
5060 ** range of data that is being read (eOp==0) or written (eOp!=0).
5061 */
5062 int a = amt;
5063 if( a + offset > ovflSize ){
5064 a = ovflSize - offset;
5065 }
5066
5067#ifdef SQLITE_DIRECT_OVERFLOW_READ
5068 /* If all the following are true:
5069 **
5070 ** 1) this is a read operation, and
5071 ** 2) data is required from the start of this overflow page, and
        **   3) there are no dirty pages in the page-cache, and
        **   4) the database is file-backed, and
        **   5) the page is not in the WAL file, and
5075 ** 6) at least 4 bytes have already been read into the output buffer
5076 **
5077 ** then data can be read directly from the database file into the
5078 ** output buffer, bypassing the page-cache altogether. This speeds
5079 ** up loading large records that span many overflow pages.
5080 */
5081 if( eOp==0 /* (1) */
5082 && offset==0 /* (2) */
5083 && sqlite3PagerDirectReadOk(pBt->pPager, nextPage) /* (3,4,5) */
5084 && &pBuf[-4]>=pBufStart /* (6) */
5085 ){
5086 sqlite3_file *fd = sqlite3PagerFile(pBt->pPager);
5087 u8 aSave[4];
5088 u8 *aWrite = &pBuf[-4];
5089 assert( aWrite>=pBufStart ); /* due to (6) */
5090 memcpy(aSave, aWrite, 4);
5091 rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1));
5092 if( rc && nextPage>pBt->nPage ) rc = SQLITE_CORRUPT_BKPT;
5093 nextPage = get4byte(aWrite);
5094 memcpy(aWrite, aSave, 4);
5095 }else
5096#endif
5097
5098 {
5099 DbPage *pDbPage;
5100 rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage,
5101 (eOp==0 ? PAGER_GET_READONLY : 0)
5102 );
5103 if( rc==SQLITE_OK ){
5104 aPayload = sqlite3PagerGetData(pDbPage);
5105 nextPage = get4byte(aPayload);
5106 rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
5107 sqlite3PagerUnref(pDbPage);
5108 offset = 0;
5109 }
5110 }
5111 amt -= a;
5112 if( amt==0 ) return rc;
5113 pBuf += a;
5114 }
5115 if( rc ) break;
5116 iIdx++;
5117 }
5118 }
5119
5120 if( rc==SQLITE_OK && amt>0 ){
5121 /* Overflow chain ends prematurely */
5122 return SQLITE_CORRUPT_PAGE(pPage);
5123 }
5124 return rc;
5125}
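
/*
** A note on the aOverflow[] arithmetic above (assumed sizes, illustration
** only): if nLocal==100 bytes live on the b-tree page and each overflow
** page carries ovflSize==1020 bytes of content (a 1024-byte usable size
** minus the 4-byte next-page pointer), then a request starting at payload
** offset 1500 first has the 100 local bytes subtracted (leaving 1400),
** lands in overflow page aOverflow[1400/1020]==aOverflow[1], and begins
** 1400%1020==380 bytes into that page's content area.
*/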
5126
5127/*
** Read part of the payload for the row to which cursor pCur is currently
5129** pointing. "amt" bytes will be transferred into pBuf[]. The transfer
5130** begins at "offset".
5131**
5132** pCur can be pointing to either a table or an index b-tree.
5133** If pointing to a table btree, then the content section is read. If
5134** pCur is pointing to an index b-tree then the key section is read.
5135**
5136** For sqlite3BtreePayload(), the caller must ensure that pCur is pointing
5137** to a valid row in the table. For sqlite3BtreePayloadChecked(), the
5138** cursor might be invalid or might need to be restored before being read.
5139**
5140** Return SQLITE_OK on success or an error code if anything goes
5141** wrong. An error is returned if "offset+amt" is larger than
5142** the available payload.
5143*/
5144int sqlite3BtreePayload(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
5145 assert( cursorHoldsMutex(pCur) );
5146 assert( pCur->eState==CURSOR_VALID );
5147 assert( pCur->iPage>=0 && pCur->pPage );
5148 return accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0);
5149}
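
/*
** Usage sketch (hypothetical buffer and sizes, illustration only): copy
** the first 8 bytes of the current row's payload, assuming the cursor is
** already valid and the payload holds at least 8 bytes:
**
**     u8 aHdr[8];
**     int rc = sqlite3BtreePayload(pCur, 0, 8, aHdr);
**
** If offset+amt exceeded sqlite3BtreePayloadSize(pCur), an error code
** would be returned instead.
*/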
5150
5151/*
** This variant of sqlite3BtreePayload() works even if the cursor is not
5153** in the CURSOR_VALID state. It is only used by the sqlite3_blob_read()
5154** interface.
5155*/
5156#ifndef SQLITE_OMIT_INCRBLOB
5157static SQLITE_NOINLINE int accessPayloadChecked(
5158 BtCursor *pCur,
5159 u32 offset,
5160 u32 amt,
5161 void *pBuf
5162){
5163 int rc;
5164 if ( pCur->eState==CURSOR_INVALID ){
5165 return SQLITE_ABORT;
5166 }
5167 assert( cursorOwnsBtShared(pCur) );
5168 rc = btreeRestoreCursorPosition(pCur);
5169 return rc ? rc : accessPayload(pCur, offset, amt, pBuf, 0);
5170}
5171int sqlite3BtreePayloadChecked(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
5172 if( pCur->eState==CURSOR_VALID ){
5173 assert( cursorOwnsBtShared(pCur) );
5174 return accessPayload(pCur, offset, amt, pBuf, 0);
5175 }else{
5176 return accessPayloadChecked(pCur, offset, amt, pBuf);
5177 }
5178}
5179#endif /* SQLITE_OMIT_INCRBLOB */
5180
5181/*
5182** Return a pointer to payload information from the entry that the
5183** pCur cursor is pointing to. The pointer is to the beginning of
** the key for index btrees (pPage->intKey==0) and is the data for
5185** table btrees (pPage->intKey==1). The number of bytes of available
5186** key/data is written into *pAmt. If *pAmt==0, then the value
5187** returned will not be a valid pointer.
5188**
5189** This routine is an optimization. It is common for the entire key
5190** and data to fit on the local page and for there to be no overflow
5191** pages. When that is so, this routine can be used to access the
5192** key and data without making a copy. If the key and/or data spills
5193** onto overflow pages, then accessPayload() must be used to reassemble
5194** the key/data and copy it into a preallocated buffer.
5195**
5196** The pointer returned by this routine looks directly into the cached
5197** page of the database. The data might change or move the next time
5198** any btree routine is called.
5199*/
5200static const void *fetchPayload(
5201 BtCursor *pCur, /* Cursor pointing to entry to read from */
5202 u32 *pAmt /* Write the number of available bytes here */
5203){
5204 int amt;
5205 assert( pCur!=0 && pCur->iPage>=0 && pCur->pPage);
5206 assert( pCur->eState==CURSOR_VALID );
5207 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
5208 assert( cursorOwnsBtShared(pCur) );
5209 assert( pCur->ix<pCur->pPage->nCell || CORRUPT_DB );
5210 assert( pCur->info.nSize>0 );
5211 assert( pCur->info.pPayload>pCur->pPage->aData || CORRUPT_DB );
5212 assert( pCur->info.pPayload<pCur->pPage->aDataEnd ||CORRUPT_DB);
5213 amt = pCur->info.nLocal;
5214 if( amt>(int)(pCur->pPage->aDataEnd - pCur->info.pPayload) ){
5215 /* There is too little space on the page for the expected amount
5216 ** of local content. Database must be corrupt. */
5217 assert( CORRUPT_DB );
5218 amt = MAX(0, (int)(pCur->pPage->aDataEnd - pCur->info.pPayload));
5219 }
5220 *pAmt = (u32)amt;
5221 return (void*)pCur->info.pPayload;
5222}
5223
5224
5225/*
** For the entry that cursor pCur is pointing to, return as
5227** many bytes of the key or data as are available on the local
5228** b-tree page. Write the number of available bytes into *pAmt.
5229**
5230** The pointer returned is ephemeral. The key/data may move
5231** or be destroyed on the next call to any Btree routine,
5232** including calls from other threads against the same cache.
5233** Hence, a mutex on the BtShared should be held prior to calling
5234** this routine.
5235**
** This routine is used to get quick access to key and data
5237** in the common case where no overflow pages are used.
5238*/
5239const void *sqlite3BtreePayloadFetch(BtCursor *pCur, u32 *pAmt){
5240 return fetchPayload(pCur, pAmt);
5241}
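
/*
** Sketch of the intended fast path (illustrative only; nNeeded, aLocalBuf
** and useInPlace() are hypothetical caller-side names):
**
**     u32 nAvail;
**     const void *p = sqlite3BtreePayloadFetch(pCur, &nAvail);
**     if( nAvail>=nNeeded ){
**       useInPlace(p, nNeeded);
**     }else{
**       rc = sqlite3BtreePayload(pCur, 0, nNeeded, aLocalBuf);
**     }
**
** When the whole record is stored locally, nAvail covers it and no copy
** is made. Otherwise the copying interface reassembles the record from
** its overflow pages into aLocalBuf.
*/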
5242
5243
5244/*
5245** Move the cursor down to a new child page. The newPgno argument is the
5246** page number of the child page to move to.
5247**
5248** This function returns SQLITE_CORRUPT if the page-header flags field of
5249** the new child page does not match the flags field of the parent (i.e.
5250** if an intkey page appears to be the parent of a non-intkey page, or
5251** vice-versa).
5252*/
5253static int moveToChild(BtCursor *pCur, u32 newPgno){
5254 assert( cursorOwnsBtShared(pCur) );
5255 assert( pCur->eState==CURSOR_VALID );
5256 assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
5257 assert( pCur->iPage>=0 );
5258 if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
5259 return SQLITE_CORRUPT_BKPT;
5260 }
5261 pCur->info.nSize = 0;
5262 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
5263 pCur->aiIdx[pCur->iPage] = pCur->ix;
5264 pCur->apPage[pCur->iPage] = pCur->pPage;
5265 pCur->ix = 0;
5266 pCur->iPage++;
5267 return getAndInitPage(pCur->pBt, newPgno, &pCur->pPage, pCur,
5268 pCur->curPagerFlags);
5269}
5270
5271#ifdef SQLITE_DEBUG
5272/*
5273** Page pParent is an internal (non-leaf) tree page. This function
** asserts that page number iChild is the left-child of the iIdx'th
5275** cell in page pParent. Or, if iIdx is equal to the total number of
5276** cells in pParent, that page number iChild is the right-child of
5277** the page.
5278*/
5279static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){
5280 if( CORRUPT_DB ) return; /* The conditions tested below might not be true
5281 ** in a corrupt database */
5282 assert( iIdx<=pParent->nCell );
5283 if( iIdx==pParent->nCell ){
5284 assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild );
5285 }else{
5286 assert( get4byte(findCell(pParent, iIdx))==iChild );
5287 }
5288}
5289#else
5290# define assertParentIndex(x,y,z)
5291#endif
5292
5293/*
5294** Move the cursor up to the parent page.
5295**
** pCur->ix is set to the cell index that contains the pointer
** to the page we are coming from. If we are coming from the
** right-most child page then pCur->ix is set to one more than
5299** the largest cell index.
5300*/
5301static void moveToParent(BtCursor *pCur){
5302 MemPage *pLeaf;
5303 assert( cursorOwnsBtShared(pCur) );
5304 assert( pCur->eState==CURSOR_VALID );
5305 assert( pCur->iPage>0 );
5306 assert( pCur->pPage );
5307 assertParentIndex(
5308 pCur->apPage[pCur->iPage-1],
5309 pCur->aiIdx[pCur->iPage-1],
5310 pCur->pPage->pgno
5311 );
5312 testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell );
5313 pCur->info.nSize = 0;
5314 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
5315 pCur->ix = pCur->aiIdx[pCur->iPage-1];
5316 pLeaf = pCur->pPage;
5317 pCur->pPage = pCur->apPage[--pCur->iPage];
5318 releasePageNotNull(pLeaf);
5319}
5320
5321/*
5322** Move the cursor to point to the root page of its b-tree structure.
5323**
5324** If the table has a virtual root page, then the cursor is moved to point
5325** to the virtual root page instead of the actual root page. A table has a
** virtual root page when the actual root page contains no cells but has a
5327** single child page. This can only happen with the table rooted at page 1.
5328**
5329** If the b-tree structure is empty, the cursor state is set to
5330** CURSOR_INVALID and this routine returns SQLITE_EMPTY. Otherwise,
5331** the cursor is set to point to the first cell located on the root
5332** (or virtual root) page and the cursor state is set to CURSOR_VALID.
5333**
5334** If this function returns successfully, it may be assumed that the
5335** page-header flags indicate that the [virtual] root-page is the expected
5336** kind of b-tree page (i.e. if when opening the cursor the caller did not
5337** specify a KeyInfo structure the flags byte is set to 0x05 or 0x0D,
5338** indicating a table b-tree, or if the caller did specify a KeyInfo
5339** structure the flags byte is set to 0x02 or 0x0A, indicating an index
5340** b-tree).
5341*/
5342static int moveToRoot(BtCursor *pCur){
5343 MemPage *pRoot;
5344 int rc = SQLITE_OK;
5345
5346 assert( cursorOwnsBtShared(pCur) );
5347 assert( CURSOR_INVALID < CURSOR_REQUIRESEEK );
5348 assert( CURSOR_VALID < CURSOR_REQUIRESEEK );
5349 assert( CURSOR_FAULT > CURSOR_REQUIRESEEK );
5350 assert( pCur->eState < CURSOR_REQUIRESEEK || pCur->iPage<0 );
5351 assert( pCur->pgnoRoot>0 || pCur->iPage<0 );
5352
5353 if( pCur->iPage>=0 ){
5354 if( pCur->iPage ){
5355 releasePageNotNull(pCur->pPage);
5356 while( --pCur->iPage ){
5357 releasePageNotNull(pCur->apPage[pCur->iPage]);
5358 }
5359 pRoot = pCur->pPage = pCur->apPage[0];
5360 goto skip_init;
5361 }
5362 }else if( pCur->pgnoRoot==0 ){
5363 pCur->eState = CURSOR_INVALID;
5364 return SQLITE_EMPTY;
5365 }else{
5366 assert( pCur->iPage==(-1) );
5367 if( pCur->eState>=CURSOR_REQUIRESEEK ){
5368 if( pCur->eState==CURSOR_FAULT ){
5369 assert( pCur->skipNext!=SQLITE_OK );
5370 return pCur->skipNext;
5371 }
5372 sqlite3BtreeClearCursor(pCur);
5373 }
5374 rc = getAndInitPage(pCur->pBt, pCur->pgnoRoot, &pCur->pPage,
5375 0, pCur->curPagerFlags);
5376 if( rc!=SQLITE_OK ){
5377 pCur->eState = CURSOR_INVALID;
5378 return rc;
5379 }
5380 pCur->iPage = 0;
5381 pCur->curIntKey = pCur->pPage->intKey;
5382 }
5383 pRoot = pCur->pPage;
5384 assert( pRoot->pgno==pCur->pgnoRoot || CORRUPT_DB );
5385
5386 /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor
5387 ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is
5388 ** NULL, the caller expects a table b-tree. If this is not the case,
5389 ** return an SQLITE_CORRUPT error.
5390 **
5391 ** Earlier versions of SQLite assumed that this test could not fail
5392 ** if the root page was already loaded when this function was called (i.e.
5393 ** if pCur->iPage>=0). But this is not so if the database is corrupted
5394 ** in such a way that page pRoot is linked into a second b-tree table
5395 ** (or the freelist). */
5396 assert( pRoot->intKey==1 || pRoot->intKey==0 );
5397 if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){
5398 return SQLITE_CORRUPT_PAGE(pCur->pPage);
5399 }
5400
5401skip_init:
5402 pCur->ix = 0;
5403 pCur->info.nSize = 0;
5404 pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl);
5405
5406 if( pRoot->nCell>0 ){
5407 pCur->eState = CURSOR_VALID;
5408 }else if( !pRoot->leaf ){
5409 Pgno subpage;
5410 if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT;
5411 subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
5412 pCur->eState = CURSOR_VALID;
5413 rc = moveToChild(pCur, subpage);
5414 }else{
5415 pCur->eState = CURSOR_INVALID;
5416 rc = SQLITE_EMPTY;
5417 }
5418 return rc;
5419}
5420
5421/*
5422** Move the cursor down to the left-most leaf entry beneath the
5423** entry to which it is currently pointing.
5424**
5425** The left-most leaf is the one with the smallest key - the first
5426** in ascending order.
5427*/
5428static int moveToLeftmost(BtCursor *pCur){
5429 Pgno pgno;
5430 int rc = SQLITE_OK;
5431 MemPage *pPage;
5432
5433 assert( cursorOwnsBtShared(pCur) );
5434 assert( pCur->eState==CURSOR_VALID );
5435 while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
5436 assert( pCur->ix<pPage->nCell );
5437 pgno = get4byte(findCell(pPage, pCur->ix));
5438 rc = moveToChild(pCur, pgno);
5439 }
5440 return rc;
5441}
5442
5443/*
5444** Move the cursor down to the right-most leaf entry beneath the
5445** page to which it is currently pointing. Notice the difference
5446** between moveToLeftmost() and moveToRightmost(). moveToLeftmost()
5447** finds the left-most entry beneath the *entry* whereas moveToRightmost()
5448** finds the right-most entry beneath the *page*.
5449**
5450** The right-most entry is the one with the largest key - the last
5451** key in ascending order.
5452*/
5453static int moveToRightmost(BtCursor *pCur){
5454 Pgno pgno;
5455 int rc = SQLITE_OK;
5456 MemPage *pPage = 0;
5457
5458 assert( cursorOwnsBtShared(pCur) );
5459 assert( pCur->eState==CURSOR_VALID );
5460 while( !(pPage = pCur->pPage)->leaf ){
5461 pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
5462 pCur->ix = pPage->nCell;
5463 rc = moveToChild(pCur, pgno);
5464 if( rc ) return rc;
5465 }
5466 pCur->ix = pPage->nCell-1;
5467 assert( pCur->info.nSize==0 );
5468 assert( (pCur->curFlags & BTCF_ValidNKey)==0 );
5469 return SQLITE_OK;
5470}
5471
5472/* Move the cursor to the first entry in the table. Return SQLITE_OK
5473** on success. Set *pRes to 0 if the cursor actually points to something
5474** or set *pRes to 1 if the table is empty.
5475*/
5476int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
5477 int rc;
5478
5479 assert( cursorOwnsBtShared(pCur) );
5480 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
5481 rc = moveToRoot(pCur);
5482 if( rc==SQLITE_OK ){
5483 assert( pCur->pPage->nCell>0 );
5484 *pRes = 0;
5485 rc = moveToLeftmost(pCur);
5486 }else if( rc==SQLITE_EMPTY ){
5487 assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
5488 *pRes = 1;
5489 rc = SQLITE_OK;
5490 }
5491 return rc;
5492}
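
/*
** Sketch of a full forward scan built from these primitives (illustrative
** only; processRow() is a hypothetical callback):
**
**     int isEmpty;
**     int rc = sqlite3BtreeFirst(pCur, &isEmpty);
**     while( rc==SQLITE_OK && !isEmpty ){
**       processRow(pCur);
**       rc = sqlite3BtreeNext(pCur, 0);
**       if( rc==SQLITE_DONE ){ rc = SQLITE_OK; break; }
**     }
**
** sqlite3BtreeNext() reports the end of the table by returning
** SQLITE_DONE, which the loop treats as normal termination.
*/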
5493
5494/* Move the cursor to the last entry in the table. Return SQLITE_OK
5495** on success. Set *pRes to 0 if the cursor actually points to something
5496** or set *pRes to 1 if the table is empty.
5497*/
5498int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
5499 int rc;
5500
5501 assert( cursorOwnsBtShared(pCur) );
5502 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
5503
5504 /* If the cursor already points to the last entry, this is a no-op. */
5505 if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){
5506#ifdef SQLITE_DEBUG
5507 /* This block serves to assert() that the cursor really does point
5508 ** to the last entry in the b-tree. */
5509 int ii;
5510 for(ii=0; ii<pCur->iPage; ii++){
5511 assert( pCur->aiIdx[ii]==pCur->apPage[ii]->nCell );
5512 }
5513 assert( pCur->ix==pCur->pPage->nCell-1 || CORRUPT_DB );
5514 testcase( pCur->ix!=pCur->pPage->nCell-1 );
5515 /* ^-- dbsqlfuzz b92b72e4de80b5140c30ab71372ca719b8feb618 */
5516 assert( pCur->pPage->leaf );
5517#endif
5518 *pRes = 0;
5519 return SQLITE_OK;
5520 }
5521
5522 rc = moveToRoot(pCur);
5523 if( rc==SQLITE_OK ){
5524 assert( pCur->eState==CURSOR_VALID );
5525 *pRes = 0;
5526 rc = moveToRightmost(pCur);
5527 if( rc==SQLITE_OK ){
5528 pCur->curFlags |= BTCF_AtLast;
5529 }else{
5530 pCur->curFlags &= ~BTCF_AtLast;
5531 }
5532 }else if( rc==SQLITE_EMPTY ){
5533 assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
5534 *pRes = 1;
5535 rc = SQLITE_OK;
5536 }
5537 return rc;
5538}
5539
/* Move the cursor so that it points to an entry in a table b-tree (a.k.a. an
** INTKEY table) near the key intKey. Return a success code.
5542**
5543** If an exact match is not found, then the cursor is always
5544** left pointing at a leaf page which would hold the entry if it
5545** were present. The cursor might point to an entry that comes
5546** before or after the key.
5547**
5548** An integer is written into *pRes which is the result of
5549** comparing the key with the entry to which the cursor is
5550** pointing. The meaning of the integer written into
5551** *pRes is as follows:
5552**
5553** *pRes<0 The cursor is left pointing at an entry that
5554** is smaller than intKey or if the table is empty
**                  and the cursor is therefore left pointing to nothing.
5556**
5557** *pRes==0 The cursor is left pointing at an entry that
5558** exactly matches intKey.
5559**
5560** *pRes>0 The cursor is left pointing at an entry that
5561** is larger than intKey.
5562*/
5563int sqlite3BtreeTableMoveto(
5564 BtCursor *pCur, /* The cursor to be moved */
5565 i64 intKey, /* The table key */
5566 int biasRight, /* If true, bias the search to the high end */
5567 int *pRes /* Write search results here */
5568){
5569 int rc;
5570
5571 assert( cursorOwnsBtShared(pCur) );
5572 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
5573 assert( pRes );
5574 assert( pCur->pKeyInfo==0 );
5575 assert( pCur->eState!=CURSOR_VALID || pCur->curIntKey!=0 );
5576
5577 /* If the cursor is already positioned at the point we are trying
5578 ** to move to, then just return without doing any work */
5579 if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 ){
5580 if( pCur->info.nKey==intKey ){
5581 *pRes = 0;
5582 return SQLITE_OK;
5583 }
5584 if( pCur->info.nKey<intKey ){
5585 if( (pCur->curFlags & BTCF_AtLast)!=0 ){
5586 *pRes = -1;
5587 return SQLITE_OK;
5588 }
5589 /* If the requested key is one more than the previous key, then
5590 ** try to get there using sqlite3BtreeNext() rather than a full
5591 ** binary search. This is an optimization only. The correct answer
      ** is still obtained without this case, only a little more slowly. */
5593 if( pCur->info.nKey+1==intKey ){
5594 *pRes = 0;
5595 rc = sqlite3BtreeNext(pCur, 0);
5596 if( rc==SQLITE_OK ){
5597 getCellInfo(pCur);
5598 if( pCur->info.nKey==intKey ){
5599 return SQLITE_OK;
5600 }
5601 }else if( rc!=SQLITE_DONE ){
5602 return rc;
5603 }
5604 }
5605 }
5606 }
5607
5608#ifdef SQLITE_DEBUG
5609 pCur->pBtree->nSeek++; /* Performance measurement during testing */
5610#endif
5611
5612 rc = moveToRoot(pCur);
5613 if( rc ){
5614 if( rc==SQLITE_EMPTY ){
5615 assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
5616 *pRes = -1;
5617 return SQLITE_OK;
5618 }
5619 return rc;
5620 }
5621 assert( pCur->pPage );
5622 assert( pCur->pPage->isInit );
5623 assert( pCur->eState==CURSOR_VALID );
5624 assert( pCur->pPage->nCell > 0 );
5625 assert( pCur->iPage==0 || pCur->apPage[0]->intKey==pCur->curIntKey );
5626 assert( pCur->curIntKey );
5627
5628 for(;;){
5629 int lwr, upr, idx, c;
5630 Pgno chldPg;
5631 MemPage *pPage = pCur->pPage;
5632 u8 *pCell; /* Pointer to current cell in pPage */
5633
5634 /* pPage->nCell must be greater than zero. If this is the root-page
5635 ** the cursor would have been INVALID above and this for(;;) loop
5636 ** not run. If this is not the root-page, then the moveToChild() routine
5637 ** would have already detected db corruption. Similarly, pPage must
5638 ** be the right kind (index or table) of b-tree page. Otherwise
5639 ** a moveToChild() or moveToRoot() call would have detected corruption. */
5640 assert( pPage->nCell>0 );
5641 assert( pPage->intKey );
5642 lwr = 0;
5643 upr = pPage->nCell-1;
5644 assert( biasRight==0 || biasRight==1 );
5645 idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */
5646 for(;;){
5647 i64 nCellKey;
5648 pCell = findCellPastPtr(pPage, idx);
5649 if( pPage->intKeyLeaf ){
5650 while( 0x80 <= *(pCell++) ){
5651 if( pCell>=pPage->aDataEnd ){
5652 return SQLITE_CORRUPT_PAGE(pPage);
5653 }
5654 }
5655 }
5656 getVarint(pCell, (u64*)&nCellKey);
5657 if( nCellKey<intKey ){
5658 lwr = idx+1;
5659 if( lwr>upr ){ c = -1; break; }
5660 }else if( nCellKey>intKey ){
5661 upr = idx-1;
5662 if( lwr>upr ){ c = +1; break; }
5663 }else{
5664 assert( nCellKey==intKey );
5665 pCur->ix = (u16)idx;
5666 if( !pPage->leaf ){
5667 lwr = idx;
5668 goto moveto_table_next_layer;
5669 }else{
5670 pCur->curFlags |= BTCF_ValidNKey;
5671 pCur->info.nKey = nCellKey;
5672 pCur->info.nSize = 0;
5673 *pRes = 0;
5674 return SQLITE_OK;
5675 }
5676 }
5677 assert( lwr+upr>=0 );
5678 idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */
5679 }
5680 assert( lwr==upr+1 || !pPage->leaf );
5681 assert( pPage->isInit );
5682 if( pPage->leaf ){
5683 assert( pCur->ix<pCur->pPage->nCell );
5684 pCur->ix = (u16)idx;
5685 *pRes = c;
5686 rc = SQLITE_OK;
5687 goto moveto_table_finish;
5688 }
5689moveto_table_next_layer:
5690 if( lwr>=pPage->nCell ){
5691 chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
5692 }else{
5693 chldPg = get4byte(findCell(pPage, lwr));
5694 }
5695 pCur->ix = (u16)lwr;
5696 rc = moveToChild(pCur, chldPg);
5697 if( rc ) break;
5698 }
5699moveto_table_finish:
5700 pCur->info.nSize = 0;
5701 assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
5702 return rc;
5703}
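
/*
** Usage sketch (illustrative only): position pCur on rowid 42 if that row
** exists, or learn where such a row would go if it does not:
**
**     int res;
**     int rc = sqlite3BtreeTableMoveto(pCur, 42, 0, &res);
**
** On SQLITE_OK, res==0 means the cursor sits on rowid 42 exactly; res<0
** means it sits on a smaller rowid (or the table is empty); res>0 means
** it sits on a larger rowid.
*/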
5704
5705/*
** Compare the "idx"-th cell on the page to which cursor pCur is currently
** pointing against pIdxKey using xRecordCompare. Return negative or
** zero if the cell is less than or equal to pIdxKey. Return positive
5709** if unknown.
5710**
5711** Return value negative: Cell at pCur[idx] less than pIdxKey
5712**
5713** Return value is zero: Cell at pCur[idx] equals pIdxKey
5714**
5715** Return value positive: Nothing is known about the relationship
5716** of the cell at pCur[idx] and pIdxKey.
5717**
5718** This routine is part of an optimization. It is always safe to return
5719** a positive value as that will cause the optimization to be skipped.
5720*/
5721static int indexCellCompare(
5722 BtCursor *pCur,
5723 int idx,
5724 UnpackedRecord *pIdxKey,
5725 RecordCompare xRecordCompare
5726){
5727 MemPage *pPage = pCur->pPage;
5728 int c;
5729 int nCell; /* Size of the pCell cell in bytes */
5730 u8 *pCell = findCellPastPtr(pPage, idx);
5731
5732 nCell = pCell[0];
5733 if( nCell<=pPage->max1bytePayload ){
5734 /* This branch runs if the record-size field of the cell is a
5735 ** single byte varint and the record fits entirely on the main
5736 ** b-tree page. */
5737 testcase( pCell+nCell+1==pPage->aDataEnd );
5738 c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey);
5739 }else if( !(pCell[1] & 0x80)
5740 && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal
5741 ){
5742 /* The record-size field is a 2 byte varint and the record
5743 ** fits entirely on the main b-tree page. */
5744 testcase( pCell+nCell+2==pPage->aDataEnd );
5745 c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey);
5746 }else{
5747 /* If the record extends into overflow pages, do not attempt
5748 ** the optimization. */
5749 c = 99;
5750 }
5751 return c;
5752}
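
/*
** Worked example of the record-size decode above (assumed bytes, for
** illustration): if a cell begins with 0x81 0x20, the first byte (129)
** exceeds a typical max1bytePayload of 127, the second byte has its high
** bit clear, and the size is reconstructed as
**
**     ((0x81 & 0x7f)<<7) + 0x20  =  128 + 32  =  160 bytes
**
** which is the value of the 2-byte varint. Had the second byte also had
** its high bit set, the record size would be at least 16384 bytes, too
** large to fit locally, so the comparison falls back to the slow path.
*/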
5753
5754/*
** Return true (non-zero) if pCur is currently pointing to the last
5756** page of a table.
5757*/
5758static int cursorOnLastPage(BtCursor *pCur){
5759 int i;
5760 assert( pCur->eState==CURSOR_VALID );
5761 for(i=0; i<pCur->iPage; i++){
5762 MemPage *pPage = pCur->apPage[i];
5763 if( pCur->aiIdx[i]<pPage->nCell ) return 0;
5764 }
5765 return 1;
5766}
5767
5768/* Move the cursor so that it points to an entry in an index table
5769** near the key pIdxKey. Return a success code.
5770**
5771** If an exact match is not found, then the cursor is always
5772** left pointing at a leaf page which would hold the entry if it
5773** were present. The cursor might point to an entry that comes
5774** before or after the key.
5775**
5776** An integer is written into *pRes which is the result of
5777** comparing the key with the entry to which the cursor is
5778** pointing. The meaning of the integer written into
5779** *pRes is as follows:
5780**
5781** *pRes<0 The cursor is left pointing at an entry that
5782** is smaller than pIdxKey or if the table is empty
**                  and the cursor is therefore left pointing to nothing.
5784**
5785** *pRes==0 The cursor is left pointing at an entry that
5786** exactly matches pIdxKey.
5787**
5788** *pRes>0 The cursor is left pointing at an entry that
5789** is larger than pIdxKey.
5790**
5791** The pIdxKey->eqSeen field is set to 1 if there
5792** exists an entry in the table that exactly matches pIdxKey.
5793*/
5794int sqlite3BtreeIndexMoveto(
5795 BtCursor *pCur, /* The cursor to be moved */
5796 UnpackedRecord *pIdxKey, /* Unpacked index key */
5797 int *pRes /* Write search results here */
5798){
5799 int rc;
5800 RecordCompare xRecordCompare;
5801
5802 assert( cursorOwnsBtShared(pCur) );
5803 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
5804 assert( pRes );
5805 assert( pCur->pKeyInfo!=0 );
5806
5807#ifdef SQLITE_DEBUG
5808 pCur->pBtree->nSeek++; /* Performance measurement during testing */
5809#endif
5810
5811 xRecordCompare = sqlite3VdbeFindCompare(pIdxKey);
5812 pIdxKey->errCode = 0;
5813 assert( pIdxKey->default_rc==1
5814 || pIdxKey->default_rc==0
5815 || pIdxKey->default_rc==-1
5816 );
5817
5818
5819 /* Check to see if we can skip a lot of work. Two cases:
5820 **
5821 ** (1) If the cursor is already pointing to the very last cell
5822 ** in the table and the pIdxKey search key is greater than or
5823 ** equal to that last cell, then no movement is required.
5824 **
5825 ** (2) If the cursor is on the last page of the table and the first
5826 ** cell on that last page is less than or equal to the pIdxKey
5827 ** search key, then we can start the search on the current page
5828 ** without needing to go back to root.
5829 */
5830 if( pCur->eState==CURSOR_VALID
5831 && pCur->pPage->leaf
5832 && cursorOnLastPage(pCur)
5833 ){
5834 int c;
5835 if( pCur->ix==pCur->pPage->nCell-1
5836 && (c = indexCellCompare(pCur, pCur->ix, pIdxKey, xRecordCompare))<=0
5837 && pIdxKey->errCode==SQLITE_OK
5838 ){
5839 *pRes = c;
5840 return SQLITE_OK; /* Cursor already pointing at the correct spot */
5841 }
5842 if( pCur->iPage>0
5843 && indexCellCompare(pCur, 0, pIdxKey, xRecordCompare)<=0
5844 && pIdxKey->errCode==SQLITE_OK
5845 ){
5846 pCur->curFlags &= ~BTCF_ValidOvfl;
5847 if( !pCur->pPage->isInit ){
5848 return SQLITE_CORRUPT_BKPT;
5849 }
5850 goto bypass_moveto_root; /* Start search on the current page */
5851 }
5852 pIdxKey->errCode = SQLITE_OK;
5853 }
5854
5855 rc = moveToRoot(pCur);
5856 if( rc ){
5857 if( rc==SQLITE_EMPTY ){
5858 assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
5859 *pRes = -1;
5860 return SQLITE_OK;
5861 }
5862 return rc;
5863 }
5864
5865bypass_moveto_root:
5866 assert( pCur->pPage );
5867 assert( pCur->pPage->isInit );
5868 assert( pCur->eState==CURSOR_VALID );
5869 assert( pCur->pPage->nCell > 0 );
5870 assert( pCur->curIntKey==0 );
5871 assert( pIdxKey!=0 );
5872 for(;;){
5873 int lwr, upr, idx, c;
5874 Pgno chldPg;
5875 MemPage *pPage = pCur->pPage;
5876 u8 *pCell; /* Pointer to current cell in pPage */
5877
5878 /* pPage->nCell must be greater than zero. If this is the root-page
5879 ** the cursor would have been INVALID above and this for(;;) loop
5880 ** not run. If this is not the root-page, then the moveToChild() routine
5881 ** would have already detected db corruption. Similarly, pPage must
5882 ** be the right kind (index or table) of b-tree page. Otherwise
5883 ** a moveToChild() or moveToRoot() call would have detected corruption. */
5884 assert( pPage->nCell>0 );
5885 assert( pPage->intKey==0 );
5886 lwr = 0;
5887 upr = pPage->nCell-1;
5888 idx = upr>>1; /* idx = (lwr+upr)/2; */
5889 for(;;){
5890 int nCell; /* Size of the pCell cell in bytes */
5891 pCell = findCellPastPtr(pPage, idx);
5892
5893 /* The maximum supported page-size is 65536 bytes. This means that
5894 ** the maximum number of record bytes stored on an index B-Tree
5895 ** page is less than 16384 bytes and may be stored as a 2-byte
5896 ** varint. This information is used to attempt to avoid parsing
5897 ** the entire cell by checking for the cases where the record is
5898 ** stored entirely within the b-tree page by inspecting the first
5899 ** 2 bytes of the cell.
5900 */
5901 nCell = pCell[0];
5902 if( nCell<=pPage->max1bytePayload ){
5903 /* This branch runs if the record-size field of the cell is a
5904 ** single byte varint and the record fits entirely on the main
5905 ** b-tree page. */
5906 testcase( pCell+nCell+1==pPage->aDataEnd );
5907 c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey);
5908 }else if( !(pCell[1] & 0x80)
5909 && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal
5910 ){
5911 /* The record-size field is a 2 byte varint and the record
5912 ** fits entirely on the main b-tree page. */
5913 testcase( pCell+nCell+2==pPage->aDataEnd );
5914 c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey);
5915 }else{
5916 /* The record flows over onto one or more overflow pages. In
5917 ** this case the whole cell needs to be parsed, a buffer allocated
5918 ** and accessPayload() used to retrieve the record into the
5919 ** buffer before VdbeRecordCompare() can be called.
5920 **
5921 ** If the record is corrupt, the xRecordCompare routine may read
5922 ** up to two varints past the end of the buffer. An extra 18
5923 ** bytes of padding is allocated at the end of the buffer in
5924 ** case this happens. */
5925 void *pCellKey;
5926 u8 * const pCellBody = pCell - pPage->childPtrSize;
5927 const int nOverrun = 18; /* Size of the overrun padding */
5928 pPage->xParseCell(pPage, pCellBody, &pCur->info);
5929 nCell = (int)pCur->info.nKey;
5930 testcase( nCell<0 ); /* True if key size is 2^32 or more */
5931 testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */
5932 testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */
5933 testcase( nCell==2 ); /* Minimum legal index key size */
5934 if( nCell<2 || nCell/pCur->pBt->usableSize>pCur->pBt->nPage ){
5935 rc = SQLITE_CORRUPT_PAGE(pPage);
5936 goto moveto_index_finish;
5937 }
5938 pCellKey = sqlite3Malloc( nCell+nOverrun );
5939 if( pCellKey==0 ){
5940 rc = SQLITE_NOMEM_BKPT;
5941 goto moveto_index_finish;
5942 }
5943 pCur->ix = (u16)idx;
5944 rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0);
5945 memset(((u8*)pCellKey)+nCell,0,nOverrun); /* Fix uninit warnings */
5946 pCur->curFlags &= ~BTCF_ValidOvfl;
5947 if( rc ){
5948 sqlite3_free(pCellKey);
5949 goto moveto_index_finish;
5950 }
5951 c = sqlite3VdbeRecordCompare(nCell, pCellKey, pIdxKey);
5952 sqlite3_free(pCellKey);
5953 }
5954 assert(
5955 (pIdxKey->errCode!=SQLITE_CORRUPT || c==0)
5956 && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed)
5957 );
5958 if( c<0 ){
5959 lwr = idx+1;
5960 }else if( c>0 ){
5961 upr = idx-1;
5962 }else{
5963 assert( c==0 );
5964 *pRes = 0;
5965 rc = SQLITE_OK;
5966 pCur->ix = (u16)idx;
5967 if( pIdxKey->errCode ) rc = SQLITE_CORRUPT_BKPT;
5968 goto moveto_index_finish;
5969 }
5970 if( lwr>upr ) break;
5971 assert( lwr+upr>=0 );
5972 idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */
5973 }
5974 assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) );
5975 assert( pPage->isInit );
5976 if( pPage->leaf ){
5977 assert( pCur->ix<pCur->pPage->nCell || CORRUPT_DB );
5978 pCur->ix = (u16)idx;
5979 *pRes = c;
5980 rc = SQLITE_OK;
5981 goto moveto_index_finish;
5982 }
5983 if( lwr>=pPage->nCell ){
5984 chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
5985 }else{
5986 chldPg = get4byte(findCell(pPage, lwr));
5987 }
5988 pCur->ix = (u16)lwr;
5989 rc = moveToChild(pCur, chldPg);
5990 if( rc ) break;
5991 }
5992moveto_index_finish:
5993 pCur->info.nSize = 0;
5994 assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
5995 return rc;
5996}
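
/*
** Sketch of how a caller might interpret *pRes after the seek above
** (illustrative only): to land on the smallest entry that is greater than
** or equal to pIdxKey, step forward once if the cursor stopped on a
** smaller entry:
**
**     int res;
**     int rc = sqlite3BtreeIndexMoveto(pCur, pIdxKey, &res);
**     if( rc==SQLITE_OK && res<0 ){
**       rc = sqlite3BtreeNext(pCur, 0);
**     }
**
** If that sqlite3BtreeNext() call returns SQLITE_DONE, no entry greater
** than or equal to pIdxKey exists. This is roughly the pattern the VDBE
** layer uses when implementing seek opcodes such as OP_SeekGE.
*/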
5997
5998
5999/*
6000** Return TRUE if the cursor is not pointing at an entry of the table.
6001**
6002** TRUE will be returned after a call to sqlite3BtreeNext() moves
6003** past the last entry in the table or sqlite3BtreePrev() moves past
6004** the first entry. TRUE is also returned if the table is empty.
6005*/
6006int sqlite3BtreeEof(BtCursor *pCur){
6007 /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries
6008 ** have been deleted? This API will need to change to return an error code
6009 ** as well as the boolean result value.
6010 */
6011 return (CURSOR_VALID!=pCur->eState);
6012}
6013
6014/*
6015** Return an estimate for the number of rows in the table that pCur is
6016** pointing to. Return a negative number if no estimate is currently
6017** available.
6018*/
6019i64 sqlite3BtreeRowCountEst(BtCursor *pCur){
6020 i64 n;
6021 u8 i;
6022
6023 assert( cursorOwnsBtShared(pCur) );
6024 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
6025
6026 /* Currently this interface is only called by the OP_IfSmaller
  ** opcode, and in that case the cursor will always be valid and
6028 ** will always point to a leaf node. */
6029 if( NEVER(pCur->eState!=CURSOR_VALID) ) return -1;
6030 if( NEVER(pCur->pPage->leaf==0) ) return -1;
6031
6032 n = pCur->pPage->nCell;
6033 for(i=0; i<pCur->iPage; i++){
6034 n *= pCur->apPage[i]->nCell;
6035 }
6036 return n;
6037}
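
/*
** Worked example of the estimate above (assumed cell counts, illustration
** only): with the cursor on a leaf holding 50 cells, beneath an interior
** page holding 30 cells and a root holding 20 cells, the estimate is
** 50*30*20 = 30000 rows. The estimate implicitly assumes that every page
** at each level is about as full as the pages on the cursor's current
** path.
*/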
6038
6039/*
6040** Advance the cursor to the next entry in the database.
6041** Return value:
6042**
6043** SQLITE_OK success
6044** SQLITE_DONE cursor is already pointing at the last element
6045** otherwise some kind of error occurred
6046**
6047** The main entry point is sqlite3BtreeNext(). That routine is optimized
** for the common case of merely incrementing the cell counter BtCursor.ix
6049** to the next cell on the current page. The (slower) btreeNext() helper
6050** routine is called when it is necessary to move to a different page or
6051** to restore the cursor.
6052**
6053** If bit 0x01 of the F argument in sqlite3BtreeNext(C,F) is 1, then the
6054** cursor corresponds to an SQL index and this routine could have been
6055** skipped if the SQL index had been a unique index. The F argument
** is a hint to the implementation. The native SQLite btree implementation
** does not use this hint, but COMDB2 does.
6058*/
6059static SQLITE_NOINLINE int btreeNext(BtCursor *pCur){
6060 int rc;
6061 int idx;
6062 MemPage *pPage;
6063
6064 assert( cursorOwnsBtShared(pCur) );
6065 if( pCur->eState!=CURSOR_VALID ){
6066 assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
6067 rc = restoreCursorPosition(pCur);
6068 if( rc!=SQLITE_OK ){
6069 return rc;
6070 }
6071 if( CURSOR_INVALID==pCur->eState ){
6072 return SQLITE_DONE;
6073 }
6074 if( pCur->eState==CURSOR_SKIPNEXT ){
6075 pCur->eState = CURSOR_VALID;
6076 if( pCur->skipNext>0 ) return SQLITE_OK;
6077 }
6078 }
6079
6080 pPage = pCur->pPage;
6081 idx = ++pCur->ix;
6082 if( NEVER(!pPage->isInit) || sqlite3FaultSim(412) ){
6083 return SQLITE_CORRUPT_BKPT;
6084 }
6085
6086 if( idx>=pPage->nCell ){
6087 if( !pPage->leaf ){
6088 rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
6089 if( rc ) return rc;
6090 return moveToLeftmost(pCur);
6091 }
6092 do{
6093 if( pCur->iPage==0 ){
6094 pCur->eState = CURSOR_INVALID;
6095 return SQLITE_DONE;
6096 }
6097 moveToParent(pCur);
6098 pPage = pCur->pPage;
6099 }while( pCur->ix>=pPage->nCell );
6100 if( pPage->intKey ){
6101 return sqlite3BtreeNext(pCur, 0);
6102 }else{
6103 return SQLITE_OK;
6104 }
6105 }
6106 if( pPage->leaf ){
6107 return SQLITE_OK;
6108 }else{
6109 return moveToLeftmost(pCur);
6110 }
6111}
6112int sqlite3BtreeNext(BtCursor *pCur, int flags){
6113 MemPage *pPage;
6114 UNUSED_PARAMETER( flags ); /* Used in COMDB2 but not native SQLite */
6115 assert( cursorOwnsBtShared(pCur) );
6116 assert( flags==0 || flags==1 );
6117 pCur->info.nSize = 0;
6118 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
6119 if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur);
6120 pPage = pCur->pPage;
6121 if( (++pCur->ix)>=pPage->nCell ){
6122 pCur->ix--;
6123 return btreeNext(pCur);
6124 }
6125 if( pPage->leaf ){
6126 return SQLITE_OK;
6127 }else{
6128 return moveToLeftmost(pCur);
6129 }
6130}
6131
6132/*
** Step the cursor back to the previous entry in the database.
6134** Return values:
6135**
6136** SQLITE_OK success
6137** SQLITE_DONE the cursor is already on the first element of the table
6138** otherwise some kind of error occurred
6139**
6140** The main entry point is sqlite3BtreePrevious(). That routine is optimized
** for the common case of merely decrementing the cell counter BtCursor.ix
6142** to the previous cell on the current page. The (slower) btreePrevious()
6143** helper routine is called when it is necessary to move to a different page
6144** or to restore the cursor.
6145**
6146** If bit 0x01 of the F argument to sqlite3BtreePrevious(C,F) is 1, then
6147** the cursor corresponds to an SQL index and this routine could have been
6148** skipped if the SQL index had been a unique index. The F argument is a
** hint to the implementation. The native SQLite btree implementation does not
6150** use this hint, but COMDB2 does.
6151*/
6152static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur){
6153 int rc;
6154 MemPage *pPage;
6155
6156 assert( cursorOwnsBtShared(pCur) );
6157 assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 );
6158 assert( pCur->info.nSize==0 );
6159 if( pCur->eState!=CURSOR_VALID ){
6160 rc = restoreCursorPosition(pCur);
6161 if( rc!=SQLITE_OK ){
6162 return rc;
6163 }
6164 if( CURSOR_INVALID==pCur->eState ){
6165 return SQLITE_DONE;
6166 }
6167 if( CURSOR_SKIPNEXT==pCur->eState ){
6168 pCur->eState = CURSOR_VALID;
6169 if( pCur->skipNext<0 ) return SQLITE_OK;
6170 }
6171 }
6172
6173 pPage = pCur->pPage;
6174 assert( pPage->isInit );
6175 if( !pPage->leaf ){
6176 int idx = pCur->ix;
6177 rc = moveToChild(pCur, get4byte(findCell(pPage, idx)));
6178 if( rc ) return rc;
6179 rc = moveToRightmost(pCur);
6180 }else{
6181 while( pCur->ix==0 ){
6182 if( pCur->iPage==0 ){
6183 pCur->eState = CURSOR_INVALID;
6184 return SQLITE_DONE;
6185 }
6186 moveToParent(pCur);
6187 }
6188 assert( pCur->info.nSize==0 );
6189 assert( (pCur->curFlags & (BTCF_ValidOvfl))==0 );
6190
6191 pCur->ix--;
6192 pPage = pCur->pPage;
6193 if( pPage->intKey && !pPage->leaf ){
6194 rc = sqlite3BtreePrevious(pCur, 0);
6195 }else{
6196 rc = SQLITE_OK;
6197 }
6198 }
6199 return rc;
6200}
6201int sqlite3BtreePrevious(BtCursor *pCur, int flags){
6202 assert( cursorOwnsBtShared(pCur) );
6203 assert( flags==0 || flags==1 );
6204 UNUSED_PARAMETER( flags ); /* Used in COMDB2 but not native SQLite */
6205 pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey);
6206 pCur->info.nSize = 0;
6207 if( pCur->eState!=CURSOR_VALID
6208 || pCur->ix==0
6209 || pCur->pPage->leaf==0
6210 ){
6211 return btreePrevious(pCur);
6212 }
6213 pCur->ix--;
6214 return SQLITE_OK;
6215}
6216
6217/*
6218** Allocate a new page from the database file.
6219**
6220** The new page is marked as dirty. (In other words, sqlite3PagerWrite()
6221** has already been called on the new page.) The new page has also
6222** been referenced and the calling routine is responsible for calling
6223** sqlite3PagerUnref() on the new page when it is done.
6224**
6225** SQLITE_OK is returned on success. Any other return value indicates
6226** an error. *ppPage is set to NULL in the event of an error.
6227**
6228** If the "nearby" parameter is not 0, then an effort is made to
6229** locate a page close to the page number "nearby". This can be used in an
6230** attempt to keep related pages close to each other in the database file,
6231** which in turn can make database access faster.
6232**
6233** If the eMode parameter is BTALLOC_EXACT and the nearby page exists
6234** anywhere on the free-list, then it is guaranteed to be returned. If
** eMode is BTALLOC_LE then the page returned will be less than or equal
6236** to nearby if any such page exists. If eMode is BTALLOC_ANY then there
6237** are no restrictions on which page is returned.
6238*/
6239static int allocateBtreePage(
6240 BtShared *pBt, /* The btree */
6241 MemPage **ppPage, /* Store pointer to the allocated page here */
6242 Pgno *pPgno, /* Store the page number here */
6243 Pgno nearby, /* Search for a page near this one */
  u8 eMode                      /* BTALLOC_EXACT, BTALLOC_LE, or BTALLOC_ANY */
6245){
6246 MemPage *pPage1;
6247 int rc;
6248 u32 n; /* Number of pages on the freelist */
6249 u32 k; /* Number of leaves on the trunk of the freelist */
6250 MemPage *pTrunk = 0;
6251 MemPage *pPrevTrunk = 0;
6252 Pgno mxPage; /* Total size of the database file */
6253
6254 assert( sqlite3_mutex_held(pBt->mutex) );
6255 assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) );
6256 pPage1 = pBt->pPage1;
6257 mxPage = btreePagecount(pBt);
6258 /* EVIDENCE-OF: R-21003-45125 The 4-byte big-endian integer at offset 36
6259 ** stores the total number of pages on the freelist. */
6260 n = get4byte(&pPage1->aData[36]);
6261 testcase( n==mxPage-1 );
6262 if( n>=mxPage ){
6263 return SQLITE_CORRUPT_BKPT;
6264 }
6265 if( n>0 ){
6266 /* There are pages on the freelist. Reuse one of those pages. */
6267 Pgno iTrunk;
6268 u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
6269 u32 nSearch = 0; /* Count of the number of search attempts */
6270
6271 /* If eMode==BTALLOC_EXACT and a query of the pointer-map
6272 ** shows that the page 'nearby' is somewhere on the free-list, then
    ** the entire list will be searched for that page.
6274 */
6275#ifndef SQLITE_OMIT_AUTOVACUUM
6276 if( eMode==BTALLOC_EXACT ){
6277 if( nearby<=mxPage ){
6278 u8 eType;
6279 assert( nearby>0 );
6280 assert( pBt->autoVacuum );
6281 rc = ptrmapGet(pBt, nearby, &eType, 0);
6282 if( rc ) return rc;
6283 if( eType==PTRMAP_FREEPAGE ){
6284 searchList = 1;
6285 }
6286 }
6287 }else if( eMode==BTALLOC_LE ){
6288 searchList = 1;
6289 }
6290#endif
6291
6292 /* Decrement the free-list count by 1. Set iTrunk to the index of the
    ** first free-list trunk page. pPrevTrunk is initially NULL.
6294 */
6295 rc = sqlite3PagerWrite(pPage1->pDbPage);
6296 if( rc ) return rc;
6297 put4byte(&pPage1->aData[36], n-1);
6298
6299 /* The code within this loop is run only once if the 'searchList' variable
6300 ** is not true. Otherwise, it runs once for each trunk-page on the
6301 ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT)
    ** or until a page less than 'nearby' is located (eMode==BTALLOC_LE).
6303 */
6304 do {
6305 pPrevTrunk = pTrunk;
6306 if( pPrevTrunk ){
6307 /* EVIDENCE-OF: R-01506-11053 The first integer on a freelist trunk page
6308 ** is the page number of the next freelist trunk page in the list or
6309 ** zero if this is the last freelist trunk page. */
6310 iTrunk = get4byte(&pPrevTrunk->aData[0]);
6311 }else{
6312 /* EVIDENCE-OF: R-59841-13798 The 4-byte big-endian integer at offset 32
6313 ** stores the page number of the first page of the freelist, or zero if
6314 ** the freelist is empty. */
6315 iTrunk = get4byte(&pPage1->aData[32]);
6316 }
6317 testcase( iTrunk==mxPage );
6318 if( iTrunk>mxPage || nSearch++ > n ){
6319 rc = SQLITE_CORRUPT_PGNO(pPrevTrunk ? pPrevTrunk->pgno : 1);
6320 }else{
6321 rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0);
6322 }
6323 if( rc ){
6324 pTrunk = 0;
6325 goto end_allocate_page;
6326 }
6327 assert( pTrunk!=0 );
6328 assert( pTrunk->aData!=0 );
6329 /* EVIDENCE-OF: R-13523-04394 The second integer on a freelist trunk page
6330 ** is the number of leaf page pointers to follow. */
6331 k = get4byte(&pTrunk->aData[4]);
6332 if( k==0 && !searchList ){
6333 /* The trunk has no leaves and the list is not being searched.
6334 ** So extract the trunk page itself and use it as the newly
6335 ** allocated page */
6336 assert( pPrevTrunk==0 );
6337 rc = sqlite3PagerWrite(pTrunk->pDbPage);
6338 if( rc ){
6339 goto end_allocate_page;
6340 }
6341 *pPgno = iTrunk;
6342 memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
6343 *ppPage = pTrunk;
6344 pTrunk = 0;
6345 TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
6346 }else if( k>(u32)(pBt->usableSize/4 - 2) ){
6347 /* Value of k is out of range. Database corruption */
6348 rc = SQLITE_CORRUPT_PGNO(iTrunk);
6349 goto end_allocate_page;
6350#ifndef SQLITE_OMIT_AUTOVACUUM
6351 }else if( searchList
6352 && (nearby==iTrunk || (iTrunk<nearby && eMode==BTALLOC_LE))
6353 ){
6354 /* The list is being searched and this trunk page is the page
6355 ** to allocate, regardless of whether it has leaves.
6356 */
6357 *pPgno = iTrunk;
6358 *ppPage = pTrunk;
6359 searchList = 0;
6360 rc = sqlite3PagerWrite(pTrunk->pDbPage);
6361 if( rc ){
6362 goto end_allocate_page;
6363 }
6364 if( k==0 ){
6365 if( !pPrevTrunk ){
6366 memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
6367 }else{
6368 rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
6369 if( rc!=SQLITE_OK ){
6370 goto end_allocate_page;
6371 }
6372 memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
6373 }
6374 }else{
6375 /* The trunk page is required by the caller but it contains
6376 ** pointers to free-list leaves. The first leaf becomes a trunk
6377 ** page in this case.
6378 */
6379 MemPage *pNewTrunk;
6380 Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
6381 if( iNewTrunk>mxPage ){
6382 rc = SQLITE_CORRUPT_PGNO(iTrunk);
6383 goto end_allocate_page;
6384 }
6385 testcase( iNewTrunk==mxPage );
6386 rc = btreeGetUnusedPage(pBt, iNewTrunk, &pNewTrunk, 0);
6387 if( rc!=SQLITE_OK ){
6388 goto end_allocate_page;
6389 }
6390 rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
6391 if( rc!=SQLITE_OK ){
6392 releasePage(pNewTrunk);
6393 goto end_allocate_page;
6394 }
6395 memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
6396 put4byte(&pNewTrunk->aData[4], k-1);
6397 memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
6398 releasePage(pNewTrunk);
6399 if( !pPrevTrunk ){
6400 assert( sqlite3PagerIswriteable(pPage1->pDbPage) );
6401 put4byte(&pPage1->aData[32], iNewTrunk);
6402 }else{
6403 rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
6404 if( rc ){
6405 goto end_allocate_page;
6406 }
6407 put4byte(&pPrevTrunk->aData[0], iNewTrunk);
6408 }
6409 }
6410 pTrunk = 0;
6411 TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
6412#endif
6413 }else if( k>0 ){
6414 /* Extract a leaf from the trunk */
6415 u32 closest;
6416 Pgno iPage;
6417 unsigned char *aData = pTrunk->aData;
6418 if( nearby>0 ){
6419 u32 i;
6420 closest = 0;
6421 if( eMode==BTALLOC_LE ){
6422 for(i=0; i<k; i++){
6423 iPage = get4byte(&aData[8+i*4]);
6424 if( iPage<=nearby ){
6425 closest = i;
6426 break;
6427 }
6428 }
6429 }else{
6430 int dist;
6431 dist = sqlite3AbsInt32(get4byte(&aData[8]) - nearby);
6432 for(i=1; i<k; i++){
6433 int d2 = sqlite3AbsInt32(get4byte(&aData[8+i*4]) - nearby);
6434 if( d2<dist ){
6435 closest = i;
6436 dist = d2;
6437 }
6438 }
6439 }
6440 }else{
6441 closest = 0;
6442 }
6443
6444 iPage = get4byte(&aData[8+closest*4]);
6445 testcase( iPage==mxPage );
6446 if( iPage>mxPage || iPage<2 ){
6447 rc = SQLITE_CORRUPT_PGNO(iTrunk);
6448 goto end_allocate_page;
6449 }
6450 testcase( iPage==mxPage );
6451 if( !searchList
6452 || (iPage==nearby || (iPage<nearby && eMode==BTALLOC_LE))
6453 ){
6454 int noContent;
6455 *pPgno = iPage;
6456 TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
6457 ": %d more free pages\n",
6458 *pPgno, closest+1, k, pTrunk->pgno, n-1));
6459 rc = sqlite3PagerWrite(pTrunk->pDbPage);
6460 if( rc ) goto end_allocate_page;
6461 if( closest<k-1 ){
6462 memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
6463 }
6464 put4byte(&aData[4], k-1);
6465 noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0;
6466 rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, noContent);
6467 if( rc==SQLITE_OK ){
6468 rc = sqlite3PagerWrite((*ppPage)->pDbPage);
6469 if( rc!=SQLITE_OK ){
6470 releasePage(*ppPage);
6471 *ppPage = 0;
6472 }
6473 }
6474 searchList = 0;
6475 }
6476 }
6477 releasePage(pPrevTrunk);
6478 pPrevTrunk = 0;
6479 }while( searchList );
6480 }else{
6481 /* There are no pages on the freelist, so append a new page to the
6482 ** database image.
6483 **
6484 ** Normally, new pages allocated by this block can be requested from the
6485 ** pager layer with the 'no-content' flag set. This prevents the pager
6486 ** from trying to read the pages content from disk. However, if the
6487 ** current transaction has already run one or more incremental-vacuum
6488 ** steps, then the page we are about to allocate may contain content
6489 ** that is required in the event of a rollback. In this case, do
6490 ** not set the no-content flag. This causes the pager to load and journal
6491 ** the current page content before overwriting it.
6492 **
6493 ** Note that the pager will not actually attempt to load or journal
6494 ** content for any page that really does lie past the end of the database
6495 ** file on disk. So the effects of disabling the no-content optimization
6496 ** here are confined to those pages that lie between the end of the
6497 ** database image and the end of the database file.
6498 */
6499 int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate))? PAGER_GET_NOCONTENT:0;
6500
6501 rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
6502 if( rc ) return rc;
6503 pBt->nPage++;
6504 if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++;
6505
6506#ifndef SQLITE_OMIT_AUTOVACUUM
6507 if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){
6508 /* If *pPgno refers to a pointer-map page, allocate two new pages
6509 ** at the end of the file instead of one. The first allocated page
6510 ** becomes a new pointer-map page, the second is used by the caller.
6511 */
6512 MemPage *pPg = 0;
6513 TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage));
6514 assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) );
6515 rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent);
6516 if( rc==SQLITE_OK ){
6517 rc = sqlite3PagerWrite(pPg->pDbPage);
6518 releasePage(pPg);
6519 }
6520 if( rc ) return rc;
6521 pBt->nPage++;
6522 if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; }
6523 }
6524#endif
6525 put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage);
6526 *pPgno = pBt->nPage;
6527
6528 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
6529 rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, bNoContent);
6530 if( rc ) return rc;
6531 rc = sqlite3PagerWrite((*ppPage)->pDbPage);
6532 if( rc!=SQLITE_OK ){
6533 releasePage(*ppPage);
6534 *ppPage = 0;
6535 }
6536 TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
6537 }
6538
6539 assert( CORRUPT_DB || *pPgno!=PENDING_BYTE_PAGE(pBt) );
6540
6541end_allocate_page:
6542 releasePage(pTrunk);
6543 releasePage(pPrevTrunk);
6544 assert( rc!=SQLITE_OK || sqlite3PagerPageRefcount((*ppPage)->pDbPage)<=1 );
6545 assert( rc!=SQLITE_OK || (*ppPage)->isInit==0 );
6546 return rc;
6547}
6548
6549/*
6550** This function is used to add page iPage to the database file free-list.
6551** It is assumed that the page is not already a part of the free-list.
6552**
6553** The value passed as the second argument to this function is optional.
6554** If the caller happens to have a pointer to the MemPage object
6555** corresponding to page iPage handy, it may pass it as the second value.
6556** Otherwise, it may pass NULL.
6557**
6558** If a pointer to a MemPage object is passed as the second argument,
6559** its reference count is not altered by this function.
6560*/
6561static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
6562 MemPage *pTrunk = 0; /* Free-list trunk page */
6563 Pgno iTrunk = 0; /* Page number of free-list trunk page */
6564 MemPage *pPage1 = pBt->pPage1; /* Local reference to page 1 */
6565 MemPage *pPage; /* Page being freed. May be NULL. */
6566 int rc; /* Return Code */
6567 u32 nFree; /* Initial number of pages on free-list */
6568
6569 assert( sqlite3_mutex_held(pBt->mutex) );
6570 assert( CORRUPT_DB || iPage>1 );
6571 assert( !pMemPage || pMemPage->pgno==iPage );
6572
6573 if( iPage<2 || iPage>pBt->nPage ){
6574 return SQLITE_CORRUPT_BKPT;
6575 }
6576 if( pMemPage ){
6577 pPage = pMemPage;
6578 sqlite3PagerRef(pPage->pDbPage);
6579 }else{
6580 pPage = btreePageLookup(pBt, iPage);
6581 }
6582
6583 /* Increment the free page count on pPage1 */
6584 rc = sqlite3PagerWrite(pPage1->pDbPage);
6585 if( rc ) goto freepage_out;
6586 nFree = get4byte(&pPage1->aData[36]);
6587 put4byte(&pPage1->aData[36], nFree+1);
6588
6589 if( pBt->btsFlags & BTS_SECURE_DELETE ){
6590 /* If the secure_delete option is enabled, then
6591 ** always fully overwrite deleted information with zeros.
6592 */
6593 if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) )
6594 || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0)
6595 ){
6596 goto freepage_out;
6597 }
6598 memset(pPage->aData, 0, pPage->pBt->pageSize);
6599 }
6600
6601 /* If the database supports auto-vacuum, write an entry in the pointer-map
6602 ** to indicate that the page is free.
6603 */
6604 if( ISAUTOVACUUM ){
6605 ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc);
6606 if( rc ) goto freepage_out;
6607 }
6608
6609 /* Now manipulate the actual database free-list structure. There are two
6610 ** possibilities. If the free-list is currently empty, or if the first
6611 ** trunk page in the free-list is full, then this page will become a
6612 ** new free-list trunk page. Otherwise, it will become a leaf of the
6613 ** first trunk page in the current free-list. This block tests if it
6614 ** is possible to add the page as a new free-list leaf.
6615 */
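  /* As a worked example of the capacity checks below (numbers are
  ** illustrative): with usableSize==4096 a trunk page can formally hold up
  ** to 4096/4 - 2 == 1022 leaf pointers, but the nLeaf test below only adds
  ** a new leaf while nLeaf < 4096/4 - 8 == 1016, so this routine never grows
  ** a trunk page beyond 1016 leaves.
  */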
6616 if( nFree!=0 ){
6617 u32 nLeaf; /* Initial number of leaf cells on trunk page */
6618
6619 iTrunk = get4byte(&pPage1->aData[32]);
6620 if( iTrunk>btreePagecount(pBt) ){
6621 rc = SQLITE_CORRUPT_BKPT;
6622 goto freepage_out;
6623 }
6624 rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
6625 if( rc!=SQLITE_OK ){
6626 goto freepage_out;
6627 }
6628
6629 nLeaf = get4byte(&pTrunk->aData[4]);
6630 assert( pBt->usableSize>32 );
6631 if( nLeaf > (u32)pBt->usableSize/4 - 2 ){
6632 rc = SQLITE_CORRUPT_BKPT;
6633 goto freepage_out;
6634 }
6635 if( nLeaf < (u32)pBt->usableSize/4 - 8 ){
6636 /* In this case there is room on the trunk page to insert the page
6637 ** being freed as a new leaf.
6638 **
6639 ** Note that the trunk page is not really full until it contains
6640 ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have
6641 ** coded. But due to a coding error in versions of SQLite prior to
6642 ** 3.6.0, databases with freelist trunk pages holding more than
6643 ** usableSize/4 - 8 entries will be reported as corrupt. In order
6644 ** to maintain backwards compatibility with older versions of SQLite,
6645 ** we will continue to restrict the number of entries to usableSize/4 - 8
6646 ** for now. At some point in the future (once everyone has upgraded
6647 ** to 3.6.0 or later) we should consider fixing the conditional above
6648 ** to read "usableSize/4-2" instead of "usableSize/4-8".
6649 **
6650 ** EVIDENCE-OF: R-19920-11576 However, newer versions of SQLite still
6651 ** avoid using the last six entries in the freelist trunk page array in
6652 ** order that database files created by newer versions of SQLite can be
6653 ** read by older versions of SQLite.
6654 */
6655 rc = sqlite3PagerWrite(pTrunk->pDbPage);
6656 if( rc==SQLITE_OK ){
6657 put4byte(&pTrunk->aData[4], nLeaf+1);
6658 put4byte(&pTrunk->aData[8+nLeaf*4], iPage);
6659 if( pPage && (pBt->btsFlags & BTS_SECURE_DELETE)==0 ){
6660 sqlite3PagerDontWrite(pPage->pDbPage);
6661 }
6662 rc = btreeSetHasContent(pBt, iPage);
6663 }
6664 TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno));
6665 goto freepage_out;
6666 }
6667 }
6668
  /* If control flows to this point, then it was not possible to add the
  ** page being freed as a leaf page of the first trunk in the free-list,
  ** either because the free-list is empty or because the first trunk in
  ** the free-list is full.  Either way, the page being freed will become
  ** the new first trunk page in the free-list.
6674 */
6675 if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){
6676 goto freepage_out;
6677 }
6678 rc = sqlite3PagerWrite(pPage->pDbPage);
6679 if( rc!=SQLITE_OK ){
6680 goto freepage_out;
6681 }
6682 put4byte(pPage->aData, iTrunk);
6683 put4byte(&pPage->aData[4], 0);
6684 put4byte(&pPage1->aData[32], iPage);
6685 TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", pPage->pgno, iTrunk));
6686
6687freepage_out:
6688 if( pPage ){
6689 pPage->isInit = 0;
6690 }
6691 releasePage(pPage);
6692 releasePage(pTrunk);
6693 return rc;
6694}
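/*
** Add the page passed as the first argument to the database free-list by
** calling freePage2().  This wrapper is a no-op if *pRC already holds an
** error code; otherwise *pRC is set to the result of freePage2().
*/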
6695static void freePage(MemPage *pPage, int *pRC){
6696 if( (*pRC)==SQLITE_OK ){
6697 *pRC = freePage2(pPage->pBt, pPage, pPage->pgno);
6698 }
6699}
6700
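/* For illustration (values are hypothetical): every overflow page begins
** with a 4-byte big-endian page number giving the next overflow page in the
** chain (zero on the last page), followed by usableSize-4 bytes of payload.
** So with usableSize==4096 each overflow page carries 4092 payload bytes,
** and a cell whose payload exceeds its local portion by 8184 bytes needs
** (8184 + 4092 - 1)/4092 == 2 overflow pages, which is exactly the nOvfl
** computation performed in the function below.
*/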
6701/*
6702** Free the overflow pages associated with the given Cell.
6703*/
6704static SQLITE_NOINLINE int clearCellOverflow(
6705 MemPage *pPage, /* The page that contains the Cell */
6706 unsigned char *pCell, /* First byte of the Cell */
6707 CellInfo *pInfo /* Size information about the cell */
6708){
6709 BtShared *pBt;
6710 Pgno ovflPgno;
6711 int rc;
6712 int nOvfl;
6713 u32 ovflPageSize;
6714
6715 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
6716 assert( pInfo->nLocal!=pInfo->nPayload );
6717 testcase( pCell + pInfo->nSize == pPage->aDataEnd );
6718 testcase( pCell + (pInfo->nSize-1) == pPage->aDataEnd );
6719 if( pCell + pInfo->nSize > pPage->aDataEnd ){
6720 /* Cell extends past end of page */
6721 return SQLITE_CORRUPT_PAGE(pPage);
6722 }
6723 ovflPgno = get4byte(pCell + pInfo->nSize - 4);
6724 pBt = pPage->pBt;
6725 assert( pBt->usableSize > 4 );
6726 ovflPageSize = pBt->usableSize - 4;
6727 nOvfl = (pInfo->nPayload - pInfo->nLocal + ovflPageSize - 1)/ovflPageSize;
6728 assert( nOvfl>0 ||
6729 (CORRUPT_DB && (pInfo->nPayload + ovflPageSize)<ovflPageSize)
6730 );
6731 while( nOvfl-- ){
6732 Pgno iNext = 0;
6733 MemPage *pOvfl = 0;
6734 if( ovflPgno<2 || ovflPgno>btreePagecount(pBt) ){
6735 /* 0 is not a legal page number and page 1 cannot be an
6736 ** overflow page. Therefore if ovflPgno<2 or past the end of the
6737 ** file the database must be corrupt. */
6738 return SQLITE_CORRUPT_BKPT;
6739 }
6740 if( nOvfl ){
6741 rc = getOverflowPage(pBt, ovflPgno, &pOvfl, &iNext);
6742 if( rc ) return rc;
6743 }
6744
6745 if( ( pOvfl || ((pOvfl = btreePageLookup(pBt, ovflPgno))!=0) )
6746 && sqlite3PagerPageRefcount(pOvfl->pDbPage)!=1
6747 ){
6748 /* There is no reason any cursor should have an outstanding reference
6749 ** to an overflow page belonging to a cell that is being deleted/updated.
6750 ** So if there exists more than one reference to this page, then it
6751 ** must not really be an overflow page and the database must be corrupt.
6752 ** It is helpful to detect this before calling freePage2(), as
6753 ** freePage2() may zero the page contents if secure-delete mode is
6754 ** enabled. If this 'overflow' page happens to be a page that the
6755 ** caller is iterating through or using in some other way, this
6756 ** can be problematic.
6757 */
6758 rc = SQLITE_CORRUPT_BKPT;
6759 }else{
6760 rc = freePage2(pBt, pOvfl, ovflPgno);
6761 }
6762
6763 if( pOvfl ){
6764 sqlite3PagerUnref(pOvfl->pDbPage);
6765 }
6766 if( rc ) return rc;
6767 ovflPgno = iNext;
6768 }
6769 return SQLITE_OK;
6770}
6771
/* Call xParseCell to compute the size of a cell.  If the cell contains
** overflow, then invoke clearCellOverflow() to clear out that overflow.
** Store the result code (SQLITE_OK or some error code) in rc.
**
** Implemented as a macro to force inlining for performance.
*/
6778#define BTREE_CLEAR_CELL(rc, pPage, pCell, sInfo) \
6779 pPage->xParseCell(pPage, pCell, &sInfo); \
6780 if( sInfo.nLocal!=sInfo.nPayload ){ \
6781 rc = clearCellOverflow(pPage, pCell, &sInfo); \
6782 }else{ \
6783 rc = SQLITE_OK; \
6784 }
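
#if 0
/* Illustrative sketch only -- never compiled.  It shows the intended usage
** pattern of the BTREE_CLEAR_CELL macro: declare an int and a CellInfo in
** the caller, locate the cell, then let the macro parse the cell and clear
** any overflow chain.  The function name is hypothetical. */
static int exampleClearCell(MemPage *pPage, int iCell){
  int rc;
  CellInfo sInfo;
  u8 *pCell = findCell(pPage, iCell);   /* Cell to be cleared */
  BTREE_CLEAR_CELL(rc, pPage, pCell, sInfo);
  return rc;
}
#endif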
6785
6786
6787/*
6788** Create the byte sequence used to represent a cell on page pPage
6789** and write that byte sequence into pCell[]. Overflow pages are
6790** allocated and filled in as necessary. The calling procedure
6791** is responsible for making sure sufficient space has been allocated
6792** for pCell[].
6793**
** Note that pCell does not necessarily need to point to the pPage->aData
6795** area. pCell might point to some temporary storage. The cell will
6796** be constructed in this temporary area then copied into pPage->aData
6797** later.
6798*/
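/* For illustration (not normative): on a table-btree leaf, the cell built
** here consists of a varint payload size, a varint rowid, then the local
** portion of the payload and, only if the payload spills, a trailing 4-byte
** page number of the first overflow page.  For example, a row with rowid 5
** and a 10-byte record that fits locally becomes the 12-byte cell
**
**      0x0A  0x05  <10 payload bytes>
**
** (both varints fit in a single byte here).
*/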
6799static int fillInCell(
6800 MemPage *pPage, /* The page that contains the cell */
6801 unsigned char *pCell, /* Complete text of the cell */
6802 const BtreePayload *pX, /* Payload with which to construct the cell */
6803 int *pnSize /* Write cell size here */
6804){
6805 int nPayload;
6806 const u8 *pSrc;
6807 int nSrc, n, rc, mn;
6808 int spaceLeft;
6809 MemPage *pToRelease;
6810 unsigned char *pPrior;
6811 unsigned char *pPayload;
6812 BtShared *pBt;
6813 Pgno pgnoOvfl;
6814 int nHeader;
6815
6816 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
6817
6818 /* pPage is not necessarily writeable since pCell might be auxiliary
6819 ** buffer space that is separate from the pPage buffer area */
6820 assert( pCell<pPage->aData || pCell>=&pPage->aData[pPage->pBt->pageSize]
6821 || sqlite3PagerIswriteable(pPage->pDbPage) );
6822
6823 /* Fill in the header. */
6824 nHeader = pPage->childPtrSize;
6825 if( pPage->intKey ){
6826 nPayload = pX->nData + pX->nZero;
6827 pSrc = pX->pData;
6828 nSrc = pX->nData;
6829 assert( pPage->intKeyLeaf ); /* fillInCell() only called for leaves */
6830 nHeader += putVarint32(&pCell[nHeader], nPayload);
6831 nHeader += putVarint(&pCell[nHeader], *(u64*)&pX->nKey);
6832 }else{
6833 assert( pX->nKey<=0x7fffffff && pX->pKey!=0 );
6834 nSrc = nPayload = (int)pX->nKey;
6835 pSrc = pX->pKey;
6836 nHeader += putVarint32(&pCell[nHeader], nPayload);
6837 }
6838
6839 /* Fill in the payload */
6840 pPayload = &pCell[nHeader];
6841 if( nPayload<=pPage->maxLocal ){
6842 /* This is the common case where everything fits on the btree page
6843 ** and no overflow pages are required. */
6844 n = nHeader + nPayload;
6845 testcase( n==3 );
6846 testcase( n==4 );
6847 if( n<4 ) n = 4;
6848 *pnSize = n;
6849 assert( nSrc<=nPayload );
6850 testcase( nSrc<nPayload );
6851 memcpy(pPayload, pSrc, nSrc);
6852 memset(pPayload+nSrc, 0, nPayload-nSrc);
6853 return SQLITE_OK;
6854 }
6855
6856 /* If we reach this point, it means that some of the content will need
6857 ** to spill onto overflow pages.
6858 */
6859 mn = pPage->minLocal;
6860 n = mn + (nPayload - mn) % (pPage->pBt->usableSize - 4);
6861 testcase( n==pPage->maxLocal );
6862 testcase( n==pPage->maxLocal+1 );
6863 if( n > pPage->maxLocal ) n = mn;
6864 spaceLeft = n;
6865 *pnSize = n + nHeader + 4;
6866 pPrior = &pCell[nHeader+n];
6867 pToRelease = 0;
6868 pgnoOvfl = 0;
6869 pBt = pPage->pBt;
6870
6871 /* At this point variables should be set as follows:
6872 **
6873 ** nPayload Total payload size in bytes
6874 ** pPayload Begin writing payload here
6875 ** spaceLeft Space available at pPayload. If nPayload>spaceLeft,
6876 ** that means content must spill into overflow pages.
6877 ** *pnSize Size of the local cell (not counting overflow pages)
6878 ** pPrior Where to write the pgno of the first overflow page
6879 **
6880 ** Use a call to btreeParseCellPtr() to verify that the values above
6881 ** were computed correctly.
6882 */
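  /* A worked example of the spill computation above, with illustrative
  ** numbers: suppose usableSize==4096 (so each overflow page carries 4092
  ** payload bytes), minLocal==489, maxLocal==4061 and nPayload==10000.  Then
  **
  **      n = 489 + (10000-489) % 4092 = 489 + 1327 = 1816
  **
  ** which is <= maxLocal, so 1816 bytes stay local and the remaining
  ** 10000-1816 == 8184 == 2*4092 bytes exactly fill two overflow pages.
  ** The formula is chosen so that overflow pages are always completely full.
  */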
6883#ifdef SQLITE_DEBUG
6884 {
6885 CellInfo info;
6886 pPage->xParseCell(pPage, pCell, &info);
6887 assert( nHeader==(int)(info.pPayload - pCell) );
6888 assert( info.nKey==pX->nKey );
6889 assert( *pnSize == info.nSize );
6890 assert( spaceLeft == info.nLocal );
6891 }
6892#endif
6893
6894 /* Write the payload into the local Cell and any extra into overflow pages */
6895 while( 1 ){
6896 n = nPayload;
6897 if( n>spaceLeft ) n = spaceLeft;
6898
    /* If pToRelease is not zero then pPayload points into the data area
6900 ** of pToRelease. Make sure pToRelease is still writeable. */
6901 assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) );
6902
6903 /* If pPayload is part of the data area of pPage, then make sure pPage
6904 ** is still writeable */
6905 assert( pPayload<pPage->aData || pPayload>=&pPage->aData[pBt->pageSize]
6906 || sqlite3PagerIswriteable(pPage->pDbPage) );
6907
6908 if( nSrc>=n ){
6909 memcpy(pPayload, pSrc, n);
6910 }else if( nSrc>0 ){
6911 n = nSrc;
6912 memcpy(pPayload, pSrc, n);
6913 }else{
6914 memset(pPayload, 0, n);
6915 }
6916 nPayload -= n;
6917 if( nPayload<=0 ) break;
6918 pPayload += n;
6919 pSrc += n;
6920 nSrc -= n;
6921 spaceLeft -= n;
6922 if( spaceLeft==0 ){
6923 MemPage *pOvfl = 0;
6924#ifndef SQLITE_OMIT_AUTOVACUUM
6925 Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
6926 if( pBt->autoVacuum ){
6927 do{
6928 pgnoOvfl++;
6929 } while(
6930 PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt)
6931 );
6932 }
6933#endif
6934 rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0);
6935#ifndef SQLITE_OMIT_AUTOVACUUM
6936 /* If the database supports auto-vacuum, and the second or subsequent
6937 ** overflow page is being allocated, add an entry to the pointer-map
6938 ** for that page now.
6939 **
6940 ** If this is the first overflow page, then write a partial entry
6941 ** to the pointer-map. If we write nothing to this pointer-map slot,
      ** then the optimistic overflow chain processing in clearCellOverflow()
6943 ** may misinterpret the uninitialized values and delete the
6944 ** wrong pages from the database.
6945 */
6946 if( pBt->autoVacuum && rc==SQLITE_OK ){
6947 u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1);
6948 ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap, &rc);
6949 if( rc ){
6950 releasePage(pOvfl);
6951 }
6952 }
6953#endif
6954 if( rc ){
6955 releasePage(pToRelease);
6956 return rc;
6957 }
6958
      /* If pToRelease is not zero then pPrior points into the data area
6960 ** of pToRelease. Make sure pToRelease is still writeable. */
6961 assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) );
6962
6963 /* If pPrior is part of the data area of pPage, then make sure pPage
6964 ** is still writeable */
6965 assert( pPrior<pPage->aData || pPrior>=&pPage->aData[pBt->pageSize]
6966 || sqlite3PagerIswriteable(pPage->pDbPage) );
6967
6968 put4byte(pPrior, pgnoOvfl);
6969 releasePage(pToRelease);
6970 pToRelease = pOvfl;
6971 pPrior = pOvfl->aData;
6972 put4byte(pPrior, 0);
6973 pPayload = &pOvfl->aData[4];
6974 spaceLeft = pBt->usableSize - 4;
6975 }
6976 }
6977 releasePage(pToRelease);
6978 return SQLITE_OK;
6979}
6980
6981/*
** Remove the i-th cell from pPage.  This routine affects pPage only.
6983** The cell content is not freed or deallocated. It is assumed that
6984** the cell content has been copied someplace else. This routine just
6985** removes the reference to the cell from pPage.
6986**
6987** "sz" must be the number of bytes in the cell.
6988*/
6989static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){
6990 u32 pc; /* Offset to cell content of cell being deleted */
6991 u8 *data; /* pPage->aData */
6992 u8 *ptr; /* Used to move bytes around within data[] */
6993 int rc; /* The return code */
  int hdr;        /* Beginning of the header: 0 for most pages, 100 for page 1 */
6995
6996 if( *pRC ) return;
6997 assert( idx>=0 );
6998 assert( idx<pPage->nCell );
6999 assert( CORRUPT_DB || sz==cellSize(pPage, idx) );
7000 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
7001 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
7002 assert( pPage->nFree>=0 );
7003 data = pPage->aData;
7004 ptr = &pPage->aCellIdx[2*idx];
7005 assert( pPage->pBt->usableSize > (u32)(ptr-data) );
7006 pc = get2byte(ptr);
7007 hdr = pPage->hdrOffset;
7008 testcase( pc==(u32)get2byte(&data[hdr+5]) );
7009 testcase( pc+sz==pPage->pBt->usableSize );
7010 if( pc+sz > pPage->pBt->usableSize ){
7011 *pRC = SQLITE_CORRUPT_BKPT;
7012 return;
7013 }
7014 rc = freeSpace(pPage, pc, sz);
7015 if( rc ){
7016 *pRC = rc;
7017 return;
7018 }
7019 pPage->nCell--;
7020 if( pPage->nCell==0 ){
7021 memset(&data[hdr+1], 0, 4);
7022 data[hdr+7] = 0;
7023 put2byte(&data[hdr+5], pPage->pBt->usableSize);
7024 pPage->nFree = pPage->pBt->usableSize - pPage->hdrOffset
7025 - pPage->childPtrSize - 8;
7026 }else{
7027 memmove(ptr, ptr+2, 2*(pPage->nCell - idx));
7028 put2byte(&data[hdr+3], pPage->nCell);
7029 pPage->nFree += 2;
7030 }
7031}
7032
7033/*
7034** Insert a new cell on pPage at cell index "i". pCell points to the
7035** content of the cell.
7036**
7037** If the cell content will fit on the page, then put it there. If it
7038** will not fit, then make a copy of the cell content into pTemp if
7039** pTemp is not null. Regardless of pTemp, allocate a new entry
7040** in pPage->apOvfl[] and make it point to the cell content (either
7041** in pTemp or the original pCell) and also record its index.
** Allocating a new entry in pPage->apOvfl[] implies that
7043** pPage->nOverflow is incremented.
7044**
7045** *pRC must be SQLITE_OK when this routine is called.
7046*/
7047static void insertCell(
7048 MemPage *pPage, /* Page into which we are copying */
7049 int i, /* New cell becomes the i-th cell of the page */
7050 u8 *pCell, /* Content of the new cell */
7051 int sz, /* Bytes of content in pCell */
7052 u8 *pTemp, /* Temp storage space for pCell, if needed */
7053 Pgno iChild, /* If non-zero, replace first 4 bytes with this value */
7054 int *pRC /* Read and write return code from here */
7055){
7056 int idx = 0; /* Where to write new cell content in data[] */
7057 int j; /* Loop counter */
7058 u8 *data; /* The content of the whole page */
  u8 *pIns;                  /* The point in pPage->aCellIdx[] where the new cell index is written */
7060
7061 assert( *pRC==SQLITE_OK );
7062 assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
7063 assert( MX_CELL(pPage->pBt)<=10921 );
7064 assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB );
7065 assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) );
7066 assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) );
7067 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
7068 assert( sz==pPage->xCellSize(pPage, pCell) || CORRUPT_DB );
7069 assert( pPage->nFree>=0 );
7070 if( pPage->nOverflow || sz+2>pPage->nFree ){
7071 if( pTemp ){
7072 memcpy(pTemp, pCell, sz);
7073 pCell = pTemp;
7074 }
7075 if( iChild ){
7076 put4byte(pCell, iChild);
7077 }
7078 j = pPage->nOverflow++;
    /* Comparison against ArraySize-1 since we hold back one extra slot
    ** as a contingency.  In other words, we never need more than 3 overflow
    ** slots, but 4 are allocated just to be safe. */
7082 assert( j < ArraySize(pPage->apOvfl)-1 );
7083 pPage->apOvfl[j] = pCell;
7084 pPage->aiOvfl[j] = (u16)i;
7085
7086 /* When multiple overflows occur, they are always sequential and in
    ** sorted order.  These invariants arise because multiple overflows can
7088 ** only occur when inserting divider cells into the parent page during
7089 ** balancing, and the dividers are adjacent and sorted.
7090 */
7091 assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */
7092 assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */
7093 }else{
7094 int rc = sqlite3PagerWrite(pPage->pDbPage);
7095 if( rc!=SQLITE_OK ){
7096 *pRC = rc;
7097 return;
7098 }
7099 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
7100 data = pPage->aData;
7101 assert( &data[pPage->cellOffset]==pPage->aCellIdx );
7102 rc = allocateSpace(pPage, sz, &idx);
7103 if( rc ){ *pRC = rc; return; }
7104 /* The allocateSpace() routine guarantees the following properties
7105 ** if it returns successfully */
7106 assert( idx >= 0 );
7107 assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB );
7108 assert( idx+sz <= (int)pPage->pBt->usableSize );
7109 pPage->nFree -= (u16)(2 + sz);
7110 if( iChild ){
7111 /* In a corrupt database where an entry in the cell index section of
7112 ** a btree page has a value of 3 or less, the pCell value might point
7113 ** as many as 4 bytes in front of the start of the aData buffer for
7114 ** the source page. Make sure this does not cause problems by not
      ** reading the first 4 bytes. */
7116 memcpy(&data[idx+4], pCell+4, sz-4);
7117 put4byte(&data[idx], iChild);
7118 }else{
7119 memcpy(&data[idx], pCell, sz);
7120 }
7121 pIns = pPage->aCellIdx + i*2;
7122 memmove(pIns+2, pIns, 2*(pPage->nCell - i));
7123 put2byte(pIns, idx);
7124 pPage->nCell++;
7125 /* increment the cell count */
7126 if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++;
7127 assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB );
7128#ifndef SQLITE_OMIT_AUTOVACUUM
7129 if( pPage->pBt->autoVacuum ){
7130 /* The cell may contain a pointer to an overflow page. If so, write
7131 ** the entry for the overflow page into the pointer map.
7132 */
7133 ptrmapPutOvflPtr(pPage, pPage, pCell, pRC);
7134 }
7135#endif
7136 }
7137}
7138
7139/*
7140** The following parameters determine how many adjacent pages get involved
7141** in a balancing operation. NN is the number of neighbors on either side
7142** of the page that participate in the balancing operation. NB is the
7143** total number of pages that participate, including the target page and
7144** NN neighbors on either side.
7145**
7146** The minimum value of NN is 1 (of course). Increasing NN above 1
7147** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
7148** in exchange for a larger degradation in INSERT and UPDATE performance.
** The current value of NN==1 appears to give the best results overall.
7150**
7151** (Later:) The description above makes it seem as if these values are
7152** tunable - as if you could change them and recompile and it would all work.
7153** But that is unlikely. NB has been 3 since the inception of SQLite and
7154** we have never tested any other value.
7155*/
7156#define NN 1 /* Number of neighbors on either side of pPage */
7157#define NB 3 /* (NN*2+1): Total pages involved in the balance */
7158
7159/*
7160** A CellArray object contains a cache of pointers and sizes for a
7161** consecutive sequence of cells that might be held on multiple pages.
7162**
** The cells in this array are the divider cell or cells from the pParent
** page plus the cells from up to three child pages.  There are a total of
** nCell cells.
7165**
7166** pRef is a pointer to one of the pages that contributes cells. This is
7167** used to access information such as MemPage.intKey and MemPage.pBt->pageSize
7168** which should be common to all pages that contribute cells to this array.
7169**
7170** apCell[] and szCell[] hold, respectively, pointers to the start of each
7171** cell and the size of each cell. Some of the apCell[] pointers might refer
** to overflow cells.  In other words, some apCell[] pointers might not point
** into the content area of the pages.
7174**
7175** A szCell[] of zero means the size of that cell has not yet been computed.
7176**
7177** The cells come from as many as four different pages:
7178**
7179** -----------
7180** | Parent |
7181** -----------
7182** / | \
7183** / | \
7184** --------- --------- ---------
7185** |Child-1| |Child-2| |Child-3|
7186** --------- --------- ---------
7187**
** The order of the cells in the array, for an index btree, is:
7189**
7190** 1. All cells from Child-1 in order
7191** 2. The first divider cell from Parent
7192** 3. All cells from Child-2 in order
7193** 4. The second divider cell from Parent
7194** 5. All cells from Child-3 in order
7195**
** For a table-btree (with rowids), items 2 and 4 are empty because
7197** content exists only in leaves and there are no divider cells.
7198**
** For an index btree, the apEnd[] array holds pointers to the end of the page
7200** for Child-1, the Parent, Child-2, the Parent (again), and Child-3,
7201** respectively. The ixNx[] array holds the number of cells contained in
7202** each of these 5 stages, and all stages to the left. Hence:
7203**
7204** ixNx[0] = Number of cells in Child-1.
7205** ixNx[1] = Number of cells in Child-1 plus 1 for first divider.
7206** ixNx[2] = Number of cells in Child-1 and Child-2 + 1 for 1st divider.
7207** ixNx[3] = Number of cells in Child-1 and Child-2 + both divider cells
7208** ixNx[4] = Total number of cells.
7209**
7210** For a table-btree, the concept is similar, except only apEnd[0]..apEnd[2]
** are used and they point to the leaf pages only, and the ixNx values are:
7212**
7213** ixNx[0] = Number of cells in Child-1.
7214** ixNx[1] = Number of cells in Child-1 and Child-2.
7215** ixNx[2] = Total number of cells.
7216**
7217** Sometimes when deleting, a child page can have zero cells. In those
7218** cases, ixNx[] entries with higher indexes, and the corresponding apEnd[]
7219** entries, shift down. The end result is that each ixNx[] entry should
** be larger than the previous one.
7221*/
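/* A worked example with illustrative cell counts: if Child-1, Child-2 and
** Child-3 hold 10, 12 and 9 cells respectively, then for an index btree
**
**      ixNx[] = { 10, 11, 23, 24, 33 }       (nCell==33)
**
** while for a table btree (no divider cells in the array)
**
**      ixNx[] = { 10, 22, 31 }               (nCell==31)
*/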
7222typedef struct CellArray CellArray;
7223struct CellArray {
7224 int nCell; /* Number of cells in apCell[] */
7225 MemPage *pRef; /* Reference page */
  u8 **apCell;                 /* All cells being balanced */
7227 u16 *szCell; /* Local size of all cells in apCell[] */
7228 u8 *apEnd[NB*2]; /* MemPage.aDataEnd values */
  int ixNx[NB*2];              /* Index at which we move to the next apEnd[] */
7230};
7231
7232/*
7233** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been
7234** computed.
7235*/
7236static void populateCellCache(CellArray *p, int idx, int N){
7237 assert( idx>=0 && idx+N<=p->nCell );
7238 while( N>0 ){
7239 assert( p->apCell[idx]!=0 );
7240 if( p->szCell[idx]==0 ){
7241 p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]);
7242 }else{
7243 assert( CORRUPT_DB ||
7244 p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) );
7245 }
7246 idx++;
7247 N--;
7248 }
7249}
7250
7251/*
7252** Return the size of the Nth element of the cell array
7253*/
7254static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){
7255 assert( N>=0 && N<p->nCell );
7256 assert( p->szCell[N]==0 );
7257 p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]);
7258 return p->szCell[N];
7259}
7260static u16 cachedCellSize(CellArray *p, int N){
7261 assert( N>=0 && N<p->nCell );
7262 if( p->szCell[N] ) return p->szCell[N];
7263 return computeCellSize(p, N);
7264}
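
#if 0
/* Illustrative sketch only -- never compiled.  It shows how a caller might
** use cachedCellSize() to measure how many bytes cells iFirst..iFirst+n-1
** of a CellArray would occupy on a page, counting 2 bytes per cell for the
** cell-pointer array slot.  This mirrors the packing arithmetic used by the
** balancing code below; the function name is hypothetical. */
static int exampleCellRangeSize(CellArray *p, int iFirst, int n){
  int i;
  int szTotal = 0;
  for(i=iFirst; i<iFirst+n; i++){
    szTotal += 2 + cachedCellSize(p, i);
  }
  return szTotal;
}
#endif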
7265
7266/*
7267** Array apCell[] contains pointers to nCell b-tree page cells. The
7268** szCell[] array contains the size in bytes of each cell. This function
7269** replaces the current contents of page pPg with the contents of the cell
7270** array.
7271**
7272** Some of the cells in apCell[] may currently be stored in pPg. This
7273** function works around problems caused by this by making a copy of any
7274** such cells before overwriting the page data.
7275**
7276** The MemPage.nFree field is invalidated by this function. It is the
7277** responsibility of the caller to set it correctly.
7278*/
7279static int rebuildPage(
7280 CellArray *pCArray, /* Content to be added to page pPg */
7281 int iFirst, /* First cell in pCArray to use */
7282 int nCell, /* Final number of cells on page */
7283 MemPage *pPg /* The page to be reconstructed */
7284){
7285 const int hdr = pPg->hdrOffset; /* Offset of header on pPg */
7286 u8 * const aData = pPg->aData; /* Pointer to data for pPg */
7287 const int usableSize = pPg->pBt->usableSize;
7288 u8 * const pEnd = &aData[usableSize];
7289 int i = iFirst; /* Which cell to copy from pCArray*/
7290 u32 j; /* Start of cell content area */
7291 int iEnd = i+nCell; /* Loop terminator */
7292 u8 *pCellptr = pPg->aCellIdx;
7293 u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
7294 u8 *pData;
7295 int k; /* Current slot in pCArray->apEnd[] */
7296 u8 *pSrcEnd; /* Current pCArray->apEnd[k] value */
7297
7298 assert( i<iEnd );
7299 j = get2byte(&aData[hdr+5]);
7300 if( j>(u32)usableSize ){ j = 0; }
7301 memcpy(&pTmp[j], &aData[j], usableSize - j);
7302
7303 for(k=0; pCArray->ixNx[k]<=i && ALWAYS(k<NB*2); k++){}
7304 pSrcEnd = pCArray->apEnd[k];
7305
7306 pData = pEnd;
7307 while( 1/*exit by break*/ ){
7308 u8 *pCell = pCArray->apCell[i];
7309 u16 sz = pCArray->szCell[i];
7310 assert( sz>0 );
7311 if( SQLITE_WITHIN(pCell,aData+j,pEnd) ){
7312 if( ((uptr)(pCell+sz))>(uptr)pEnd ) return SQLITE_CORRUPT_BKPT;
7313 pCell = &pTmp[pCell - aData];
7314 }else if( (uptr)(pCell+sz)>(uptr)pSrcEnd
7315 && (uptr)(pCell)<(uptr)pSrcEnd
7316 ){
7317 return SQLITE_CORRUPT_BKPT;
7318 }
7319
7320 pData -= sz;
7321 put2byte(pCellptr, (pData - aData));
7322 pCellptr += 2;
7323 if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT;
7324 memmove(pData, pCell, sz);
7325 assert( sz==pPg->xCellSize(pPg, pCell) || CORRUPT_DB );
7326 i++;
7327 if( i>=iEnd ) break;
7328 if( pCArray->ixNx[k]<=i ){
7329 k++;
7330 pSrcEnd = pCArray->apEnd[k];
7331 }
7332 }
7333
7334 /* The pPg->nFree field is now set incorrectly. The caller will fix it. */
7335 pPg->nCell = nCell;
7336 pPg->nOverflow = 0;
7337
7338 put2byte(&aData[hdr+1], 0);
7339 put2byte(&aData[hdr+3], pPg->nCell);
7340 put2byte(&aData[hdr+5], pData - aData);
7341 aData[hdr+7] = 0x00;
7342 return SQLITE_OK;
7343}
7344
7345/*
** The pCArray object contains pointers to b-tree cells and the cell sizes.
7347** This function attempts to add the cells stored in the array to page pPg.
7348** If it cannot (because the page needs to be defragmented before the cells
7349** will fit), non-zero is returned. Otherwise, if the cells are added
7350** successfully, zero is returned.
7351**
7352** Argument pCellptr points to the first entry in the cell-pointer array
7353** (part of page pPg) to populate. After cell apCell[0] is written to the
7354** page body, a 16-bit offset is written to pCellptr. And so on, for each
7355** cell in the array. It is the responsibility of the caller to ensure
7356** that it is safe to overwrite this part of the cell-pointer array.
7357**
7358** When this function is called, *ppData points to the start of the
7359** content area on page pPg. If the size of the content area is extended,
7360** *ppData is updated to point to the new start of the content area
7361** before returning.
7362**
7363** Finally, argument pBegin points to the byte immediately following the
7364** end of the space required by this page for the cell-pointer area (for
7365** all cells - not just those inserted by the current call). If the content
** area must be extended to before this point in order to accommodate all
7367** cells in apCell[], then the cells do not fit and non-zero is returned.
7368*/
7369static int pageInsertArray(
7370 MemPage *pPg, /* Page to add cells to */
7371 u8 *pBegin, /* End of cell-pointer array */
7372 u8 **ppData, /* IN/OUT: Page content-area pointer */
7373 u8 *pCellptr, /* Pointer to cell-pointer area */
7374 int iFirst, /* Index of first cell to add */
7375 int nCell, /* Number of cells to add to pPg */
7376 CellArray *pCArray /* Array of cells */
7377){
7378 int i = iFirst; /* Loop counter - cell index to insert */
7379 u8 *aData = pPg->aData; /* Complete page */
7380 u8 *pData = *ppData; /* Content area. A subset of aData[] */
7381 int iEnd = iFirst + nCell; /* End of loop. One past last cell to ins */
7382 int k; /* Current slot in pCArray->apEnd[] */
7383 u8 *pEnd; /* Maximum extent of cell data */
7384 assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */
7385 if( iEnd<=iFirst ) return 0;
7386 for(k=0; pCArray->ixNx[k]<=i && ALWAYS(k<NB*2); k++){}
7387 pEnd = pCArray->apEnd[k];
7388 while( 1 /*Exit by break*/ ){
7389 int sz, rc;
7390 u8 *pSlot;
7391 assert( pCArray->szCell[i]!=0 );
7392 sz = pCArray->szCell[i];
7393 if( (aData[1]==0 && aData[2]==0) || (pSlot = pageFindSlot(pPg,sz,&rc))==0 ){
7394 if( (pData - pBegin)<sz ) return 1;
7395 pData -= sz;
7396 pSlot = pData;
7397 }
7398 /* pSlot and pCArray->apCell[i] will never overlap on a well-formed
7399 ** database. But they might for a corrupt database. Hence use memmove()
    ** since memcpy() can raise SIGABRT with overlapping buffers on OpenBSD. */
7401 assert( (pSlot+sz)<=pCArray->apCell[i]
7402 || pSlot>=(pCArray->apCell[i]+sz)
7403 || CORRUPT_DB );
7404 if( (uptr)(pCArray->apCell[i]+sz)>(uptr)pEnd
7405 && (uptr)(pCArray->apCell[i])<(uptr)pEnd
7406 ){
7407 assert( CORRUPT_DB );
7408 (void)SQLITE_CORRUPT_BKPT;
7409 return 1;
7410 }
7411 memmove(pSlot, pCArray->apCell[i], sz);
7412 put2byte(pCellptr, (pSlot - aData));
7413 pCellptr += 2;
7414 i++;
7415 if( i>=iEnd ) break;
7416 if( pCArray->ixNx[k]<=i ){
7417 k++;
7418 pEnd = pCArray->apEnd[k];
7419 }
7420 }
7421 *ppData = pData;
7422 return 0;
7423}
7424
7425/*
7426** The pCArray object contains pointers to b-tree cells and their sizes.
7427**
7428** This function adds the space associated with each cell in the array
7429** that is currently stored within the body of pPg to the pPg free-list.
7430** The cell-pointers and other fields of the page are not updated.
7431**
7432** This function returns the total number of cells added to the free-list.
7433*/
7434static int pageFreeArray(
7435 MemPage *pPg, /* Page to edit */
7436 int iFirst, /* First cell to delete */
7437 int nCell, /* Cells to delete */
7438 CellArray *pCArray /* Array of cells */
7439){
7440 u8 * const aData = pPg->aData;
7441 u8 * const pEnd = &aData[pPg->pBt->usableSize];
7442 u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize];
7443 int nRet = 0;
7444 int i;
7445 int iEnd = iFirst + nCell;
7446 u8 *pFree = 0;
7447 int szFree = 0;
7448
7449 for(i=iFirst; i<iEnd; i++){
7450 u8 *pCell = pCArray->apCell[i];
7451 if( SQLITE_WITHIN(pCell, pStart, pEnd) ){
7452 int sz;
7453 /* No need to use cachedCellSize() here. The sizes of all cells that
      ** are to be freed have already been computed while deciding which
7455 ** cells need freeing */
7456 sz = pCArray->szCell[i]; assert( sz>0 );
7457 if( pFree!=(pCell + sz) ){
7458 if( pFree ){
7459 assert( pFree>aData && (pFree - aData)<65536 );
7460 freeSpace(pPg, (u16)(pFree - aData), szFree);
7461 }
7462 pFree = pCell;
7463 szFree = sz;
7464 if( pFree+sz>pEnd ){
7465 return 0;
7466 }
7467 }else{
7468 pFree = pCell;
7469 szFree += sz;
7470 }
7471 nRet++;
7472 }
7473 }
7474 if( pFree ){
7475 assert( pFree>aData && (pFree - aData)<65536 );
7476 freeSpace(pPg, (u16)(pFree - aData), szFree);
7477 }
7478 return nRet;
7479}
7480
7481/*
7482** pCArray contains pointers to and sizes of all cells in the page being
7483** balanced. The current page, pPg, has pPg->nCell cells starting with
7484** pCArray->apCell[iOld]. After balancing, this page should hold nNew cells
7485** starting at apCell[iNew].
7486**
7487** This routine makes the necessary adjustments to pPg so that it contains
7488** the correct cells after being balanced.
7489**
7490** The pPg->nFree field is invalid when this function returns. It is the
7491** responsibility of the caller to set it correctly.
7492*/
7493static int editPage(
7494 MemPage *pPg, /* Edit this page */
7495 int iOld, /* Index of first cell currently on page */
7496 int iNew, /* Index of new first cell on page */
7497 int nNew, /* Final number of cells on page */
7498 CellArray *pCArray /* Array of cells and sizes */
7499){
7500 u8 * const aData = pPg->aData;
7501 const int hdr = pPg->hdrOffset;
7502 u8 *pBegin = &pPg->aCellIdx[nNew * 2];
7503 int nCell = pPg->nCell; /* Cells stored on pPg */
7504 u8 *pData;
7505 u8 *pCellptr;
7506 int i;
7507 int iOldEnd = iOld + pPg->nCell + pPg->nOverflow;
7508 int iNewEnd = iNew + nNew;
7509
7510#ifdef SQLITE_DEBUG
7511 u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
7512 memcpy(pTmp, aData, pPg->pBt->usableSize);
7513#endif
7514
7515 /* Remove cells from the start and end of the page */
7516 assert( nCell>=0 );
7517 if( iOld<iNew ){
7518 int nShift = pageFreeArray(pPg, iOld, iNew-iOld, pCArray);
7519 if( NEVER(nShift>nCell) ) return SQLITE_CORRUPT_BKPT;
7520 memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2);
7521 nCell -= nShift;
7522 }
7523 if( iNewEnd < iOldEnd ){
7524 int nTail = pageFreeArray(pPg, iNewEnd, iOldEnd - iNewEnd, pCArray);
7525 assert( nCell>=nTail );
7526 nCell -= nTail;
7527 }
7528
7529 pData = &aData[get2byteNotZero(&aData[hdr+5])];
7530 if( pData<pBegin ) goto editpage_fail;
7531 if( pData>pPg->aDataEnd ) goto editpage_fail;
7532
7533 /* Add cells to the start of the page */
7534 if( iNew<iOld ){
7535 int nAdd = MIN(nNew,iOld-iNew);
7536 assert( (iOld-iNew)<nNew || nCell==0 || CORRUPT_DB );
7537 assert( nAdd>=0 );
7538 pCellptr = pPg->aCellIdx;
7539 memmove(&pCellptr[nAdd*2], pCellptr, nCell*2);
7540 if( pageInsertArray(
7541 pPg, pBegin, &pData, pCellptr,
7542 iNew, nAdd, pCArray
7543 ) ) goto editpage_fail;
7544 nCell += nAdd;
7545 }
7546
7547 /* Add any overflow cells */
7548 for(i=0; i<pPg->nOverflow; i++){
7549 int iCell = (iOld + pPg->aiOvfl[i]) - iNew;
7550 if( iCell>=0 && iCell<nNew ){
7551 pCellptr = &pPg->aCellIdx[iCell * 2];
7552 if( nCell>iCell ){
7553 memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2);
7554 }
7555 nCell++;
7556 cachedCellSize(pCArray, iCell+iNew);
7557 if( pageInsertArray(
7558 pPg, pBegin, &pData, pCellptr,
7559 iCell+iNew, 1, pCArray
7560 ) ) goto editpage_fail;
7561 }
7562 }
7563
7564 /* Append cells to the end of the page */
7565 assert( nCell>=0 );
7566 pCellptr = &pPg->aCellIdx[nCell*2];
7567 if( pageInsertArray(
7568 pPg, pBegin, &pData, pCellptr,
7569 iNew+nCell, nNew-nCell, pCArray
7570 ) ) goto editpage_fail;
7571
7572 pPg->nCell = nNew;
7573 pPg->nOverflow = 0;
7574
7575 put2byte(&aData[hdr+3], pPg->nCell);
7576 put2byte(&aData[hdr+5], pData - aData);
7577
7578#ifdef SQLITE_DEBUG
7579 for(i=0; i<nNew && !CORRUPT_DB; i++){
7580 u8 *pCell = pCArray->apCell[i+iNew];
7581 int iOff = get2byteAligned(&pPg->aCellIdx[i*2]);
7582 if( SQLITE_WITHIN(pCell, aData, &aData[pPg->pBt->usableSize]) ){
7583 pCell = &pTmp[pCell - aData];
7584 }
7585 assert( 0==memcmp(pCell, &aData[iOff],
7586 pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) );
7587 }
7588#endif
7589
7590 return SQLITE_OK;
7591 editpage_fail:
7592 /* Unable to edit this page. Rebuild it from scratch instead. */
7593 populateCellCache(pCArray, iNew, nNew);
7594 return rebuildPage(pCArray, iNew, nNew, pPg);
7595}
7596
7597
7598#ifndef SQLITE_OMIT_QUICKBALANCE
7599/*
7600** This version of balance() handles the common special case where
7601** a new entry is being inserted on the extreme right-end of the
7602** tree, in other words, when the new entry will become the largest
7603** entry in the tree.
7604**
7605** Instead of trying to balance the 3 right-most leaf pages, just add
7606** a new page to the right-hand side and put the one new entry in
7607** that page. This leaves the right side of the tree somewhat
7608** unbalanced. But odds are that we will be inserting new entries
7609** at the end soon afterwards so the nearly empty page will quickly
7610** fill up. On average.
7611**
7612** pPage is the leaf page which is the right-most page in the tree.
7613** pParent is its parent. pPage must have a single overflow entry
7614** which is also the right-most entry on the page.
7615**
7616** The pSpace buffer is used to store a temporary copy of the divider
7617** cell that will be inserted into pParent. Such a cell consists of a 4
7618** byte page number followed by a variable length integer. In other
7619** words, at most 13 bytes. Hence the pSpace buffer must be at
7620** least 13 bytes in size.
7621*/
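/* The 13-byte bound above follows from the cell layout: 4 bytes for the
** child page number plus a key varint, and a varint never exceeds 9 bytes,
** so 4 + 9 == 13.
*/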
7622static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
7623 BtShared *const pBt = pPage->pBt; /* B-Tree Database */
7624 MemPage *pNew; /* Newly allocated page */
7625 int rc; /* Return Code */
7626 Pgno pgnoNew; /* Page number of pNew */
7627
7628 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
7629 assert( sqlite3PagerIswriteable(pParent->pDbPage) );
7630 assert( pPage->nOverflow==1 );
7631
7632 if( pPage->nCell==0 ) return SQLITE_CORRUPT_BKPT; /* dbfuzz001.test */
7633 assert( pPage->nFree>=0 );
7634 assert( pParent->nFree>=0 );
7635
7636 /* Allocate a new page. This page will become the right-sibling of
7637 ** pPage. Make the parent page writable, so that the new divider cell
7638 ** may be inserted. If both these operations are successful, proceed.
7639 */
7640 rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
7641
7642 if( rc==SQLITE_OK ){
7643
7644 u8 *pOut = &pSpace[4];
7645 u8 *pCell = pPage->apOvfl[0];
7646 u16 szCell = pPage->xCellSize(pPage, pCell);
7647 u8 *pStop;
7648 CellArray b;
7649
7650 assert( sqlite3PagerIswriteable(pNew->pDbPage) );
7651 assert( CORRUPT_DB || pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) );
7652 zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF);
7653 b.nCell = 1;
7654 b.pRef = pPage;
7655 b.apCell = &pCell;
7656 b.szCell = &szCell;
7657 b.apEnd[0] = pPage->aDataEnd;
7658 b.ixNx[0] = 2;
7659 rc = rebuildPage(&b, 0, 1, pNew);
7660 if( NEVER(rc) ){
7661 releasePage(pNew);
7662 return rc;
7663 }
7664 pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell;
7665
7666 /* If this is an auto-vacuum database, update the pointer map
7667 ** with entries for the new page, and any pointer from the
7668 ** cell on the page to an overflow page. If either of these
7669 ** operations fails, the return code is set, but the contents
    ** of the parent page are still manipulated by the code below.
    ** That is OK: at this point the parent page is guaranteed to
7672 ** be marked as dirty. Returning an error code will cause a
7673 ** rollback, undoing any changes made to the parent page.
7674 */
7675 if( ISAUTOVACUUM ){
7676 ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno, &rc);
7677 if( szCell>pNew->minLocal ){
7678 ptrmapPutOvflPtr(pNew, pNew, pCell, &rc);
7679 }
7680 }
7681
7682 /* Create a divider cell to insert into pParent. The divider cell
7683 ** consists of a 4-byte page number (the page number of pPage) and
7684 ** a variable length key value (which must be the same value as the
7685 ** largest key on pPage).
7686 **
7687 ** To find the largest key value on pPage, first find the right-most
7688 ** cell on pPage. The first two fields of this cell are the
7689 ** record-length (a variable length integer at most 32-bits in size)
7690 ** and the key value (a variable length integer, may have any value).
7691 ** The first of the while(...) loops below skips over the record-length
7692 ** field. The second while(...) loop copies the key value from the
7693 ** cell on pPage into the pSpace buffer.
7694 */
7695 pCell = findCell(pPage, pPage->nCell-1);
7696 pStop = &pCell[9];
7697 while( (*(pCell++)&0x80) && pCell<pStop );
7698 pStop = &pCell[9];
7699 while( ((*(pOut++) = *(pCell++))&0x80) && pCell<pStop );
7700
7701 /* Insert the new divider cell into pParent. */
7702 if( rc==SQLITE_OK ){
7703 insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace),
7704 0, pPage->pgno, &rc);
7705 }
7706
7707 /* Set the right-child pointer of pParent to point to the new page. */
7708 put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
7709
7710 /* Release the reference to the new page. */
7711 releasePage(pNew);
7712 }
7713
7714 return rc;
7715}
7716#endif /* SQLITE_OMIT_QUICKBALANCE */
7717
7718#if 0
7719/*
7720** This function does not contribute anything to the operation of SQLite.
** It is sometimes activated temporarily while debugging code responsible
7722** for setting pointer-map entries.
7723*/
7724static int ptrmapCheckPages(MemPage **apPage, int nPage){
7725 int i, j;
7726 for(i=0; i<nPage; i++){
7727 Pgno n;
7728 u8 e;
7729 MemPage *pPage = apPage[i];
7730 BtShared *pBt = pPage->pBt;
7731 assert( pPage->isInit );
7732
7733 for(j=0; j<pPage->nCell; j++){
7734 CellInfo info;
7735 u8 *z;
7736
7737 z = findCell(pPage, j);
7738 pPage->xParseCell(pPage, z, &info);
7739 if( info.nLocal<info.nPayload ){
7740 Pgno ovfl = get4byte(&z[info.nSize-4]);
7741 ptrmapGet(pBt, ovfl, &e, &n);
7742 assert( n==pPage->pgno && e==PTRMAP_OVERFLOW1 );
7743 }
7744 if( !pPage->leaf ){
7745 Pgno child = get4byte(z);
7746 ptrmapGet(pBt, child, &e, &n);
7747 assert( n==pPage->pgno && e==PTRMAP_BTREE );
7748 }
7749 }
7750 if( !pPage->leaf ){
7751 Pgno child = get4byte(&pPage->aData[pPage->hdrOffset+8]);
7752 ptrmapGet(pBt, child, &e, &n);
7753 assert( n==pPage->pgno && e==PTRMAP_BTREE );
7754 }
7755 }
7756 return 1;
7757}
7758#endif
7759
7760/*
7761** This function is used to copy the contents of the b-tree node stored
7762** on page pFrom to page pTo. If page pFrom was not a leaf page, then
7763** the pointer-map entries for each child page are updated so that the
7764** parent page stored in the pointer map is page pTo. If pFrom contained
7765** any cells with overflow page pointers, then the corresponding pointer
7766** map entries are also updated so that the parent page is page pTo.
7767**
7768** If pFrom is currently carrying any overflow cells (entries in the
7769** MemPage.apOvfl[] array), they are not copied to pTo.
7770**
7771** Before returning, page pTo is reinitialized using btreeInitPage().
7772**
7773** The performance of this function is not critical. It is only used by
7774** the balance_shallower() and balance_deeper() procedures, neither of
7775** which are called often under normal circumstances.
7776*/
7777static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){
7778 if( (*pRC)==SQLITE_OK ){
7779 BtShared * const pBt = pFrom->pBt;
7780 u8 * const aFrom = pFrom->aData;
7781 u8 * const aTo = pTo->aData;
7782 int const iFromHdr = pFrom->hdrOffset;
7783 int const iToHdr = ((pTo->pgno==1) ? 100 : 0);
7784 int rc;
7785 int iData;
7786
7787
7788 assert( pFrom->isInit );
7789 assert( pFrom->nFree>=iToHdr );
7790 assert( get2byte(&aFrom[iFromHdr+5]) <= (int)pBt->usableSize );
7791
7792 /* Copy the b-tree node content from page pFrom to page pTo. */
7793 iData = get2byte(&aFrom[iFromHdr+5]);
7794 memcpy(&aTo[iData], &aFrom[iData], pBt->usableSize-iData);
7795 memcpy(&aTo[iToHdr], &aFrom[iFromHdr], pFrom->cellOffset + 2*pFrom->nCell);
7796
7797 /* Reinitialize page pTo so that the contents of the MemPage structure
7798 ** match the new data. The initialization of pTo can actually fail under
7799 ** fairly obscure circumstances, even though it is a copy of initialized
7800 ** page pFrom.
7801 */
7802 pTo->isInit = 0;
7803 rc = btreeInitPage(pTo);
7804 if( rc==SQLITE_OK ) rc = btreeComputeFreeSpace(pTo);
7805 if( rc!=SQLITE_OK ){
7806 *pRC = rc;
7807 return;
7808 }
7809
7810 /* If this is an auto-vacuum database, update the pointer-map entries
7811 ** for any b-tree or overflow pages that pTo now contains the pointers to.
7812 */
7813 if( ISAUTOVACUUM ){
7814 *pRC = setChildPtrmaps(pTo);
7815 }
7816 }
7817}
7818
7819/*
7820** This routine redistributes cells on the iParentIdx'th child of pParent
7821** (hereafter "the page") and up to 2 siblings so that all pages have about the
7822** same amount of free space. Usually a single sibling on either side of the
** page is used in the balancing, though both siblings might come from one
7824** side if the page is the first or last child of its parent. If the page
7825** has fewer than 2 siblings (something which can only happen if the page
7826** is a root page or a child of a root page) then all available siblings
7827** participate in the balancing.
7828**
7829** The number of siblings of the page might be increased or decreased by
7830** one or two in an effort to keep pages nearly full but not over full.
7831**
7832** Note that when this routine is called, some of the cells on the page
7833** might not actually be stored in MemPage.aData[]. This can happen
7834** if the page is overfull. This routine ensures that all cells allocated
7835** to the page and its siblings fit into MemPage.aData[] before returning.
7836**
7837** In the course of balancing the page and its siblings, cells may be
7838** inserted into or removed from the parent page (pParent). Doing so
7839** may cause the parent page to become overfull or underfull. If this
7840** happens, it is the responsibility of the caller to invoke the correct
7841** balancing routine to fix this problem (see the balance() routine).
7842**
7843** If this routine fails for any reason, it might leave the database
7844** in a corrupted state. So if this routine fails, the database should
7845** be rolled back.
7846**
7847** The third argument to this function, aOvflSpace, is a pointer to a
7848** buffer big enough to hold one page. If while inserting cells into the parent
7849** page (pParent) the parent page becomes overfull, this buffer is
7850** used to store the parent's overflow cells. Because this function inserts
7851** a maximum of four divider cells into the parent page, and the maximum
7852** size of a cell stored within an internal node is always less than 1/4
7853** of the page-size, the aOvflSpace[] buffer is guaranteed to be large
7854** enough for all overflow cells.
7855**
7856** If aOvflSpace is set to a null pointer, this function returns
7857** SQLITE_NOMEM.
7858*/
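/* As a sanity check on the sizing argument above: with a 4096-byte page,
** each divider cell stored in an internal node is smaller than 4096/4 ==
** 1024 bytes, so even four of them need fewer than 4096 bytes, which is
** exactly the size of the aOvflSpace[] buffer.
*/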
7859static int balance_nonroot(
7860 MemPage *pParent, /* Parent page of siblings being balanced */
7861 int iParentIdx, /* Index of "the page" in pParent */
7862 u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */
7863 int isRoot, /* True if pParent is a root-page */
7864 int bBulk /* True if this call is part of a bulk load */
7865){
7866 BtShared *pBt; /* The whole database */
7867 int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */
7868 int nNew = 0; /* Number of pages in apNew[] */
7869 int nOld; /* Number of pages in apOld[] */
7870 int i, j, k; /* Loop counters */
  int nxDiv;                   /* Index of the next divider cell in pParent */
7872 int rc = SQLITE_OK; /* The return code */
7873 u16 leafCorrection; /* 4 if pPage is a leaf. 0 if not */
7874 int leafData; /* True if pPage is a leaf of a LEAFDATA tree */
7875 int usableSpace; /* Bytes in pPage beyond the header */
7876 int pageFlags; /* Value of pPage->aData[0] */
7877 int iSpace1 = 0; /* First unused byte of aSpace1[] */
7878 int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */
7879 int szScratch; /* Size of scratch memory requested */
7880 MemPage *apOld[NB]; /* pPage and up to two siblings */
7881 MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
7882 u8 *pRight; /* Location in parent of right-sibling pointer */
7883 u8 *apDiv[NB-1]; /* Divider cells in pParent */
7884  int cntNew[NB+2];            /* Index in b.apCell[] of cell after i-th page */
7885 int cntOld[NB+2]; /* Old index in b.apCell[] */
7886 int szNew[NB+2]; /* Combined size of cells placed on i-th page */
7887  u8 *aSpace1;                 /* Space for copies of divider cells */
7888 Pgno pgno; /* Temp var to store a page number in */
7889 u8 abDone[NB+2]; /* True after i'th new page is populated */
7890 Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */
7891 CellArray b; /* Parsed information on cells being balanced */
7892
7893 memset(abDone, 0, sizeof(abDone));
7894 memset(&b, 0, sizeof(b));
7895 pBt = pParent->pBt;
7896 assert( sqlite3_mutex_held(pBt->mutex) );
7897 assert( sqlite3PagerIswriteable(pParent->pDbPage) );
7898
7899 /* At this point pParent may have at most one overflow cell. And if
7900 ** this overflow cell is present, it must be the cell with
7901 ** index iParentIdx. This scenario comes about when this function
7902 ** is called (indirectly) from sqlite3BtreeDelete().
7903 */
7904 assert( pParent->nOverflow==0 || pParent->nOverflow==1 );
7905 assert( pParent->nOverflow==0 || pParent->aiOvfl[0]==iParentIdx );
7906
7907 if( !aOvflSpace ){
7908 return SQLITE_NOMEM_BKPT;
7909 }
7910 assert( pParent->nFree>=0 );
7911
7912 /* Find the sibling pages to balance. Also locate the cells in pParent
7913 ** that divide the siblings. An attempt is made to find NN siblings on
7914 ** either side of pPage. More siblings are taken from one side, however,
7915 ** if there are fewer than NN siblings on the other side. If pParent
7916 ** has NB or fewer children then all children of pParent are taken.
7917 **
7918 ** This loop also drops the divider cells from the parent page. This
7919 ** way, the remainder of the function does not have to deal with any
7920 ** overflow cells in the parent page, since if any existed they will
7921 ** have already been removed.
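  **
  ** Illustrative example (hypothetical numbers, not taken from the
  ** sources): if pParent holds ten cells, has no overflow cell, bBulk==0,
  ** and iParentIdx==4, then the code below sets nxDiv=3 and nOld=3.  The
  ** siblings loaded into apOld[] are "the page" plus one neighbour on
  ** each side (the children on either side of divider cells 3 and 4),
  ** and those two divider cells are dropped from pParent.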
7922 */
7923 i = pParent->nOverflow + pParent->nCell;
7924 if( i<2 ){
7925 nxDiv = 0;
7926 }else{
7927 assert( bBulk==0 || bBulk==1 );
7928 if( iParentIdx==0 ){
7929 nxDiv = 0;
7930 }else if( iParentIdx==i ){
7931 nxDiv = i-2+bBulk;
7932 }else{
7933 nxDiv = iParentIdx-1;
7934 }
7935 i = 2-bBulk;
7936 }
7937 nOld = i+1;
7938 if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){
7939 pRight = &pParent->aData[pParent->hdrOffset+8];
7940 }else{
7941 pRight = findCell(pParent, i+nxDiv-pParent->nOverflow);
7942 }
7943 pgno = get4byte(pRight);
7944 while( 1 ){
7945 if( rc==SQLITE_OK ){
7946 rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0);
7947 }
7948 if( rc ){
7949 memset(apOld, 0, (i+1)*sizeof(MemPage*));
7950 goto balance_cleanup;
7951 }
7952 if( apOld[i]->nFree<0 ){
7953 rc = btreeComputeFreeSpace(apOld[i]);
7954 if( rc ){
7955 memset(apOld, 0, (i)*sizeof(MemPage*));
7956 goto balance_cleanup;
7957 }
7958 }
7959 nMaxCells += apOld[i]->nCell + ArraySize(pParent->apOvfl);
7960 if( (i--)==0 ) break;
7961
7962 if( pParent->nOverflow && i+nxDiv==pParent->aiOvfl[0] ){
7963 apDiv[i] = pParent->apOvfl[0];
7964 pgno = get4byte(apDiv[i]);
7965 szNew[i] = pParent->xCellSize(pParent, apDiv[i]);
7966 pParent->nOverflow = 0;
7967 }else{
7968 apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow);
7969 pgno = get4byte(apDiv[i]);
7970 szNew[i] = pParent->xCellSize(pParent, apDiv[i]);
7971
7972 /* Drop the cell from the parent page. apDiv[i] still points to
7973 ** the cell within the parent, even though it has been dropped.
7974 ** This is safe because dropping a cell only overwrites the first
7975 ** four bytes of it, and this function does not need the first
7976 ** four bytes of the divider cell. So the pointer is safe to use
7977 ** later on.
7978 **
7979 ** But not if we are in secure-delete mode. In secure-delete mode,
7980 ** the dropCell() routine will overwrite the entire cell with zeroes.
7981 ** In this case, temporarily copy the cell into the aOvflSpace[]
7982 ** buffer. It will be copied out again as soon as the aSpace[] buffer
7983 ** is allocated. */
7984 if( pBt->btsFlags & BTS_FAST_SECURE ){
7985 int iOff;
7986
7987 /* If the following if() condition is not true, the db is corrupted.
7988 ** The call to dropCell() below will detect this. */
7989 iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData);
7990 if( (iOff+szNew[i])<=(int)pBt->usableSize ){
7991 memcpy(&aOvflSpace[iOff], apDiv[i], szNew[i]);
7992 apDiv[i] = &aOvflSpace[apDiv[i]-pParent->aData];
7993 }
7994 }
7995 dropCell(pParent, i+nxDiv-pParent->nOverflow, szNew[i], &rc);
7996 }
7997 }
7998
7999 /* Make nMaxCells a multiple of 4 in order to preserve 8-byte
8000 ** alignment */
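  /* (The rounding works because b.apCell[] and b.szCell[] are laid out
  ** back-to-back ahead of aSpace1[] in the scratch allocation below.
  ** With nMaxCells a multiple of 4, nMaxCells*(sizeof(u8*)+sizeof(u16))
  ** is a multiple of 8 for both 4-byte and 8-byte pointers, so aSpace1
  ** remains 8-byte aligned.  Illustrative: nMaxCells==13 rounds to 16.) */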
8001 nMaxCells = (nMaxCells + 3)&~3;
8002
8003 /*
8004 ** Allocate space for memory structures
8005 */
8006 szScratch =
8007 nMaxCells*sizeof(u8*) /* b.apCell */
8008 + nMaxCells*sizeof(u16) /* b.szCell */
8009 + pBt->pageSize; /* aSpace1 */
8010
8011 assert( szScratch<=7*(int)pBt->pageSize );
8012 b.apCell = sqlite3StackAllocRaw(0, szScratch );
8013 if( b.apCell==0 ){
8014 rc = SQLITE_NOMEM_BKPT;
8015 goto balance_cleanup;
8016 }
8017 b.szCell = (u16*)&b.apCell[nMaxCells];
8018 aSpace1 = (u8*)&b.szCell[nMaxCells];
8019 assert( EIGHT_BYTE_ALIGNMENT(aSpace1) );
8020
8021 /*
8022 ** Load pointers to all cells on sibling pages and the divider cells
8023 ** into the local b.apCell[] array. Make copies of the divider cells
8024 ** into space obtained from aSpace1[]. The divider cells have already
8025 ** been removed from pParent.
8026 **
8027 ** If the siblings are on leaf pages, then the child pointers of the
8028 ** divider cells are stripped from the cells before they are copied
8029 ** into aSpace1[]. In this way, all cells in b.apCell[] are without
8030  ** child pointers. If siblings are not leaves, then all cells in
8031 ** b.apCell[] include child pointers. Either way, all cells in b.apCell[]
8032 ** are alike.
8033 **
8034 ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf.
8035 ** leafData: 1 if pPage holds key+data and pParent holds only keys.
8036 */
8037 b.pRef = apOld[0];
8038 leafCorrection = b.pRef->leaf*4;
8039 leafData = b.pRef->intKeyLeaf;
8040 for(i=0; i<nOld; i++){
8041 MemPage *pOld = apOld[i];
8042 int limit = pOld->nCell;
8043 u8 *aData = pOld->aData;
8044 u16 maskPage = pOld->maskPage;
8045 u8 *piCell = aData + pOld->cellOffset;
8046 u8 *piEnd;
8047 VVA_ONLY( int nCellAtStart = b.nCell; )
8048
8049 /* Verify that all sibling pages are of the same "type" (table-leaf,
8050 ** table-interior, index-leaf, or index-interior).
8051 */
8052 if( pOld->aData[0]!=apOld[0]->aData[0] ){
8053 rc = SQLITE_CORRUPT_BKPT;
8054 goto balance_cleanup;
8055 }
8056
8057 /* Load b.apCell[] with pointers to all cells in pOld. If pOld
8058 ** contains overflow cells, include them in the b.apCell[] array
8059 ** in the correct spot.
8060 **
8061 ** Note that when there are multiple overflow cells, it is always the
8062 ** case that they are sequential and adjacent. This invariant arises
8063    ** because multiple overflows can only occur when inserting divider
8064 ** cells into a parent on a prior balance, and divider cells are always
8065 ** adjacent and are inserted in order. There is an assert() tagged
8066 ** with "NOTE 1" in the overflow cell insertion loop to prove this
8067 ** invariant.
8068 **
8069 ** This must be done in advance. Once the balance starts, the cell
8070 ** offset section of the btree page will be overwritten and we will no
8071    ** longer be able to find the cells if a pointer to each cell is not saved
8072 ** first.
8073 */
8074 memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*(limit+pOld->nOverflow));
8075 if( pOld->nOverflow>0 ){
8076 if( NEVER(limit<pOld->aiOvfl[0]) ){
8077 rc = SQLITE_CORRUPT_BKPT;
8078 goto balance_cleanup;
8079 }
8080 limit = pOld->aiOvfl[0];
8081 for(j=0; j<limit; j++){
8082 b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
8083 piCell += 2;
8084 b.nCell++;
8085 }
8086 for(k=0; k<pOld->nOverflow; k++){
8087 assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */
8088 b.apCell[b.nCell] = pOld->apOvfl[k];
8089 b.nCell++;
8090 }
8091 }
8092 piEnd = aData + pOld->cellOffset + 2*pOld->nCell;
8093 while( piCell<piEnd ){
8094 assert( b.nCell<nMaxCells );
8095 b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
8096 piCell += 2;
8097 b.nCell++;
8098 }
8099 assert( (b.nCell-nCellAtStart)==(pOld->nCell+pOld->nOverflow) );
8100
8101 cntOld[i] = b.nCell;
8102 if( i<nOld-1 && !leafData){
8103 u16 sz = (u16)szNew[i];
8104 u8 *pTemp;
8105 assert( b.nCell<nMaxCells );
8106 b.szCell[b.nCell] = sz;
8107 pTemp = &aSpace1[iSpace1];
8108 iSpace1 += sz;
8109 assert( sz<=pBt->maxLocal+23 );
8110 assert( iSpace1 <= (int)pBt->pageSize );
8111 memcpy(pTemp, apDiv[i], sz);
8112 b.apCell[b.nCell] = pTemp+leafCorrection;
8113 assert( leafCorrection==0 || leafCorrection==4 );
8114 b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection;
8115 if( !pOld->leaf ){
8116 assert( leafCorrection==0 );
8117 assert( pOld->hdrOffset==0 || CORRUPT_DB );
8118 /* The right pointer of the child page pOld becomes the left
8119 ** pointer of the divider cell */
8120 memcpy(b.apCell[b.nCell], &pOld->aData[8], 4);
8121 }else{
8122 assert( leafCorrection==4 );
8123 while( b.szCell[b.nCell]<4 ){
8124 /* Do not allow any cells smaller than 4 bytes. If a smaller cell
8125 ** does exist, pad it with 0x00 bytes. */
8126 assert( b.szCell[b.nCell]==3 || CORRUPT_DB );
8127 assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB );
8128 aSpace1[iSpace1++] = 0x00;
8129 b.szCell[b.nCell]++;
8130 }
8131 }
8132 b.nCell++;
8133 }
8134 }
8135
8136 /*
8137 ** Figure out the number of pages needed to hold all b.nCell cells.
8138 ** Store this number in "k". Also compute szNew[] which is the total
8139 ** size of all cells on the i-th page and cntNew[] which is the index
8140 ** in b.apCell[] of the cell that divides page i from page i+1.
8141 ** cntNew[k] should equal b.nCell.
8142 **
8143 ** Values computed by this block:
8144 **
8145 ** k: The total number of sibling pages
8146  **    szNew[i]: Space used on the i-th sibling page.
8147 ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to
8148 ** the right of the i-th sibling page.
8149 ** usableSpace: Number of bytes of space available on each sibling.
8150 **
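  ** Illustrative example (hypothetical numbers): with two leaf siblings,
  ** usableSpace==4088, and initial sizes szNew[] = {4500, 1500} (page 0
  ** overfull because of an overflow cell), the first while-loop below
  ** moves cells off the tail of page 0 onto page 1 until szNew[0] fits,
  ** and the second while-loop then pulls cells back onto page 0 for as
  ** long as they still fit, leaving cntNew[0] as the index of the first
  ** cell destined for page 1.  If the last sibling cannot hold all of
  ** its cells, a new sibling is added by setting k=i+2.
  **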
8151 */
8152 usableSpace = pBt->usableSize - 12 + leafCorrection;
8153 for(i=k=0; i<nOld; i++, k++){
8154 MemPage *p = apOld[i];
8155 b.apEnd[k] = p->aDataEnd;
8156 b.ixNx[k] = cntOld[i];
8157 if( k && b.ixNx[k]==b.ixNx[k-1] ){
8158 k--; /* Omit b.ixNx[] entry for child pages with no cells */
8159 }
8160 if( !leafData ){
8161 k++;
8162 b.apEnd[k] = pParent->aDataEnd;
8163 b.ixNx[k] = cntOld[i]+1;
8164 }
8165 assert( p->nFree>=0 );
8166 szNew[i] = usableSpace - p->nFree;
8167 for(j=0; j<p->nOverflow; j++){
8168 szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]);
8169 }
8170 cntNew[i] = cntOld[i];
8171 }
8172 k = nOld;
8173 for(i=0; i<k; i++){
8174 int sz;
8175 while( szNew[i]>usableSpace ){
8176 if( i+1>=k ){
8177 k = i+2;
8178 if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; }
8179 szNew[k-1] = 0;
8180 cntNew[k-1] = b.nCell;
8181 }
8182 sz = 2 + cachedCellSize(&b, cntNew[i]-1);
8183 szNew[i] -= sz;
8184 if( !leafData ){
8185 if( cntNew[i]<b.nCell ){
8186 sz = 2 + cachedCellSize(&b, cntNew[i]);
8187 }else{
8188 sz = 0;
8189 }
8190 }
8191 szNew[i+1] += sz;
8192 cntNew[i]--;
8193 }
8194 while( cntNew[i]<b.nCell ){
8195 sz = 2 + cachedCellSize(&b, cntNew[i]);
8196 if( szNew[i]+sz>usableSpace ) break;
8197 szNew[i] += sz;
8198 cntNew[i]++;
8199 if( !leafData ){
8200 if( cntNew[i]<b.nCell ){
8201 sz = 2 + cachedCellSize(&b, cntNew[i]);
8202 }else{
8203 sz = 0;
8204 }
8205 }
8206 szNew[i+1] -= sz;
8207 }
8208 if( cntNew[i]>=b.nCell ){
8209 k = i+1;
8210 }else if( cntNew[i] <= (i>0 ? cntNew[i-1] : 0) ){
8211 rc = SQLITE_CORRUPT_BKPT;
8212 goto balance_cleanup;
8213 }
8214 }
8215
8216 /*
8217 ** The packing computed by the previous block is biased toward the siblings
8218 ** on the left side (siblings with smaller keys). The left siblings are
8219 ** always nearly full, while the right-most sibling might be nearly empty.
8220 ** The next block of code attempts to adjust the packing of siblings to
8221 ** get a better balance.
8222 **
8223 ** This adjustment is more than an optimization. The packing above might
8224 ** be so out of balance as to be illegal. For example, the right-most
8225 ** sibling might be completely empty. This adjustment is not optional.
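  **
  ** Illustrative example (hypothetical numbers): if the block above left
  ** two leaf siblings packed as szNew[] = {4000, 16}, the loop below
  ** walks cells from the tail of the left sibling onto the front of the
  ** right sibling, stopping once moving another cell would make the
  ** right side larger than the left, so the two siblings end up holding
  ** roughly equal amounts of data.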
8226 */
8227 for(i=k-1; i>0; i--){
8228 int szRight = szNew[i]; /* Size of sibling on the right */
8229 int szLeft = szNew[i-1]; /* Size of sibling on the left */
8230 int r; /* Index of right-most cell in left sibling */
8231 int d; /* Index of first cell to the left of right sibling */
8232
8233 r = cntNew[i-1] - 1;
8234 d = r + 1 - leafData;
8235 (void)cachedCellSize(&b, d);
8236 do{
8237 assert( d<nMaxCells );
8238 assert( r<nMaxCells );
8239 (void)cachedCellSize(&b, r);
8240 if( szRight!=0
8241 && (bBulk || szRight+b.szCell[d]+2 > szLeft-(b.szCell[r]+(i==k-1?0:2)))){
8242 break;
8243 }
8244 szRight += b.szCell[d] + 2;
8245 szLeft -= b.szCell[r] + 2;
8246 cntNew[i-1] = r;
8247 r--;
8248 d--;
8249 }while( r>=0 );
8250 szNew[i] = szRight;
8251 szNew[i-1] = szLeft;
8252 if( cntNew[i-1] <= (i>1 ? cntNew[i-2] : 0) ){
8253 rc = SQLITE_CORRUPT_BKPT;
8254 goto balance_cleanup;
8255 }
8256 }
8257
8258  /* Sanity check: For a non-corrupt database file one of the following
8259 ** must be true:
8260  **   (1) We found one or more cells (cntNew[0]>0), or
8261 ** (2) pPage is a virtual root page. A virtual root page is when
8262 ** the real root page is page 1 and we are the only child of
8263 ** that page.
8264 */
8265 assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB);
8266 TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n",
8267 apOld[0]->pgno, apOld[0]->nCell,
8268 nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0,
8269 nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0
8270 ));
8271
8272 /*
8273 ** Allocate k new pages. Reuse old pages where possible.
8274 */
8275 pageFlags = apOld[0]->aData[0];
8276 for(i=0; i<k; i++){
8277 MemPage *pNew;
8278 if( i<nOld ){
8279 pNew = apNew[i] = apOld[i];
8280 apOld[i] = 0;
8281 rc = sqlite3PagerWrite(pNew->pDbPage);
8282 nNew++;
8283 if( sqlite3PagerPageRefcount(pNew->pDbPage)!=1+(i==(iParentIdx-nxDiv))
8284 && rc==SQLITE_OK
8285 ){
8286 rc = SQLITE_CORRUPT_BKPT;
8287 }
8288 if( rc ) goto balance_cleanup;
8289 }else{
8290 assert( i>0 );
8291 rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0);
8292 if( rc ) goto balance_cleanup;
8293 zeroPage(pNew, pageFlags);
8294 apNew[i] = pNew;
8295 nNew++;
8296 cntOld[i] = b.nCell;
8297
8298 /* Set the pointer-map entry for the new sibling page. */
8299 if( ISAUTOVACUUM ){
8300 ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc);
8301 if( rc!=SQLITE_OK ){
8302 goto balance_cleanup;
8303 }
8304 }
8305 }
8306 }
8307
8308 /*
8309 ** Reassign page numbers so that the new pages are in ascending order.
8310 ** This helps to keep entries in the disk file in order so that a scan
8311 ** of the table is closer to a linear scan through the file. That in turn
8312 ** helps the operating system to deliver pages from the disk more rapidly.
8313 **
8314 ** An O(N*N) sort algorithm is used, but since N is never more than NB+2
8315 ** (5), that is not a performance concern.
8316 **
8317 ** When NB==3, this one optimization makes the database about 25% faster
8318 ** for large insertions and deletions.
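  **
  ** Illustrative example: if the new sibling pages were obtained with
  ** page numbers {8, 3, 5} (listed in key order), the loop below rekeys
  ** them so that the same key-ordered content ends up on pages {3, 5, 8}.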
8319 */
8320 for(i=0; i<nNew; i++){
8321 aPgno[i] = apNew[i]->pgno;
8322 assert( apNew[i]->pDbPage->flags & PGHDR_WRITEABLE );
8323 assert( apNew[i]->pDbPage->flags & PGHDR_DIRTY );
8324 }
8325 for(i=0; i<nNew-1; i++){
8326 int iB = i;
8327 for(j=i+1; j<nNew; j++){
8328 if( apNew[j]->pgno < apNew[iB]->pgno ) iB = j;
8329 }
8330
8331 /* If apNew[i] has a page number that is bigger than any of the
8332    ** subsequent apNew[] entries, then swap apNew[i] with the subsequent
8333 ** entry that has the smallest page number (which we know to be
8334 ** entry apNew[iB]).
8335 */
8336 if( iB!=i ){
8337 Pgno pgnoA = apNew[i]->pgno;
8338 Pgno pgnoB = apNew[iB]->pgno;
8339 Pgno pgnoTemp = (PENDING_BYTE/pBt->pageSize)+1;
8340 u16 fgA = apNew[i]->pDbPage->flags;
8341 u16 fgB = apNew[iB]->pDbPage->flags;
8342 sqlite3PagerRekey(apNew[i]->pDbPage, pgnoTemp, fgB);
8343 sqlite3PagerRekey(apNew[iB]->pDbPage, pgnoA, fgA);
8344 sqlite3PagerRekey(apNew[i]->pDbPage, pgnoB, fgB);
8345 apNew[i]->pgno = pgnoB;
8346 apNew[iB]->pgno = pgnoA;
8347 }
8348 }
8349
8350 TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) "
8351 "%d(%d nc=%d) %d(%d nc=%d)\n",
8352 apNew[0]->pgno, szNew[0], cntNew[0],
8353 nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0,
8354 nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0,
8355 nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0,
8356 nNew>=3 ? cntNew[2] - cntNew[1] - !leafData : 0,
8357 nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0,
8358 nNew>=4 ? cntNew[3] - cntNew[2] - !leafData : 0,
8359 nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0,
8360 nNew>=5 ? cntNew[4] - cntNew[3] - !leafData : 0
8361 ));
8362
8363 assert( sqlite3PagerIswriteable(pParent->pDbPage) );
8364 assert( nNew>=1 && nNew<=ArraySize(apNew) );
8365 assert( apNew[nNew-1]!=0 );
8366 put4byte(pRight, apNew[nNew-1]->pgno);
8367
8368 /* If the sibling pages are not leaves, ensure that the right-child pointer
8369 ** of the right-most new sibling page is set to the value that was
8370 ** originally in the same field of the right-most old sibling page. */
8371 if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){
8372 MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1];
8373 memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4);
8374 }
8375
8376 /* Make any required updates to pointer map entries associated with
8377 ** cells stored on sibling pages following the balance operation. Pointer
8378 ** map entries associated with divider cells are set by the insertCell()
8379 ** routine. The associated pointer map entries are:
8380 **
8381 ** a) if the cell contains a reference to an overflow chain, the
8382 ** entry associated with the first page in the overflow chain, and
8383 **
8384 ** b) if the sibling pages are not leaves, the child page associated
8385 ** with the cell.
8386 **
8387 ** If the sibling pages are not leaves, then the pointer map entry
8388 ** associated with the right-child of each sibling may also need to be
8389 ** updated. This happens below, after the sibling pages have been
8390 ** populated, not here.
8391 */
8392 if( ISAUTOVACUUM ){
8393 MemPage *pOld;
8394 MemPage *pNew = pOld = apNew[0];
8395 int cntOldNext = pNew->nCell + pNew->nOverflow;
8396 int iNew = 0;
8397 int iOld = 0;
8398
8399 for(i=0; i<b.nCell; i++){
8400 u8 *pCell = b.apCell[i];
8401 while( i==cntOldNext ){
8402 iOld++;
8403 assert( iOld<nNew || iOld<nOld );
8404 assert( iOld>=0 && iOld<NB );
8405 pOld = iOld<nNew ? apNew[iOld] : apOld[iOld];
8406 cntOldNext += pOld->nCell + pOld->nOverflow + !leafData;
8407 }
8408 if( i==cntNew[iNew] ){
8409 pNew = apNew[++iNew];
8410 if( !leafData ) continue;
8411 }
8412
8413 /* Cell pCell is destined for new sibling page pNew. Originally, it
8414 ** was either part of sibling page iOld (possibly an overflow cell),
8415 ** or else the divider cell to the left of sibling page iOld. So,
8416 ** if sibling page iOld had the same page number as pNew, and if
8417 ** pCell really was a part of sibling page iOld (not a divider or
8418 ** overflow cell), we can skip updating the pointer map entries. */
8419 if( iOld>=nNew
8420 || pNew->pgno!=aPgno[iOld]
8421 || !SQLITE_WITHIN(pCell,pOld->aData,pOld->aDataEnd)
8422 ){
8423 if( !leafCorrection ){
8424 ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc);
8425 }
8426 if( cachedCellSize(&b,i)>pNew->minLocal ){
8427 ptrmapPutOvflPtr(pNew, pOld, pCell, &rc);
8428 }
8429 if( rc ) goto balance_cleanup;
8430 }
8431 }
8432 }
8433
8434 /* Insert new divider cells into pParent. */
8435 for(i=0; i<nNew-1; i++){
8436 u8 *pCell;
8437 u8 *pTemp;
8438 int sz;
8439 u8 *pSrcEnd;
8440 MemPage *pNew = apNew[i];
8441 j = cntNew[i];
8442
8443 assert( j<nMaxCells );
8444 assert( b.apCell[j]!=0 );
8445 pCell = b.apCell[j];
8446 sz = b.szCell[j] + leafCorrection;
8447 pTemp = &aOvflSpace[iOvflSpace];
8448 if( !pNew->leaf ){
8449 memcpy(&pNew->aData[8], pCell, 4);
8450 }else if( leafData ){
8451 /* If the tree is a leaf-data tree, and the siblings are leaves,
8452 ** then there is no divider cell in b.apCell[]. Instead, the divider
8453 ** cell consists of the integer key for the right-most cell of
8454 ** the sibling-page assembled above only.
8455 */
8456 CellInfo info;
8457 j--;
8458 pNew->xParseCell(pNew, b.apCell[j], &info);
8459 pCell = pTemp;
8460 sz = 4 + putVarint(&pCell[4], info.nKey);
8461 pTemp = 0;
8462 }else{
8463 pCell -= 4;
8464 /* Obscure case for non-leaf-data trees: If the cell at pCell was
8465 ** previously stored on a leaf node, and its reported size was 4
8466 ** bytes, then it may actually be smaller than this
8467 ** (see btreeParseCellPtr(), 4 bytes is the minimum size of
8468 ** any cell). But it is important to pass the correct size to
8469 ** insertCell(), so reparse the cell now.
8470 **
8471 ** This can only happen for b-trees used to evaluate "IN (SELECT ...)"
8472 ** and WITHOUT ROWID tables with exactly one column which is the
8473 ** primary key.
8474 */
8475 if( b.szCell[j]==4 ){
8476 assert(leafCorrection==4);
8477 sz = pParent->xCellSize(pParent, pCell);
8478 }
8479 }
8480 iOvflSpace += sz;
8481 assert( sz<=pBt->maxLocal+23 );
8482 assert( iOvflSpace <= (int)pBt->pageSize );
8483 for(k=0; b.ixNx[k]<=j && ALWAYS(k<NB*2); k++){}
8484 pSrcEnd = b.apEnd[k];
8485 if( SQLITE_WITHIN(pSrcEnd, pCell, pCell+sz) ){
8486 rc = SQLITE_CORRUPT_BKPT;
8487 goto balance_cleanup;
8488 }
8489 insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc);
8490 if( rc!=SQLITE_OK ) goto balance_cleanup;
8491 assert( sqlite3PagerIswriteable(pParent->pDbPage) );
8492 }
8493
8494 /* Now update the actual sibling pages. The order in which they are updated
8495 ** is important, as this code needs to avoid disrupting any page from which
8496  ** cells may still need to be read. In practice, this means:
8497 **
8498 ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1])
8499 ** then it is not safe to update page apNew[iPg] until after
8500 ** the left-hand sibling apNew[iPg-1] has been updated.
8501 **
8502 ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1])
8503 ** then it is not safe to update page apNew[iPg] until after
8504 ** the right-hand sibling apNew[iPg+1] has been updated.
8505 **
8506 ** If neither of the above apply, the page is safe to update.
8507 **
8508  ** The iPg value in the following loop starts at nNew-1, goes down
8509 ** to 0, then back up to nNew-1 again, thus making two passes over
8510 ** the pages. On the initial downward pass, only condition (1) above
8511 ** needs to be tested because (2) will always be true from the previous
8512 ** step. On the upward pass, both conditions are always true, so the
8513 ** upwards pass simply processes pages that were missed on the downward
8514 ** pass.
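  **
  ** Illustrative trace (assuming nNew==3): the loop variable i takes the
  ** values -2, -1, 0, 1, 2, so iPg visits pages 2, 1, 0, 1, 2.  On the
  ** downward visits a page is updated only if no cells are moving left
  ** out of it into its left-hand neighbour; pages skipped on the way
  ** down are picked up on the way back up, after their left-hand
  ** neighbours have been updated.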
8515 */
8516 for(i=1-nNew; i<nNew; i++){
8517 int iPg = i<0 ? -i : i;
8518 assert( iPg>=0 && iPg<nNew );
8519 if( abDone[iPg] ) continue; /* Skip pages already processed */
8520 if( i>=0 /* On the upwards pass, or... */
8521 || cntOld[iPg-1]>=cntNew[iPg-1] /* Condition (1) is true */
8522 ){
8523 int iNew;
8524 int iOld;
8525 int nNewCell;
8526
8527 /* Verify condition (1): If cells are moving left, update iPg
8528 ** only after iPg-1 has already been updated. */
8529 assert( iPg==0 || cntOld[iPg-1]>=cntNew[iPg-1] || abDone[iPg-1] );
8530
8531 /* Verify condition (2): If cells are moving right, update iPg
8532 ** only after iPg+1 has already been updated. */
8533 assert( cntNew[iPg]>=cntOld[iPg] || abDone[iPg+1] );
8534
8535 if( iPg==0 ){
8536 iNew = iOld = 0;
8537 nNewCell = cntNew[0];
8538 }else{
8539 iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : b.nCell;
8540 iNew = cntNew[iPg-1] + !leafData;
8541 nNewCell = cntNew[iPg] - iNew;
8542 }
8543
8544 rc = editPage(apNew[iPg], iOld, iNew, nNewCell, &b);
8545 if( rc ) goto balance_cleanup;
8546 abDone[iPg]++;
8547 apNew[iPg]->nFree = usableSpace-szNew[iPg];
8548 assert( apNew[iPg]->nOverflow==0 );
8549 assert( apNew[iPg]->nCell==nNewCell );
8550 }
8551 }
8552
8553 /* All pages have been processed exactly once */
8554 assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 );
8555
8556 assert( nOld>0 );
8557 assert( nNew>0 );
8558
8559 if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){
8560 /* The root page of the b-tree now contains no cells. The only sibling
8561 ** page is the right-child of the parent. Copy the contents of the
8562 ** child page into the parent, decreasing the overall height of the
8563 ** b-tree structure by one. This is described as the "balance-shallower"
8564 ** sub-algorithm in some documentation.
8565 **
8566 ** If this is an auto-vacuum database, the call to copyNodeContent()
8567 ** sets all pointer-map entries corresponding to database image pages
8568 ** for which the pointer is stored within the content being copied.
8569 **
8570 ** It is critical that the child page be defragmented before being
8571 ** copied into the parent, because if the parent is page 1 then it will
8572  ** be smaller than the child due to the database header, and so all the
8573 ** free space needs to be up front.
8574 */
8575 assert( nNew==1 || CORRUPT_DB );
8576 rc = defragmentPage(apNew[0], -1);
8577 testcase( rc!=SQLITE_OK );
8578 assert( apNew[0]->nFree ==
8579 (get2byteNotZero(&apNew[0]->aData[5]) - apNew[0]->cellOffset
8580 - apNew[0]->nCell*2)
8581 || rc!=SQLITE_OK
8582 );
8583 copyNodeContent(apNew[0], pParent, &rc);
8584 freePage(apNew[0], &rc);
8585 }else if( ISAUTOVACUUM && !leafCorrection ){
8586 /* Fix the pointer map entries associated with the right-child of each
8587 ** sibling page. All other pointer map entries have already been taken
8588 ** care of. */
8589 for(i=0; i<nNew; i++){
8590 u32 key = get4byte(&apNew[i]->aData[8]);
8591 ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc);
8592 }
8593 }
8594
8595 assert( pParent->isInit );
8596 TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
8597 nOld, nNew, b.nCell));
8598
8599 /* Free any old pages that were not reused as new pages.
8600 */
8601 for(i=nNew; i<nOld; i++){
8602 freePage(apOld[i], &rc);
8603 }
8604
8605#if 0
8606 if( ISAUTOVACUUM && rc==SQLITE_OK && apNew[0]->isInit ){
8607 /* The ptrmapCheckPages() contains assert() statements that verify that
8608 ** all pointer map pages are set correctly. This is helpful while
8609 ** debugging. This is usually disabled because a corrupt database may
8610 ** cause an assert() statement to fail. */
8611 ptrmapCheckPages(apNew, nNew);
8612 ptrmapCheckPages(&pParent, 1);
8613 }
8614#endif
8615
8616 /*
8617 ** Cleanup before returning.
8618 */
8619balance_cleanup:
8620 sqlite3StackFree(0, b.apCell);
8621 for(i=0; i<nOld; i++){
8622 releasePage(apOld[i]);
8623 }
8624 for(i=0; i<nNew; i++){
8625 releasePage(apNew[i]);
8626 }
8627
8628 return rc;
8629}
8630
8631
8632/*
8633** This function is called when the root page of a b-tree structure is
8634** overfull (has one or more overflow pages).
8635**
8636** A new child page is allocated and the contents of the current root
8637** page, including overflow cells, are copied into the child. The root
8638** page is then overwritten to make it an empty page with the right-child
8639** pointer pointing to the new page.
8640**
8641** Before returning, all pointer-map entries corresponding to pages
8642** that the new child-page now contains pointers to are updated. The
8643** entry corresponding to the new right-child pointer of the root
8644** page is also updated.
8645**
8646** If successful, *ppChild is set to contain a reference to the child
8647** page and SQLITE_OK is returned. In this case the caller is required
8648** to call releasePage() on *ppChild exactly once. If an error occurs,
8649** an error code is returned and *ppChild is set to 0.
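**
** Illustrative before/after sketch (assuming the root is an interior node
** with three children C0, C1 and C2):
**
**      before:   root (overfull)         after:   root (empty)
**                /   |   \                          |
**              C0   C1   C2                       child
**                                                 /  |  \
**                                               C0  C1  C2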
8650*/
8651static int balance_deeper(MemPage *pRoot, MemPage **ppChild){
8652 int rc; /* Return value from subprocedures */
8653 MemPage *pChild = 0; /* Pointer to a new child page */
8654 Pgno pgnoChild = 0; /* Page number of the new child page */
8655 BtShared *pBt = pRoot->pBt; /* The BTree */
8656
8657 assert( pRoot->nOverflow>0 );
8658 assert( sqlite3_mutex_held(pBt->mutex) );
8659
8660 /* Make pRoot, the root page of the b-tree, writable. Allocate a new
8661  ** page that will become the new right-child of pRoot. Copy the contents
8662 ** of the node stored on pRoot into the new child page.
8663 */
8664 rc = sqlite3PagerWrite(pRoot->pDbPage);
8665 if( rc==SQLITE_OK ){
8666 rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0);
8667 copyNodeContent(pRoot, pChild, &rc);
8668 if( ISAUTOVACUUM ){
8669 ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc);
8670 }
8671 }
8672 if( rc ){
8673 *ppChild = 0;
8674 releasePage(pChild);
8675 return rc;
8676 }
8677 assert( sqlite3PagerIswriteable(pChild->pDbPage) );
8678 assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
8679 assert( pChild->nCell==pRoot->nCell || CORRUPT_DB );
8680
8681 TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno));
8682
8683 /* Copy the overflow cells from pRoot to pChild */
8684 memcpy(pChild->aiOvfl, pRoot->aiOvfl,
8685 pRoot->nOverflow*sizeof(pRoot->aiOvfl[0]));
8686 memcpy(pChild->apOvfl, pRoot->apOvfl,
8687 pRoot->nOverflow*sizeof(pRoot->apOvfl[0]));
8688 pChild->nOverflow = pRoot->nOverflow;
8689
8690 /* Zero the contents of pRoot. Then install pChild as the right-child. */
8691 zeroPage(pRoot, pChild->aData[0] & ~PTF_LEAF);
8692 put4byte(&pRoot->aData[pRoot->hdrOffset+8], pgnoChild);
8693
8694 *ppChild = pChild;
8695 return SQLITE_OK;
8696}
8697
8698/*
8699** Return SQLITE_CORRUPT if any cursor other than pCur is currently valid
8700** on the same B-tree as pCur.
8701**
8702** This can occur if a database is corrupt with two or more SQL tables
8703** pointing to the same b-tree. If an insert occurs on one SQL table
8704** and causes a BEFORE TRIGGER to do a secondary insert on the other SQL
8705** table linked to the same b-tree, and if the secondary insert causes a
8706** rebalance, that can change content out from under the cursor on the
8707** first SQL table, violating invariants on the first insert.
8708*/
8709static int anotherValidCursor(BtCursor *pCur){
8710 BtCursor *pOther;
8711 for(pOther=pCur->pBt->pCursor; pOther; pOther=pOther->pNext){
8712 if( pOther!=pCur
8713 && pOther->eState==CURSOR_VALID
8714 && pOther->pPage==pCur->pPage
8715 ){
8716 return SQLITE_CORRUPT_BKPT;
8717 }
8718 }
8719 return SQLITE_OK;
8720}
8721
8722/*
8723** The page that pCur currently points to has just been modified in
8724** some way. This function figures out if this modification means the
8725** tree needs to be balanced, and if so calls the appropriate balancing
8726** routine. Balancing routines are:
8727**
8728** balance_quick()
8729** balance_deeper()
8730** balance_nonroot()
8731*/
8732static int balance(BtCursor *pCur){
8733 int rc = SQLITE_OK;
8734 u8 aBalanceQuickSpace[13];
8735 u8 *pFree = 0;
8736
8737 VVA_ONLY( int balance_quick_called = 0 );
8738 VVA_ONLY( int balance_deeper_called = 0 );
8739
8740 do {
8741 int iPage;
8742 MemPage *pPage = pCur->pPage;
8743
8744 if( NEVER(pPage->nFree<0) && btreeComputeFreeSpace(pPage) ) break;
8745 if( pPage->nOverflow==0 && pPage->nFree*3<=(int)pCur->pBt->usableSize*2 ){
8746 /* No rebalance required as long as:
8747 ** (1) There are no overflow cells
8748 ** (2) The amount of free space on the page is less than 2/3rds of
8749 ** the total usable space on the page. */
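      /* Illustrative numbers (assuming a 4096-byte usable page size): the
      ** test above defers any rebalancing until more than about 2730
      ** bytes, i.e. two thirds of the usable space, are free. */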
8750 break;
8751 }else if( (iPage = pCur->iPage)==0 ){
8752 if( pPage->nOverflow && (rc = anotherValidCursor(pCur))==SQLITE_OK ){
8753 /* The root page of the b-tree is overfull. In this case call the
8754 ** balance_deeper() function to create a new child for the root-page
8755 ** and copy the current contents of the root-page to it. The
8756 ** next iteration of the do-loop will balance the child page.
8757 */
8758 assert( balance_deeper_called==0 );
8759 VVA_ONLY( balance_deeper_called++ );
8760 rc = balance_deeper(pPage, &pCur->apPage[1]);
8761 if( rc==SQLITE_OK ){
8762 pCur->iPage = 1;
8763 pCur->ix = 0;
8764 pCur->aiIdx[0] = 0;
8765 pCur->apPage[0] = pPage;
8766 pCur->pPage = pCur->apPage[1];
8767 assert( pCur->pPage->nOverflow );
8768 }
8769 }else{
8770 break;
8771 }
8772 }else if( sqlite3PagerPageRefcount(pPage->pDbPage)>1 ){
8773 /* The page being written is not a root page, and there is currently
8774 ** more than one reference to it. This only happens if the page is one
8775 ** of its own ancestor pages. Corruption. */
8776 rc = SQLITE_CORRUPT_BKPT;
8777 }else{
8778 MemPage * const pParent = pCur->apPage[iPage-1];
8779 int const iIdx = pCur->aiIdx[iPage-1];
8780
8781 rc = sqlite3PagerWrite(pParent->pDbPage);
8782 if( rc==SQLITE_OK && pParent->nFree<0 ){
8783 rc = btreeComputeFreeSpace(pParent);
8784 }
8785 if( rc==SQLITE_OK ){
8786#ifndef SQLITE_OMIT_QUICKBALANCE
8787 if( pPage->intKeyLeaf
8788 && pPage->nOverflow==1
8789 && pPage->aiOvfl[0]==pPage->nCell
8790 && pParent->pgno!=1
8791 && pParent->nCell==iIdx
8792 ){
8793 /* Call balance_quick() to create a new sibling of pPage on which
8794 ** to store the overflow cell. balance_quick() inserts a new cell
8795        ** into pParent, which may cause pParent to overflow. If this
8796        ** happens, the next iteration of the do-loop will balance pParent
8797        ** using either balance_nonroot() or balance_deeper(). Until this
8798 ** happens, the overflow cell is stored in the aBalanceQuickSpace[]
8799 ** buffer.
8800 **
8801 ** The purpose of the following assert() is to check that only a
8802 ** single call to balance_quick() is made for each call to this
8803 ** function. If this were not verified, a subtle bug involving reuse
8804        ** of the aBalanceQuickSpace[] buffer might sneak in.
8805 */
8806 assert( balance_quick_called==0 );
8807 VVA_ONLY( balance_quick_called++ );
8808 rc = balance_quick(pParent, pPage, aBalanceQuickSpace);
8809 }else
8810#endif
8811 {
8812 /* In this case, call balance_nonroot() to redistribute cells
8813 ** between pPage and up to 2 of its sibling pages. This involves
8814 ** modifying the contents of pParent, which may cause pParent to
8815 ** become overfull or underfull. The next iteration of the do-loop
8816 ** will balance the parent page to correct this.
8817 **
8818 ** If the parent page becomes overfull, the overflow cell or cells
8819 ** are stored in the pSpace buffer allocated immediately below.
8820 ** A subsequent iteration of the do-loop will deal with this by
8821 ** calling balance_nonroot() (balance_deeper() may be called first,
8822 ** but it doesn't deal with overflow cells - just moves them to a
8823 ** different page). Once this subsequent call to balance_nonroot()
8824 ** has completed, it is safe to release the pSpace buffer used by
8825 ** the previous call, as the overflow cell data will have been
8826 ** copied either into the body of a database page or into the new
8827 ** pSpace buffer passed to the latter call to balance_nonroot().
8828 */
8829 u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize);
8830 rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1,
8831 pCur->hints&BTREE_BULKLOAD);
8832 if( pFree ){
8833 /* If pFree is not NULL, it points to the pSpace buffer used
8834 ** by a previous call to balance_nonroot(). Its contents are
8835 ** now stored either on real database pages or within the
8836 ** new pSpace buffer, so it may be safely freed here. */
8837 sqlite3PageFree(pFree);
8838 }
8839
8840 /* The pSpace buffer will be freed after the next call to
8841 ** balance_nonroot(), or just before this function returns, whichever
8842 ** comes first. */
8843 pFree = pSpace;
8844 }
8845 }
8846
8847 pPage->nOverflow = 0;
8848
8849 /* The next iteration of the do-loop balances the parent page. */
8850 releasePage(pPage);
8851 pCur->iPage--;
8852 assert( pCur->iPage>=0 );
8853 pCur->pPage = pCur->apPage[pCur->iPage];
8854 }
8855 }while( rc==SQLITE_OK );
8856
8857 if( pFree ){
8858 sqlite3PageFree(pFree);
8859 }
8860 return rc;
8861}
8862
8863/* Overwrite content from pX into pDest. Only write if the content
8864** differs from what is already there, so unchanged pages are not dirtied.
8865*/
8866static int btreeOverwriteContent(
8867 MemPage *pPage, /* MemPage on which writing will occur */
8868 u8 *pDest, /* Pointer to the place to start writing */
8869 const BtreePayload *pX, /* Source of data to write */
8870 int iOffset, /* Offset of first byte to write */
8871 int iAmt /* Number of bytes to be written */
8872){
8873 int nData = pX->nData - iOffset;
8874 if( nData<=0 ){
8875    /* Overwriting with zeros */
8876 int i;
8877 for(i=0; i<iAmt && pDest[i]==0; i++){}
8878 if( i<iAmt ){
8879 int rc = sqlite3PagerWrite(pPage->pDbPage);
8880 if( rc ) return rc;
8881 memset(pDest + i, 0, iAmt - i);
8882 }
8883 }else{
8884 if( nData<iAmt ){
8885 /* Mixed read data and zeros at the end. Make a recursive call
8886 ** to write the zeros then fall through to write the real data */
8887 int rc = btreeOverwriteContent(pPage, pDest+nData, pX, iOffset+nData,
8888 iAmt-nData);
8889 if( rc ) return rc;
8890 iAmt = nData;
8891 }
8892 if( memcmp(pDest, ((u8*)pX->pData) + iOffset, iAmt)!=0 ){
8893 int rc = sqlite3PagerWrite(pPage->pDbPage);
8894 if( rc ) return rc;
8895 /* In a corrupt database, it is possible for the source and destination
8896 ** buffers to overlap. This is harmless since the database is already
8897 ** corrupt but it does cause valgrind and ASAN warnings. So use
8898 ** memmove(). */
8899 memmove(pDest, ((u8*)pX->pData) + iOffset, iAmt);
8900 }
8901 }
8902 return SQLITE_OK;
8903}
8904
8905/*
8906** Overwrite the cell that cursor pCur is pointing to with fresh content
8907** contained in pX.
8908*/
8909static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){
8910 int iOffset; /* Next byte of pX->pData to write */
8911  int nTotal = pX->nData + pX->nZero;  /* Total bytes to write */
8912 int rc; /* Return code */
8913 MemPage *pPage = pCur->pPage; /* Page being written */
8914 BtShared *pBt; /* Btree */
8915 Pgno ovflPgno; /* Next overflow page to write */
8916 u32 ovflPageSize; /* Size to write on overflow page */
8917
8918 if( pCur->info.pPayload + pCur->info.nLocal > pPage->aDataEnd
8919 || pCur->info.pPayload < pPage->aData + pPage->cellOffset
8920 ){
8921 return SQLITE_CORRUPT_BKPT;
8922 }
8923 /* Overwrite the local portion first */
8924 rc = btreeOverwriteContent(pPage, pCur->info.pPayload, pX,
8925 0, pCur->info.nLocal);
8926 if( rc ) return rc;
8927 if( pCur->info.nLocal==nTotal ) return SQLITE_OK;
8928
8929 /* Now overwrite the overflow pages */
8930 iOffset = pCur->info.nLocal;
8931 assert( nTotal>=0 );
8932 assert( iOffset>=0 );
8933 ovflPgno = get4byte(pCur->info.pPayload + iOffset);
8934 pBt = pPage->pBt;
8935 ovflPageSize = pBt->usableSize - 4;
8936 do{
8937 rc = btreeGetPage(pBt, ovflPgno, &pPage, 0);
8938 if( rc ) return rc;
8939 if( sqlite3PagerPageRefcount(pPage->pDbPage)!=1 || pPage->isInit ){
8940 rc = SQLITE_CORRUPT_BKPT;
8941 }else{
8942 if( iOffset+ovflPageSize<(u32)nTotal ){
8943 ovflPgno = get4byte(pPage->aData);
8944 }else{
8945 ovflPageSize = nTotal - iOffset;
8946 }
8947 rc = btreeOverwriteContent(pPage, pPage->aData+4, pX,
8948 iOffset, ovflPageSize);
8949 }
8950 sqlite3PagerUnref(pPage->pDbPage);
8951 if( rc ) return rc;
8952 iOffset += ovflPageSize;
8953 }while( iOffset<nTotal );
8954 return SQLITE_OK;
8955}
8956
8957
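/* Illustrative, non-compiled sketch of a typical rowid-table insert through
** this interface.  The cursor (pCur), rowid (iRowid) and record buffer
** (aRecord/nRecord) names are hypothetical; only the BtreePayload fields
** and flags described in the header comment below are relied upon.
*/
#if 0
  BtreePayload x;
  memset(&x, 0, sizeof(x));
  x.nKey = iRowid;                /* rowid / INTEGER PRIMARY KEY value */
  x.pData = aRecord;              /* serialized row content */
  x.nData = nRecord;              /* number of bytes in aRecord[] */
  rc = sqlite3BtreeInsert(pCur, &x, BTREE_APPEND, 0);
#endif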
8958/*
8959** Insert a new record into the BTree. The content of the new record
8960** is described by the pX object. The pCur cursor is used only to
8961** define what table the record should be inserted into, and is left
8962** pointing at a random location.
8963**
8964** For a table btree (used for rowid tables), only the pX.nKey value of
8965** the key is used. The pX.pKey value must be NULL. The pX.nKey is the
8966** rowid or INTEGER PRIMARY KEY of the row. The pX.nData,pData,nZero fields
8967** hold the content of the row.
8968**
8969** For an index btree (used for indexes and WITHOUT ROWID tables), the
8970** key is an arbitrary byte sequence stored in pX.pKey,nKey. The
8971** pX.pData,nData,nZero fields must be zero.
8972**
8973** If the seekResult parameter is non-zero, then a successful call to
8974** sqlite3BtreeIndexMoveto() to seek cursor pCur to (pKey,nKey) has already
8975** been performed. In other words, if seekResult!=0 then the cursor
8976** is currently pointing to a cell that will be adjacent to the cell
8977** to be inserted. If seekResult<0 then pCur points to a cell that is
8978** smaller than (pKey,nKey). If seekResult>0 then pCur points to a cell
8979** that is larger than (pKey,nKey).
8980**
8981** If seekResult==0, that means pCur is pointing at some unknown location.
8982** In that case, this routine must seek the cursor to the correct insertion
8983** point for (pKey,nKey) before doing the insertion. For index btrees,
8984** if pX->nMem is non-zero, then pX->aMem contains pointers to the unpacked
8985** key values and pX->aMem can be used instead of pX->pKey to avoid having
8986** to decode the key.
8987*/
8988int sqlite3BtreeInsert(
8989 BtCursor *pCur, /* Insert data into the table of this cursor */
8990 const BtreePayload *pX, /* Content of the row to be inserted */
8991 int flags, /* True if this is likely an append */
8992 int seekResult /* Result of prior IndexMoveto() call */
8993){
8994 int rc;
8995 int loc = seekResult; /* -1: before desired location +1: after */
8996 int szNew = 0;
8997 int idx;
8998 MemPage *pPage;
8999 Btree *p = pCur->pBtree;
9000 BtShared *pBt = p->pBt;
9001 unsigned char *oldCell;
9002 unsigned char *newCell = 0;
9003
9004 assert( (flags & (BTREE_SAVEPOSITION|BTREE_APPEND|BTREE_PREFORMAT))==flags );
9005 assert( (flags & BTREE_PREFORMAT)==0 || seekResult || pCur->pKeyInfo==0 );
9006
9007 /* Save the positions of any other cursors open on this table.
9008 **
9009 ** In some cases, the call to btreeMoveto() below is a no-op. For
9010 ** example, when inserting data into a table with auto-generated integer
9011 ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the
9012 ** integer key to use. It then calls this function to actually insert the
9013 ** data into the intkey B-Tree. In this case btreeMoveto() recognizes
9014 ** that the cursor is already where it needs to be and returns without
9015 ** doing any work. To avoid thwarting these optimizations, it is important
9016 ** not to clear the cursor here.
9017 */
9018 if( pCur->curFlags & BTCF_Multiple ){
9019 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
9020 if( rc ) return rc;
9021 if( loc && pCur->iPage<0 ){
9022 /* This can only happen if the schema is corrupt such that there is more
9023      ** than one table or index with the same root page as used by the cursor,
9024      ** which can only happen if the SQLITE_NoSchemaError flag was set when
9025 ** the schema was loaded. This cannot be asserted though, as a user might
9026 ** set the flag, load the schema, and then unset the flag. */
9027 return SQLITE_CORRUPT_BKPT;
9028 }
9029 }
9030
9031 /* Ensure that the cursor is not in the CURSOR_FAULT state and that it
9032 ** points to a valid cell.
9033 */
9034 if( pCur->eState>=CURSOR_REQUIRESEEK ){
9035 testcase( pCur->eState==CURSOR_REQUIRESEEK );
9036 testcase( pCur->eState==CURSOR_FAULT );
9037 rc = moveToRoot(pCur);
9038 if( rc && rc!=SQLITE_EMPTY ) return rc;
9039 }
9040
9041 assert( cursorOwnsBtShared(pCur) );
9042 assert( (pCur->curFlags & BTCF_WriteFlag)!=0
9043 && pBt->inTransaction==TRANS_WRITE
9044 && (pBt->btsFlags & BTS_READ_ONLY)==0 );
9045 assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
9046
9047 /* Assert that the caller has been consistent. If this cursor was opened
9048 ** expecting an index b-tree, then the caller should be inserting blob
9049 ** keys with no associated data. If the cursor was opened expecting an
9050 ** intkey table, the caller should be inserting integer keys with a
9051 ** blob of associated data. */
9052 assert( (flags & BTREE_PREFORMAT) || (pX->pKey==0)==(pCur->pKeyInfo==0) );
9053
9054 if( pCur->pKeyInfo==0 ){
9055 assert( pX->pKey==0 );
9056 /* If this is an insert into a table b-tree, invalidate any incrblob
9057 ** cursors open on the row being replaced */
9058 if( p->hasIncrblobCur ){
9059 invalidateIncrblobCursors(p, pCur->pgnoRoot, pX->nKey, 0);
9060 }
9061
9062 /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
9063 ** to a row with the same key as the new entry being inserted.
9064 */
9065#ifdef SQLITE_DEBUG
9066 if( flags & BTREE_SAVEPOSITION ){
9067 assert( pCur->curFlags & BTCF_ValidNKey );
9068 assert( pX->nKey==pCur->info.nKey );
9069 assert( loc==0 );
9070 }
9071#endif
9072
9073 /* On the other hand, BTREE_SAVEPOSITION==0 does not imply
9074 ** that the cursor is not pointing to a row to be overwritten.
9075 ** So do a complete check.
9076 */
9077 if( (pCur->curFlags&BTCF_ValidNKey)!=0 && pX->nKey==pCur->info.nKey ){
9078 /* The cursor is pointing to the entry that is to be
9079 ** overwritten */
9080 assert( pX->nData>=0 && pX->nZero>=0 );
9081 if( pCur->info.nSize!=0
9082 && pCur->info.nPayload==(u32)pX->nData+pX->nZero
9083 ){
9084 /* New entry is the same size as the old. Do an overwrite */
9085 return btreeOverwriteCell(pCur, pX);
9086 }
9087 assert( loc==0 );
9088 }else if( loc==0 ){
9089 /* The cursor is *not* pointing to the cell to be overwritten, nor
9090 ** to an adjacent cell. Move the cursor so that it is pointing either
9091 ** to the cell to be overwritten or an adjacent cell.
9092 */
9093 rc = sqlite3BtreeTableMoveto(pCur, pX->nKey,
9094 (flags & BTREE_APPEND)!=0, &loc);
9095 if( rc ) return rc;
9096 }
9097 }else{
9098 /* This is an index or a WITHOUT ROWID table */
9099
9100 /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
9101 ** to a row with the same key as the new entry being inserted.
9102 */
9103 assert( (flags & BTREE_SAVEPOSITION)==0 || loc==0 );
9104
9105    /* If the cursor is not already pointing either to the cell to be
9106    ** overwritten or, when a new entry is being inserted, to an
9107    ** immediately adjacent cell, then move the cursor so that it is
9108    ** pointing there.
9109 */
9110 if( loc==0 && (flags & BTREE_SAVEPOSITION)==0 ){
9111 if( pX->nMem ){
9112 UnpackedRecord r;
9113 r.pKeyInfo = pCur->pKeyInfo;
9114 r.aMem = pX->aMem;
9115 r.nField = pX->nMem;
9116 r.default_rc = 0;
9117 r.eqSeen = 0;
9118 rc = sqlite3BtreeIndexMoveto(pCur, &r, &loc);
9119 }else{
9120 rc = btreeMoveto(pCur, pX->pKey, pX->nKey,
9121 (flags & BTREE_APPEND)!=0, &loc);
9122 }
9123 if( rc ) return rc;
9124 }
9125
9126 /* If the cursor is currently pointing to an entry to be overwritten
9127    ** and the new content is the same as the old, then use the
9128 ** overwrite optimization.
9129 */
9130 if( loc==0 ){
9131 getCellInfo(pCur);
9132 if( pCur->info.nKey==pX->nKey ){
9133 BtreePayload x2;
9134 x2.pData = pX->pKey;
9135 x2.nData = pX->nKey;
9136 x2.nZero = 0;
9137 return btreeOverwriteCell(pCur, &x2);
9138 }
9139 }
9140 }
9141 assert( pCur->eState==CURSOR_VALID
9142 || (pCur->eState==CURSOR_INVALID && loc) );
9143
9144 pPage = pCur->pPage;
9145 assert( pPage->intKey || pX->nKey>=0 || (flags & BTREE_PREFORMAT) );
9146 assert( pPage->leaf || !pPage->intKey );
9147 if( pPage->nFree<0 ){
9148 if( NEVER(pCur->eState>CURSOR_INVALID) ){
9149 /* ^^^^^--- due to the moveToRoot() call above */
9150 rc = SQLITE_CORRUPT_BKPT;
9151 }else{
9152 rc = btreeComputeFreeSpace(pPage);
9153 }
9154 if( rc ) return rc;
9155 }
9156
9157 TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
9158 pCur->pgnoRoot, pX->nKey, pX->nData, pPage->pgno,
9159 loc==0 ? "overwrite" : "new entry"));
9160 assert( pPage->isInit || CORRUPT_DB );
9161 newCell = pBt->pTmpSpace;
9162 assert( newCell!=0 );
9163 if( flags & BTREE_PREFORMAT ){
9164 rc = SQLITE_OK;
9165 szNew = pBt->nPreformatSize;
9166 if( szNew<4 ) szNew = 4;
9167 if( ISAUTOVACUUM && szNew>pPage->maxLocal ){
9168 CellInfo info;
9169 pPage->xParseCell(pPage, newCell, &info);
9170 if( info.nPayload!=info.nLocal ){
9171 Pgno ovfl = get4byte(&newCell[szNew-4]);
9172 ptrmapPut(pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, &rc);
9173 }
9174 }
9175 }else{
9176 rc = fillInCell(pPage, newCell, pX, &szNew);
9177 }
9178 if( rc ) goto end_insert;
9179 assert( szNew==pPage->xCellSize(pPage, newCell) );
9180 assert( szNew <= MX_CELL_SIZE(pBt) );
9181 idx = pCur->ix;
9182 if( loc==0 ){
9183 CellInfo info;
9184 assert( idx>=0 );
9185 if( idx>=pPage->nCell ){
9186 return SQLITE_CORRUPT_BKPT;
9187 }
9188 rc = sqlite3PagerWrite(pPage->pDbPage);
9189 if( rc ){
9190 goto end_insert;
9191 }
9192 oldCell = findCell(pPage, idx);
9193 if( !pPage->leaf ){
9194 memcpy(newCell, oldCell, 4);
9195 }
9196 BTREE_CLEAR_CELL(rc, pPage, oldCell, info);
9197 testcase( pCur->curFlags & BTCF_ValidOvfl );
9198 invalidateOverflowCache(pCur);
9199 if( info.nSize==szNew && info.nLocal==info.nPayload
9200 && (!ISAUTOVACUUM || szNew<pPage->minLocal)
9201 ){
9202 /* Overwrite the old cell with the new if they are the same size.
9203 ** We could also try to do this if the old cell is smaller, then add
9204 ** the leftover space to the free list. But experiments show that
9205    ** doing that is no faster than skipping this optimization and just
9206 ** calling dropCell() and insertCell().
9207 **
9208 ** This optimization cannot be used on an autovacuum database if the
9209 ** new entry uses overflow pages, as the insertCell() call below is
9210 ** necessary to add the PTRMAP_OVERFLOW1 pointer-map entry. */
9211 assert( rc==SQLITE_OK ); /* clearCell never fails when nLocal==nPayload */
9212 if( oldCell < pPage->aData+pPage->hdrOffset+10 ){
9213 return SQLITE_CORRUPT_BKPT;
9214 }
9215 if( oldCell+szNew > pPage->aDataEnd ){
9216 return SQLITE_CORRUPT_BKPT;
9217 }
9218 memcpy(oldCell, newCell, szNew);
9219 return SQLITE_OK;
9220 }
9221 dropCell(pPage, idx, info.nSize, &rc);
9222 if( rc ) goto end_insert;
9223 }else if( loc<0 && pPage->nCell>0 ){
9224 assert( pPage->leaf );
9225 idx = ++pCur->ix;
9226 pCur->curFlags &= ~BTCF_ValidNKey;
9227 }else{
9228 assert( pPage->leaf );
9229 }
9230 insertCell(pPage, idx, newCell, szNew, 0, 0, &rc);
9231 assert( pPage->nOverflow==0 || rc==SQLITE_OK );
9232 assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 );
9233
9234 /* If no error has occurred and pPage has an overflow cell, call balance()
9235 ** to redistribute the cells within the tree. Since balance() may move
9236 ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey
9237 ** variables.
9238 **
9239 ** Previous versions of SQLite called moveToRoot() to move the cursor
9240 ** back to the root page as balance() used to invalidate the contents
9241 ** of BtCursor.apPage[] and BtCursor.aiIdx[]. Instead of doing that,
9242 ** set the cursor state to "invalid". This makes common insert operations
9243 ** slightly faster.
9244 **
9245 ** There is a subtle but important optimization here too. When inserting
9246 ** multiple records into an intkey b-tree using a single cursor (as can
9247 ** happen while processing an "INSERT INTO ... SELECT" statement), it
9248 ** is advantageous to leave the cursor pointing to the last entry in
9249 ** the b-tree if possible. If the cursor is left pointing to the last
9250 ** entry in the table, and the next row inserted has an integer key
9251 ** larger than the largest existing key, it is possible to insert the
9252 ** row without seeking the cursor. This can be a big performance boost.
9253 */
9254 pCur->info.nSize = 0;
9255 if( pPage->nOverflow ){
9256 assert( rc==SQLITE_OK );
9257 pCur->curFlags &= ~(BTCF_ValidNKey);
9258 rc = balance(pCur);
9259
9260 /* Must make sure nOverflow is reset to zero even if the balance()
9261 ** fails. Internal data structure corruption will result otherwise.
9262 ** Also, set the cursor state to invalid. This stops saveCursorPosition()
9263 ** from trying to save the current position of the cursor. */
9264 pCur->pPage->nOverflow = 0;
9265 pCur->eState = CURSOR_INVALID;
9266 if( (flags & BTREE_SAVEPOSITION) && rc==SQLITE_OK ){
9267 btreeReleaseAllCursorPages(pCur);
9268 if( pCur->pKeyInfo ){
9269 assert( pCur->pKey==0 );
9270 pCur->pKey = sqlite3Malloc( pX->nKey );
9271 if( pCur->pKey==0 ){
9272 rc = SQLITE_NOMEM;
9273 }else{
9274 memcpy(pCur->pKey, pX->pKey, pX->nKey);
9275 }
9276 }
9277 pCur->eState = CURSOR_REQUIRESEEK;
9278 pCur->nKey = pX->nKey;
9279 }
9280 }
9281 assert( pCur->iPage<0 || pCur->pPage->nOverflow==0 );
9282
9283end_insert:
9284 return rc;
9285}
9286
9287/*
9288** This function is used as part of copying the current row from cursor
9289** pSrc into cursor pDest. If the cursors are open on intkey tables, then
9290** parameter iKey is used as the rowid value when the record is copied
9291** into pDest. Otherwise, the record is copied verbatim.
9292**
9293** This function does not actually write the new value to cursor pDest.
9294** Instead, it creates and populates any required overflow pages and
9295** writes the data for the new cell into the BtShared.pTmpSpace buffer
9296** for the destination database. The size of the cell, in bytes, is left
9297** in BtShared.nPreformatSize. The caller completes the insertion by
9298** calling sqlite3BtreeInsert() with the BTREE_PREFORMAT flag specified.
9299**
9300** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
9301*/
9302int sqlite3BtreeTransferRow(BtCursor *pDest, BtCursor *pSrc, i64 iKey){
9303 int rc = SQLITE_OK;
9304 BtShared *pBt = pDest->pBt;
9305 u8 *aOut = pBt->pTmpSpace; /* Pointer to next output buffer */
9306 const u8 *aIn; /* Pointer to next input buffer */
9307 u32 nIn; /* Size of input buffer aIn[] */
9308 u32 nRem; /* Bytes of data still to copy */
9309
9310 getCellInfo(pSrc);
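  /* Emit the payload-size varint that begins the new cell.  Values below
  ** 0x80 encode as a single varint byte, so that common case is written
  ** directly instead of going through sqlite3PutVarint(). */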
9311 if( pSrc->info.nPayload<0x80 ){
9312 *(aOut++) = pSrc->info.nPayload;
9313 }else{
9314 aOut += sqlite3PutVarint(aOut, pSrc->info.nPayload);
9315 }
9316 if( pDest->pKeyInfo==0 ) aOut += putVarint(aOut, iKey);
9317 nIn = pSrc->info.nLocal;
9318 aIn = pSrc->info.pPayload;
9319 if( aIn+nIn>pSrc->pPage->aDataEnd ){
9320 return SQLITE_CORRUPT_BKPT;
9321 }
9322 nRem = pSrc->info.nPayload;
9323 if( nIn==nRem && nIn<pDest->pPage->maxLocal ){
9324 memcpy(aOut, aIn, nIn);
9325 pBt->nPreformatSize = nIn + (aOut - pBt->pTmpSpace);
9326 }else{
9327 Pager *pSrcPager = pSrc->pBt->pPager;
9328 u8 *pPgnoOut = 0;
9329 Pgno ovflIn = 0;
9330 DbPage *pPageIn = 0;
9331 MemPage *pPageOut = 0;
9332 u32 nOut; /* Size of output buffer aOut[] */
9333
9334 nOut = btreePayloadToLocal(pDest->pPage, pSrc->info.nPayload);
9335 pBt->nPreformatSize = nOut + (aOut - pBt->pTmpSpace);
9336 if( nOut<pSrc->info.nPayload ){
9337 pPgnoOut = &aOut[nOut];
9338 pBt->nPreformatSize += 4;
9339 }
9340
9341 if( nRem>nIn ){
9342 if( aIn+nIn+4>pSrc->pPage->aDataEnd ){
9343 return SQLITE_CORRUPT_BKPT;
9344 }
9345 ovflIn = get4byte(&pSrc->info.pPayload[nIn]);
9346 }
9347
9348 do {
9349 nRem -= nOut;
9350 do{
9351 assert( nOut>0 );
9352 if( nIn>0 ){
9353 int nCopy = MIN(nOut, nIn);
9354 memcpy(aOut, aIn, nCopy);
9355 nOut -= nCopy;
9356 nIn -= nCopy;
9357 aOut += nCopy;
9358 aIn += nCopy;
9359 }
9360 if( nOut>0 ){
9361 sqlite3PagerUnref(pPageIn);
9362 pPageIn = 0;
9363 rc = sqlite3PagerGet(pSrcPager, ovflIn, &pPageIn, PAGER_GET_READONLY);
9364 if( rc==SQLITE_OK ){
9365 aIn = (const u8*)sqlite3PagerGetData(pPageIn);
9366 ovflIn = get4byte(aIn);
9367 aIn += 4;
9368 nIn = pSrc->pBt->usableSize - 4;
9369 }
9370 }
9371 }while( rc==SQLITE_OK && nOut>0 );
9372
9373 if( rc==SQLITE_OK && nRem>0 && ALWAYS(pPgnoOut) ){
9374 Pgno pgnoNew;
9375 MemPage *pNew = 0;
9376 rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
9377 put4byte(pPgnoOut, pgnoNew);
9378 if( ISAUTOVACUUM && pPageOut ){
9379 ptrmapPut(pBt, pgnoNew, PTRMAP_OVERFLOW2, pPageOut->pgno, &rc);
9380 }
9381 releasePage(pPageOut);
9382 pPageOut = pNew;
9383 if( pPageOut ){
9384 pPgnoOut = pPageOut->aData;
9385 put4byte(pPgnoOut, 0);
9386 aOut = &pPgnoOut[4];
9387 nOut = MIN(pBt->usableSize - 4, nRem);
9388 }
9389 }
9390 }while( nRem>0 && rc==SQLITE_OK );
9391
9392 releasePage(pPageOut);
9393 sqlite3PagerUnref(pPageIn);
9394 }
9395
9396 return rc;
9397}
9398
9399/*
9400** Delete the entry that the cursor is pointing to.
9401**
9402** If the BTREE_SAVEPOSITION bit of the flags parameter is zero, then
9403** the cursor is left pointing at an arbitrary location after the delete.
9404** But if that bit is set, then the cursor is left in a state such that
9405** the next call to BtreeNext() or BtreePrev() moves it to the same row
9406** as it would have been on if the call to BtreeDelete() had been omitted.
9407**
9408** The BTREE_AUXDELETE bit of flags indicates that this is one of several deletes
9409** associated with a single table entry and its indexes. Only one of those
9410** deletes is considered the "primary" delete. The primary delete occurs
9411** on a cursor that is not a BTREE_FORDELETE cursor. All but one delete
9412** operation on non-FORDELETE cursors is tagged with the AUXDELETE flag.
9413** The BTREE_AUXDELETE bit is a hint that is not used by this implementation,
9414** but which might be used by alternative storage engines.
9415*/
9416int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
9417 Btree *p = pCur->pBtree;
9418 BtShared *pBt = p->pBt;
9419 int rc; /* Return code */
9420 MemPage *pPage; /* Page to delete cell from */
9421 unsigned char *pCell; /* Pointer to cell to delete */
9422 int iCellIdx; /* Index of cell to delete */
9423 int iCellDepth; /* Depth of node containing pCell */
9424 CellInfo info; /* Size of the cell being deleted */
9425 u8 bPreserve; /* Keep cursor valid. 2 for CURSOR_SKIPNEXT */
9426
9427 assert( cursorOwnsBtShared(pCur) );
9428 assert( pBt->inTransaction==TRANS_WRITE );
9429 assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
9430 assert( pCur->curFlags & BTCF_WriteFlag );
9431 assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
9432 assert( !hasReadConflicts(p, pCur->pgnoRoot) );
9433 assert( (flags & ~(BTREE_SAVEPOSITION | BTREE_AUXDELETE))==0 );
9434 if( pCur->eState!=CURSOR_VALID ){
9435 if( pCur->eState>=CURSOR_REQUIRESEEK ){
9436 rc = btreeRestoreCursorPosition(pCur);
9437 assert( rc!=SQLITE_OK || CORRUPT_DB || pCur->eState==CURSOR_VALID );
9438 if( rc || pCur->eState!=CURSOR_VALID ) return rc;
9439 }else{
9440 return SQLITE_CORRUPT_BKPT;
9441 }
9442 }
9443 assert( pCur->eState==CURSOR_VALID );
9444
9445 iCellDepth = pCur->iPage;
9446 iCellIdx = pCur->ix;
9447 pPage = pCur->pPage;
9448 if( pPage->nCell<=iCellIdx ){
9449 return SQLITE_CORRUPT_BKPT;
9450 }
9451 pCell = findCell(pPage, iCellIdx);
9452 if( pPage->nFree<0 && btreeComputeFreeSpace(pPage) ){
9453 return SQLITE_CORRUPT_BKPT;
9454 }
9455
9456 /* If the BTREE_SAVEPOSITION bit is on, then the cursor position must
9457 ** be preserved following this delete operation. If the current delete
9458 ** will cause a b-tree rebalance, then this is done by saving the cursor
9459 ** key and leaving the cursor in CURSOR_REQUIRESEEK state before
9460 ** returning.
9461 **
9462 ** If the current delete will not cause a rebalance, then the cursor
9463 ** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately
9464 ** before or after the deleted entry.
9465 **
9466 ** The bPreserve value records which path is required:
9467 **
9468 ** bPreserve==0 Not necessary to save the cursor position
9469 ** bPreserve==1 Use CURSOR_REQUIRESEEK to save the cursor position
9470 ** bPreserve==2 Cursor won't move. Set CURSOR_SKIPNEXT.
9471 */
9472 bPreserve = (flags & BTREE_SAVEPOSITION)!=0;
9473 if( bPreserve ){
9474 if( !pPage->leaf
9475 || (pPage->nFree+pPage->xCellSize(pPage,pCell)+2) >
9476 (int)(pBt->usableSize*2/3)
9477 || pPage->nCell==1 /* See dbfuzz001.test for a test case */
9478 ){
9479 /* A b-tree rebalance will be required after deleting this entry.
9480 ** Save the cursor key. */
9481 rc = saveCursorKey(pCur);
9482 if( rc ) return rc;
9483 }else{
9484 bPreserve = 2;
9485 }
9486 }
9487
9488 /* If the page containing the entry to delete is not a leaf page, move
9489 ** the cursor to the largest entry in the tree that is smaller than
9490 ** the entry being deleted. This cell will replace the cell being deleted
9491 ** from the internal node. The 'previous' entry is used for this instead
9492 ** of the 'next' entry, as the previous entry is always a part of the
9493 ** sub-tree headed by the child page of the cell being deleted. This makes
9494 ** balancing the tree following the delete operation easier. */
9495 if( !pPage->leaf ){
9496 rc = sqlite3BtreePrevious(pCur, 0);
9497 assert( rc!=SQLITE_DONE );
9498 if( rc ) return rc;
9499 }
9500
9501 /* Save the positions of any other cursors open on this table before
9502 ** making any modifications. */
9503 if( pCur->curFlags & BTCF_Multiple ){
9504 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
9505 if( rc ) return rc;
9506 }
9507
9508 /* If this is a delete operation to remove a row from a table b-tree,
9509 ** invalidate any incrblob cursors open on the row being deleted. */
9510 if( pCur->pKeyInfo==0 && p->hasIncrblobCur ){
9511 invalidateIncrblobCursors(p, pCur->pgnoRoot, pCur->info.nKey, 0);
9512 }
9513
9514 /* Make the page containing the entry to be deleted writable. Then free any
9515 ** overflow pages associated with the entry and finally remove the cell
9516 ** itself from within the page. */
9517 rc = sqlite3PagerWrite(pPage->pDbPage);
9518 if( rc ) return rc;
9519 BTREE_CLEAR_CELL(rc, pPage, pCell, info);
9520 dropCell(pPage, iCellIdx, info.nSize, &rc);
9521 if( rc ) return rc;
9522
9523 /* If the cell deleted was not located on a leaf page, then the cursor
9524 ** is currently pointing to the largest entry in the sub-tree headed
9525 ** by the child-page of the cell that was just deleted from an internal
9526 ** node. The cell from the leaf node needs to be moved to the internal
9527 ** node to replace the deleted cell. */
9528 if( !pPage->leaf ){
9529 MemPage *pLeaf = pCur->pPage;
9530 int nCell;
9531 Pgno n;
9532 unsigned char *pTmp;
9533
9534 if( pLeaf->nFree<0 ){
9535 rc = btreeComputeFreeSpace(pLeaf);
9536 if( rc ) return rc;
9537 }
9538 if( iCellDepth<pCur->iPage-1 ){
9539 n = pCur->apPage[iCellDepth+1]->pgno;
9540 }else{
9541 n = pCur->pPage->pgno;
9542 }
9543 pCell = findCell(pLeaf, pLeaf->nCell-1);
9544 if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT;
9545 nCell = pLeaf->xCellSize(pLeaf, pCell);
9546 assert( MX_CELL_SIZE(pBt) >= nCell );
9547 pTmp = pBt->pTmpSpace;
9548 assert( pTmp!=0 );
9549 rc = sqlite3PagerWrite(pLeaf->pDbPage);
9550 if( rc==SQLITE_OK ){
9551 insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc);
9552 }
9553 dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc);
9554 if( rc ) return rc;
9555 }
9556
9557 /* Balance the tree. If the entry deleted was located on a leaf page,
9558 ** then the cursor still points to that page. In this case the first
9559 ** call to balance() repairs the tree, and the if(...) condition is
9560 ** never true.
9561 **
9562 ** Otherwise, if the entry deleted was on an internal node page, then
9563 ** pCur is pointing to the leaf page from which a cell was removed to
9564 ** replace the cell deleted from the internal node. This is slightly
9565 ** tricky as the leaf node may be underfull, and the internal node may
9566 ** be either under or overfull. In this case run the balancing algorithm
9567 ** on the leaf node first. If the balance proceeds far enough up the
9568 ** tree that we can be sure that any problem in the internal node has
9569 ** been corrected, so be it. Otherwise, after balancing the leaf node,
9570 ** walk the cursor up the tree to the internal node and balance it as
9571 ** well. */
9572 assert( pCur->pPage->nOverflow==0 );
9573 assert( pCur->pPage->nFree>=0 );
9574 if( pCur->pPage->nFree*3<=(int)pCur->pBt->usableSize*2 ){
9575 /* Optimization: If the free space is less than 2/3rds of the page,
9576 ** then balance() will always be a no-op. No need to invoke it. */
9577 rc = SQLITE_OK;
9578 }else{
9579 rc = balance(pCur);
9580 }
9581 if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){
9582 releasePageNotNull(pCur->pPage);
9583 pCur->iPage--;
9584 while( pCur->iPage>iCellDepth ){
9585 releasePage(pCur->apPage[pCur->iPage--]);
9586 }
9587 pCur->pPage = pCur->apPage[pCur->iPage];
9588 rc = balance(pCur);
9589 }
9590
9591 if( rc==SQLITE_OK ){
9592 if( bPreserve>1 ){
9593 assert( (pCur->iPage==iCellDepth || CORRUPT_DB) );
9594 assert( pPage==pCur->pPage || CORRUPT_DB );
9595 assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell );
9596 pCur->eState = CURSOR_SKIPNEXT;
9597 if( iCellIdx>=pPage->nCell ){
9598 pCur->skipNext = -1;
9599 pCur->ix = pPage->nCell-1;
9600 }else{
9601 pCur->skipNext = 1;
9602 }
9603 }else{
9604 rc = moveToRoot(pCur);
9605 if( bPreserve ){
9606 btreeReleaseAllCursorPages(pCur);
9607 pCur->eState = CURSOR_REQUIRESEEK;
9608 }
9609 if( rc==SQLITE_EMPTY ) rc = SQLITE_OK;
9610 }
9611 }
9612 return rc;
9613}
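/*
** Illustrative sketch (not part of the build): deleting the current row
** while continuing a forward scan by using BTREE_SAVEPOSITION, per the
** header comment above.  The signature of sqlite3BtreeNext() is assumed
** from its use elsewhere in this file.
**
**   rc = sqlite3BtreeDelete(pCur, BTREE_SAVEPOSITION);
**   if( rc==SQLITE_OK ){
**     rc = sqlite3BtreeNext(pCur, 0);
**   }
**
** Because BTREE_SAVEPOSITION was supplied, the sqlite3BtreeNext() call
** lands on the row that followed the deleted entry, exactly as if the
** delete had not occurred.
*/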
9614
9615/*
9616** Create a new BTree table. Write into *piTable the page
9617** number for the root page of the new table.
9618**
9619** The type of table is determined by the flags parameter. Only the
9620** following values of flags are currently in use. Other values for
9621** flags might not work:
9622**
9623** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys
9624** BTREE_ZERODATA Used for SQL indices
9625*/
9626static int btreeCreateTable(Btree *p, Pgno *piTable, int createTabFlags){
9627 BtShared *pBt = p->pBt;
9628 MemPage *pRoot;
9629 Pgno pgnoRoot;
9630 int rc;
9631 int ptfFlags; /* Page-type flags for the root page of new table */
9632
9633 assert( sqlite3BtreeHoldsMutex(p) );
9634 assert( pBt->inTransaction==TRANS_WRITE );
9635 assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
9636
9637#ifdef SQLITE_OMIT_AUTOVACUUM
9638 rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
9639 if( rc ){
9640 return rc;
9641 }
9642#else
9643 if( pBt->autoVacuum ){
9644 Pgno pgnoMove; /* Move a page here to make room for the root-page */
9645 MemPage *pPageMove; /* The page to move to. */
9646
9647 /* Creating a new table may require moving an existing database page
9648 ** to make room for the new table's root page. In case this page turns
9649 ** out to be an overflow page, delete all overflow page-map caches
9650 ** held by open cursors.
9651 */
9652 invalidateAllOverflowCache(pBt);
9653
9654 /* Read the value of meta[3] from the database to determine where the
9655 ** root page of the new table should go. meta[3] is the largest root-page
9656 ** created so far, so the new root-page is (meta[3]+1).
9657 */
9658 sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot);
9659 if( pgnoRoot>btreePagecount(pBt) ){
9660 return SQLITE_CORRUPT_BKPT;
9661 }
9662 pgnoRoot++;
9663
9664 /* The new root-page may not be allocated on a pointer-map page, or the
9665 ** PENDING_BYTE page.
9666 */
9667 while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
9668 pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
9669 pgnoRoot++;
9670 }
9671 assert( pgnoRoot>=3 );
9672
9673 /* Allocate a page. The page that currently resides at pgnoRoot will
9674 ** be moved to the allocated page (unless the allocated page happens
9675 ** to reside at pgnoRoot).
9676 */
9677 rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT);
9678 if( rc!=SQLITE_OK ){
9679 return rc;
9680 }
9681
9682 if( pgnoMove!=pgnoRoot ){
9683 /* pgnoRoot is the page that will be used for the root-page of
9684 ** the new table (assuming an error did not occur). But we were
9685 ** allocated pgnoMove. If required (i.e. if it was not allocated
9686 ** by extending the file), the current page at position pgnoMove
9687 ** is already journaled.
9688 */
9689 u8 eType = 0;
9690 Pgno iPtrPage = 0;
9691
9692 /* Save the positions of any open cursors. This is required in
9693 ** case they are holding a reference to an xFetch reference
9694 ** corresponding to page pgnoRoot. */
9695 rc = saveAllCursors(pBt, 0, 0);
9696 releasePage(pPageMove);
9697 if( rc!=SQLITE_OK ){
9698 return rc;
9699 }
9700
9701 /* Move the page currently at pgnoRoot to pgnoMove. */
9702 rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
9703 if( rc!=SQLITE_OK ){
9704 return rc;
9705 }
9706 rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
9707 if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
9708 rc = SQLITE_CORRUPT_BKPT;
9709 }
9710 if( rc!=SQLITE_OK ){
9711 releasePage(pRoot);
9712 return rc;
9713 }
9714 assert( eType!=PTRMAP_ROOTPAGE );
9715 assert( eType!=PTRMAP_FREEPAGE );
9716 rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0);
9717 releasePage(pRoot);
9718
9719 /* Obtain the page at pgnoRoot */
9720 if( rc!=SQLITE_OK ){
9721 return rc;
9722 }
9723 rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
9724 if( rc!=SQLITE_OK ){
9725 return rc;
9726 }
9727 rc = sqlite3PagerWrite(pRoot->pDbPage);
9728 if( rc!=SQLITE_OK ){
9729 releasePage(pRoot);
9730 return rc;
9731 }
9732 }else{
9733 pRoot = pPageMove;
9734 }
9735
9736 /* Update the pointer-map and meta-data with the new root-page number. */
9737 ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0, &rc);
9738 if( rc ){
9739 releasePage(pRoot);
9740 return rc;
9741 }
9742
9743 /* When the new root page was allocated, page 1 was made writable in
9744 ** order either to increase the database filesize, or to decrement the
9745 ** freelist count. Hence, the sqlite3BtreeUpdateMeta() call cannot fail.
9746 */
9747 assert( sqlite3PagerIswriteable(pBt->pPage1->pDbPage) );
9748 rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot);
9749 if( NEVER(rc) ){
9750 releasePage(pRoot);
9751 return rc;
9752 }
9753
9754 }else{
9755 rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
9756 if( rc ) return rc;
9757 }
9758#endif
9759 assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
9760 if( createTabFlags & BTREE_INTKEY ){
9761 ptfFlags = PTF_INTKEY | PTF_LEAFDATA | PTF_LEAF;
9762 }else{
9763 ptfFlags = PTF_ZERODATA | PTF_LEAF;
9764 }
9765 zeroPage(pRoot, ptfFlags);
9766 sqlite3PagerUnref(pRoot->pDbPage);
9767 assert( (pBt->openFlags & BTREE_SINGLE)==0 || pgnoRoot==2 );
9768 *piTable = pgnoRoot;
9769 return SQLITE_OK;
9770}
9771int sqlite3BtreeCreateTable(Btree *p, Pgno *piTable, int flags){
9772 int rc;
9773 sqlite3BtreeEnter(p);
9774 rc = btreeCreateTable(p, piTable, flags);
9775 sqlite3BtreeLeave(p);
9776 return rc;
9777}
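/*
** Illustrative sketch (not part of the build): creating the two kinds of
** btree listed in the header comment of btreeCreateTable().  A write
** transaction is assumed to be open on Btree p.
**
**   Pgno pgnoTab, pgnoIdx;
**   rc = sqlite3BtreeCreateTable(p, &pgnoTab, BTREE_INTKEY|BTREE_LEAFDATA);
**   if( rc==SQLITE_OK ){
**     rc = sqlite3BtreeCreateTable(p, &pgnoIdx, BTREE_ZERODATA);
**   }
*/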
9778
9779/*
9780** Erase the given database page and all its children. Return
9781** the page to the freelist.
9782*/
9783static int clearDatabasePage(
9784 BtShared *pBt, /* The BTree that contains the table */
9785 Pgno pgno, /* Page number to clear */
9786 int freePageFlag, /* Deallocate page if true */
9787 i64 *pnChange /* Add number of Cells freed to this counter */
9788){
9789 MemPage *pPage;
9790 int rc;
9791 unsigned char *pCell;
9792 int i;
9793 int hdr;
9794 CellInfo info;
9795
9796 assert( sqlite3_mutex_held(pBt->mutex) );
9797 if( pgno>btreePagecount(pBt) ){
9798 return SQLITE_CORRUPT_BKPT;
9799 }
9800 rc = getAndInitPage(pBt, pgno, &pPage, 0, 0);
9801 if( rc ) return rc;
9802 if( (pBt->openFlags & BTREE_SINGLE)==0
9803 && sqlite3PagerPageRefcount(pPage->pDbPage) != (1 + (pgno==1))
9804 ){
9805 rc = SQLITE_CORRUPT_BKPT;
9806 goto cleardatabasepage_out;
9807 }
9808 hdr = pPage->hdrOffset;
9809 for(i=0; i<pPage->nCell; i++){
9810 pCell = findCell(pPage, i);
9811 if( !pPage->leaf ){
9812 rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange);
9813 if( rc ) goto cleardatabasepage_out;
9814 }
9815 BTREE_CLEAR_CELL(rc, pPage, pCell, info);
9816 if( rc ) goto cleardatabasepage_out;
9817 }
9818 if( !pPage->leaf ){
9819 rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange);
9820 if( rc ) goto cleardatabasepage_out;
9821 if( pPage->intKey ) pnChange = 0;
9822 }
9823 if( pnChange ){
9824 testcase( !pPage->intKey );
9825 *pnChange += pPage->nCell;
9826 }
9827 if( freePageFlag ){
9828 freePage(pPage, &rc);
9829 }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){
9830 zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF);
9831 }
9832
9833cleardatabasepage_out:
9834 releasePage(pPage);
9835 return rc;
9836}
9837
9838/*
9839** Delete all information from a single table in the database. iTable is
9840** the page number of the root of the table. After this routine returns,
9841** the root page is empty, but still exists.
9842**
9843** This routine will fail with SQLITE_LOCKED if there are any open
9844** read cursors on the table. Open write cursors are moved to the
9845** root of the table.
9846**
9847** If pnChange is not NULL, then the integer value pointed to by pnChange
9848** is incremented by the number of entries in the table.
9849*/
9850int sqlite3BtreeClearTable(Btree *p, int iTable, i64 *pnChange){
9851 int rc;
9852 BtShared *pBt = p->pBt;
9853 sqlite3BtreeEnter(p);
9854 assert( p->inTrans==TRANS_WRITE );
9855
9856 rc = saveAllCursors(pBt, (Pgno)iTable, 0);
9857
9858 if( SQLITE_OK==rc ){
9859 /* Invalidate all incrblob cursors open on table iTable (assuming iTable
9860 ** is the root of a table b-tree - if it is not, the following call is
9861 ** a no-op). */
9862 if( p->hasIncrblobCur ){
9863 invalidateIncrblobCursors(p, (Pgno)iTable, 0, 1);
9864 }
9865 rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange);
9866 }
9867 sqlite3BtreeLeave(p);
9868 return rc;
9869}
9870
9871/*
9872** Delete all information from the single table that pCur is open on.
9873**
9874** This routine only works for pCur on an ephemeral table.
9875*/
9876int sqlite3BtreeClearTableOfCursor(BtCursor *pCur){
9877 return sqlite3BtreeClearTable(pCur->pBtree, pCur->pgnoRoot, 0);
9878}
9879
9880/*
9881** Erase all information in a table and add the root of the table to
9882** the freelist. Except, the root of the principal table (the one on
9883** page 1) is never added to the freelist.
9884**
9885** This routine will fail with SQLITE_LOCKED if there are any open
9886** cursors on the table.
9887**
9888** If AUTOVACUUM is enabled and the page at iTable is not the last
9889** root page in the database file, then the last root page
9890** in the database file is moved into the slot formerly occupied by
9891** iTable and that last slot formerly occupied by the last root page
9892** is added to the freelist instead of iTable. In this way, all
9893** root pages are kept at the beginning of the database file, which
9894** is necessary for AUTOVACUUM to work right. *piMoved is set to the
9895** page number that used to be the last root page in the file before
9896** the move. If no page gets moved, *piMoved is set to 0.
9897** The last root page is recorded in meta[3] and the value of
9898** meta[3] is updated by this procedure.
9899*/
9900static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
9901 int rc;
9902 MemPage *pPage = 0;
9903 BtShared *pBt = p->pBt;
9904
9905 assert( sqlite3BtreeHoldsMutex(p) );
9906 assert( p->inTrans==TRANS_WRITE );
9907 assert( iTable>=2 );
9908 if( iTable>btreePagecount(pBt) ){
9909 return SQLITE_CORRUPT_BKPT;
9910 }
9911
9912 rc = sqlite3BtreeClearTable(p, iTable, 0);
9913 if( rc ) return rc;
9914 rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
9915 if( NEVER(rc) ){
9916 releasePage(pPage);
9917 return rc;
9918 }
9919
9920 *piMoved = 0;
9921
9922#ifdef SQLITE_OMIT_AUTOVACUUM
9923 freePage(pPage, &rc);
9924 releasePage(pPage);
9925#else
9926 if( pBt->autoVacuum ){
9927 Pgno maxRootPgno;
9928 sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &maxRootPgno);
9929
9930 if( iTable==maxRootPgno ){
9931 /* If the table being dropped is the table with the largest root-page
9932 ** number in the database, put the root page on the free list.
9933 */
9934 freePage(pPage, &rc);
9935 releasePage(pPage);
9936 if( rc!=SQLITE_OK ){
9937 return rc;
9938 }
9939 }else{
9940 /* The table being dropped does not have the largest root-page
9941 ** number in the database. So move the page that does into the
9942 ** gap left by the deleted root-page.
9943 */
9944 MemPage *pMove;
9945 releasePage(pPage);
9946 rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
9947 if( rc!=SQLITE_OK ){
9948 return rc;
9949 }
9950 rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0);
9951 releasePage(pMove);
9952 if( rc!=SQLITE_OK ){
9953 return rc;
9954 }
9955 pMove = 0;
9956 rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
9957 freePage(pMove, &rc);
9958 releasePage(pMove);
9959 if( rc!=SQLITE_OK ){
9960 return rc;
9961 }
9962 *piMoved = maxRootPgno;
9963 }
9964
9965 /* Set the new 'max-root-page' value in the database header. This
9966 ** is the old value less one, less one more if that happens to
9967 ** be a pointer-map page, less one again if that is the
9968 ** PENDING_BYTE_PAGE.
9969 */
9970 maxRootPgno--;
9971 while( maxRootPgno==PENDING_BYTE_PAGE(pBt)
9972 || PTRMAP_ISPAGE(pBt, maxRootPgno) ){
9973 maxRootPgno--;
9974 }
9975 assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
9976
9977 rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
9978 }else{
9979 freePage(pPage, &rc);
9980 releasePage(pPage);
9981 }
9982#endif
9983 return rc;
9984}
9985int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
9986 int rc;
9987 sqlite3BtreeEnter(p);
9988 rc = btreeDropTable(p, iTable, piMoved);
9989 sqlite3BtreeLeave(p);
9990 return rc;
9991}
9992
9993
9994/*
9995** This function may only be called if the b-tree connection already
9996** has a read or write transaction open on the database.
9997**
9998** Read the meta-information out of a database file. Meta[0]
9999** is the number of free pages currently in the database. Meta[1]
10000** through meta[15] are available for use by higher layers. Meta[0]
10001** is read-only, the others are read/write.
10002**
10003** The schema layer numbers meta values differently. At the schema
10004** layer (and the SetCookie and ReadCookie opcodes) the number of
10005** free pages is not visible. So Cookie[0] is the same as Meta[1].
10006**
10007** This routine treats Meta[BTREE_DATA_VERSION] as a special case. Instead
10008** of reading the value out of the header, it instead loads the "DataVersion"
10009** from the pager. The BTREE_DATA_VERSION value is not actually stored in the
10010** database file. It is a number computed by the pager. But its access
10011** pattern is the same as header meta values, and so it is convenient to
10012** read it from this routine.
10013*/
10014void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
10015 BtShared *pBt = p->pBt;
10016
10017 sqlite3BtreeEnter(p);
10018 assert( p->inTrans>TRANS_NONE );
10019 assert( SQLITE_OK==querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK) );
10020 assert( pBt->pPage1 );
10021 assert( idx>=0 && idx<=15 );
10022
10023 if( idx==BTREE_DATA_VERSION ){
10024 *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iBDataVersion;
10025 }else{
10026 *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]);
10027 }
10028
10029 /* If auto-vacuum is disabled in this build and this is an auto-vacuum
10030 ** database, mark the database as read-only. */
10031#ifdef SQLITE_OMIT_AUTOVACUUM
10032 if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ){
10033 pBt->btsFlags |= BTS_READ_ONLY;
10034 }
10035#endif
10036
10037 sqlite3BtreeLeave(p);
10038}
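/*
** Illustrative sketch (not part of the build) of the numbering described
** above: the schema layer's Cookie[0] (the schema cookie) is Meta[1] at
** this level.  BTREE_SCHEMA_VERSION is assumed to be the symbolic name
** for that index as defined in the btree header.
**
**   u32 schemaCookie;
**   sqlite3BtreeGetMeta(p, BTREE_SCHEMA_VERSION, &schemaCookie);
*/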
10039
10040/*
10041** Write meta-information back into the database. Meta[0] is
10042** read-only and may not be written.
10043*/
10044int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
10045 BtShared *pBt = p->pBt;
10046 unsigned char *pP1;
10047 int rc;
10048 assert( idx>=1 && idx<=15 );
10049 sqlite3BtreeEnter(p);
10050 assert( p->inTrans==TRANS_WRITE );
10051 assert( pBt->pPage1!=0 );
10052 pP1 = pBt->pPage1->aData;
10053 rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
10054 if( rc==SQLITE_OK ){
10055 put4byte(&pP1[36 + idx*4], iMeta);
10056#ifndef SQLITE_OMIT_AUTOVACUUM
10057 if( idx==BTREE_INCR_VACUUM ){
10058 assert( pBt->autoVacuum || iMeta==0 );
10059 assert( iMeta==0 || iMeta==1 );
10060 pBt->incrVacuum = (u8)iMeta;
10061 }
10062#endif
10063 }
10064 sqlite3BtreeLeave(p);
10065 return rc;
10066}
10067
10068/*
10069** The first argument, pCur, is a cursor opened on some b-tree. Count the
10070** number of entries in the b-tree and write the result to *pnEntry.
10071**
10072** SQLITE_OK is returned if the operation is successfully executed.
10073** Otherwise, if an error is encountered (i.e. an IO error or database
10074** corruption) an SQLite error code is returned.
10075*/
10076int sqlite3BtreeCount(sqlite3 *db, BtCursor *pCur, i64 *pnEntry){
10077 i64 nEntry = 0; /* Value to return in *pnEntry */
10078 int rc; /* Return code */
10079
10080 rc = moveToRoot(pCur);
10081 if( rc==SQLITE_EMPTY ){
10082 *pnEntry = 0;
10083 return SQLITE_OK;
10084 }
10085
10086 /* Unless an error occurs, the following loop runs one iteration for each
10087 ** page in the B-Tree structure (not including overflow pages).
10088 */
10089 while( rc==SQLITE_OK && !AtomicLoad(&db->u1.isInterrupted) ){
10090 int iIdx; /* Index of child node in parent */
10091 MemPage *pPage; /* Current page of the b-tree */
10092
10093 /* If this is a leaf page or the tree is not an int-key tree, then
10094 ** this page contains countable entries. Increment the entry counter
10095 ** accordingly.
10096 */
10097 pPage = pCur->pPage;
10098 if( pPage->leaf || !pPage->intKey ){
10099 nEntry += pPage->nCell;
10100 }
10101
10102 /* pPage is a leaf node. This loop navigates the cursor up the tree
10103 ** until it points to the interior cell that is the parent of the
10104 ** next page in the tree that has not yet been visited. The
10105 ** pCur->aiIdx[pCur->iPage] value is set to the index of the parent cell
10106 ** of the page, or to the number of cells in the page if the next page
10107 ** to visit is the right-child of its parent.
10108 **
10109 ** If all pages in the tree have been visited, return SQLITE_OK to the
10110 ** caller.
10111 */
10112 if( pPage->leaf ){
10113 do {
10114 if( pCur->iPage==0 ){
10115 /* All pages of the b-tree have been visited. Return successfully. */
10116 *pnEntry = nEntry;
10117 return moveToRoot(pCur);
10118 }
10119 moveToParent(pCur);
10120 }while ( pCur->ix>=pCur->pPage->nCell );
10121
10122 pCur->ix++;
10123 pPage = pCur->pPage;
10124 }
10125
10126 /* Descend to the child node of the cell that the cursor currently
10127 ** points at. This is the right-child if (iIdx==pPage->nCell).
10128 */
10129 iIdx = pCur->ix;
10130 if( iIdx==pPage->nCell ){
10131 rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
10132 }else{
10133 rc = moveToChild(pCur, get4byte(findCell(pPage, iIdx)));
10134 }
10135 }
10136
10137 /* An error has occurred. Return an error code. */
10138 return rc;
10139}
10140
10141/*
10142** Return the pager associated with a BTree. This routine is used for
10143** testing and debugging only.
10144*/
10145Pager *sqlite3BtreePager(Btree *p){
10146 return p->pBt->pPager;
10147}
10148
10149#ifndef SQLITE_OMIT_INTEGRITY_CHECK
10150/*
10151** Append a message to the error message string.
10152*/
10153static void checkAppendMsg(
10154 IntegrityCk *pCheck,
10155 const char *zFormat,
10156 ...
10157){
10158 va_list ap;
10159 if( !pCheck->mxErr ) return;
10160 pCheck->mxErr--;
10161 pCheck->nErr++;
10162 va_start(ap, zFormat);
10163 if( pCheck->errMsg.nChar ){
10164 sqlite3_str_append(&pCheck->errMsg, "\n", 1);
10165 }
10166 if( pCheck->zPfx ){
10167 sqlite3_str_appendf(&pCheck->errMsg, pCheck->zPfx, pCheck->v1, pCheck->v2);
10168 }
10169 sqlite3_str_vappendf(&pCheck->errMsg, zFormat, ap);
10170 va_end(ap);
10171 if( pCheck->errMsg.accError==SQLITE_NOMEM ){
10172 pCheck->bOomFault = 1;
10173 }
10174}
10175#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
10176
10177#ifndef SQLITE_OMIT_INTEGRITY_CHECK
10178
10179/*
10180** Return non-zero if the bit in the IntegrityCk.aPgRef[] array that
10181** corresponds to page iPg is already set.
10182*/
10183static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){
10184 assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 );
10185 return (pCheck->aPgRef[iPg/8] & (1 << (iPg & 0x07)));
10186}
10187
10188/*
10189** Set the bit in the IntegrityCk.aPgRef[] array that corresponds to page iPg.
10190*/
10191static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){
10192 assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 );
10193 pCheck->aPgRef[iPg/8] |= (1 << (iPg & 0x07));
10194}
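/*
** Worked example of the bitmap addressing used by the two routines above
** (illustrative only): page 21 maps to bit (21 & 0x07)==5 of byte
** aPgRef[21/8]==aPgRef[2].  So setPageReferenced(pCheck, 21) performs
** aPgRef[2] |= (1<<5), and getPageReferenced(pCheck, 21) tests that same
** bit.
*/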
10195
10196
10197/*
10198** Add 1 to the reference count for page iPage. If this is the second
10199** reference to the page, add an error message to pCheck->errMsg.
10200** Return 1 if there are 2 or more references to the page and 0 if
10201** this is the first reference to the page.
10202**
10203** Also check that the page number is in bounds.
10204*/
10205static int checkRef(IntegrityCk *pCheck, Pgno iPage){
10206 if( iPage>pCheck->nPage || iPage==0 ){
10207 checkAppendMsg(pCheck, "invalid page number %d", iPage);
10208 return 1;
10209 }
10210 if( getPageReferenced(pCheck, iPage) ){
10211 checkAppendMsg(pCheck, "2nd reference to page %d", iPage);
10212 return 1;
10213 }
10214 if( AtomicLoad(&pCheck->db->u1.isInterrupted) ) return 1;
10215 setPageReferenced(pCheck, iPage);
10216 return 0;
10217}
10218
10219#ifndef SQLITE_OMIT_AUTOVACUUM
10220/*
10221** Check that the entry in the pointer-map for page iChild maps to
10222** page iParent, pointer type ptrType. If not, append an error message
10223** to pCheck.
10224*/
10225static void checkPtrmap(
10226 IntegrityCk *pCheck, /* Integrity check context */
10227 Pgno iChild, /* Child page number */
10228 u8 eType, /* Expected pointer map type */
10229 Pgno iParent /* Expected pointer map parent page number */
10230){
10231 int rc;
10232 u8 ePtrmapType;
10233 Pgno iPtrmapParent;
10234
10235 rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent);
10236 if( rc!=SQLITE_OK ){
10237 if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->bOomFault = 1;
10238 checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild);
10239 return;
10240 }
10241
10242 if( ePtrmapType!=eType || iPtrmapParent!=iParent ){
10243 checkAppendMsg(pCheck,
10244 "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)",
10245 iChild, eType, iParent, ePtrmapType, iPtrmapParent);
10246 }
10247}
10248#endif
10249
10250/*
10251** Check the integrity of the freelist or of an overflow page list.
10252** Verify that the number of pages on the list is N.
10253*/
10254static void checkList(
10255 IntegrityCk *pCheck, /* Integrity checking context */
10256 int isFreeList, /* True for a freelist. False for overflow page list */
10257 Pgno iPage, /* Page number for first page in the list */
10258 u32 N /* Expected number of pages in the list */
10259){
10260 int i;
10261 u32 expected = N;
10262 int nErrAtStart = pCheck->nErr;
10263 while( iPage!=0 && pCheck->mxErr ){
10264 DbPage *pOvflPage;
10265 unsigned char *pOvflData;
10266 if( checkRef(pCheck, iPage) ) break;
10267 N--;
10268 if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage, 0) ){
10269 checkAppendMsg(pCheck, "failed to get page %d", iPage);
10270 break;
10271 }
10272 pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage);
10273 if( isFreeList ){
10274 u32 n = (u32)get4byte(&pOvflData[4]);
10275#ifndef SQLITE_OMIT_AUTOVACUUM
10276 if( pCheck->pBt->autoVacuum ){
10277 checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0);
10278 }
10279#endif
10280 if( n>pCheck->pBt->usableSize/4-2 ){
10281 checkAppendMsg(pCheck,
10282 "freelist leaf count too big on page %d", iPage);
10283 N--;
10284 }else{
10285 for(i=0; i<(int)n; i++){
10286 Pgno iFreePage = get4byte(&pOvflData[8+i*4]);
10287#ifndef SQLITE_OMIT_AUTOVACUUM
10288 if( pCheck->pBt->autoVacuum ){
10289 checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0);
10290 }
10291#endif
10292 checkRef(pCheck, iFreePage);
10293 }
10294 N -= n;
10295 }
10296 }
10297#ifndef SQLITE_OMIT_AUTOVACUUM
10298 else{
10299 /* If this database supports auto-vacuum and iPage is not the last
10300 ** page in this overflow list, check that the pointer-map entry for
10301 ** the following page matches iPage.
10302 */
10303 if( pCheck->pBt->autoVacuum && N>0 ){
10304 i = get4byte(pOvflData);
10305 checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage);
10306 }
10307 }
10308#endif
10309 iPage = get4byte(pOvflData);
10310 sqlite3PagerUnref(pOvflPage);
10311 }
10312 if( N && nErrAtStart==pCheck->nErr ){
10313 checkAppendMsg(pCheck,
10314 "%s is %d but should be %d",
10315 isFreeList ? "size" : "overflow list length",
10316 expected-N, expected);
10317 }
10318}
10319#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
10320
10321/*
10322** An implementation of a min-heap.
10323**
10324** aHeap[0] is the number of elements on the heap. aHeap[1] is the
10325** root element. The daughter nodes of aHeap[N] are aHeap[N*2]
10326** and aHeap[N*2+1].
10327**
10328** The heap property is this: Every node is less than or equal to both
10329** of its daughter nodes. A consequence of the heap property is that the
10330** root node aHeap[1] is always the minimum value currently in the heap.
10331**
10332** The btreeHeapInsert() routine inserts an unsigned 32-bit number onto
10333** the heap, preserving the heap property. The btreeHeapPull() routine
10334** removes the root element from the heap (the minimum value in the heap)
10335** and then moves other nodes around as necessary to preserve the heap
10336** property.
10337**
10338** This heap is used for cell overlap and coverage testing. Each u32
10339** entry represents the span of a cell or freeblock on a btree page.
10340** The upper 16 bits are the index of the first byte of a range and the
10341** lower 16 bits are the index of the last byte of that range.
10342*/
10343static void btreeHeapInsert(u32 *aHeap, u32 x){
10344 u32 j, i = ++aHeap[0];
10345 aHeap[i] = x;
10346 while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){
10347 x = aHeap[j];
10348 aHeap[j] = aHeap[i];
10349 aHeap[i] = x;
10350 i = j;
10351 }
10352}
10353static int btreeHeapPull(u32 *aHeap, u32 *pOut){
10354 u32 j, i, x;
10355 if( (x = aHeap[0])==0 ) return 0;
10356 *pOut = aHeap[1];
10357 aHeap[1] = aHeap[x];
10358 aHeap[x] = 0xffffffff;
10359 aHeap[0]--;
10360 i = 1;
10361 while( (j = i*2)<=aHeap[0] ){
10362 if( aHeap[j]>aHeap[j+1] ) j++;
10363 if( aHeap[i]<aHeap[j] ) break;
10364 x = aHeap[i];
10365 aHeap[i] = aHeap[j];
10366 aHeap[j] = x;
10367 i = j;
10368 }
10369 return 1;
10370}
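/*
** Worked example of the span encoding described above (illustrative
** only): a cell occupying bytes 100 through 131 of a page is inserted
** into the heap as (100<<16)|131 == 0x00640083.  Because the start
** offset occupies the upper 16 bits, btreeHeapPull() returns spans in
** order of increasing start address, which is what the coverage check
** in checkTreePage() relies on when comparing each start against the
** previous end.
*/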
10371
10372#ifndef SQLITE_OMIT_INTEGRITY_CHECK
10373/*
10374** Do various sanity checks on a single page of a tree. Return
10375** the tree depth. Leaf pages return 0. Parents of leaf pages
10376** return 1, and so forth.
10377**
10378** These checks are done:
10379**
10380** 1. Make sure that cells and freeblocks do not overlap
10381** but combine to completely cover the page.
10382** 2. Make sure integer cell keys are in order.
10383** 3. Check the integrity of overflow pages.
10384** 4. Recursively call checkTreePage on all children.
10385** 5. Verify that the depth of all children is the same.
10386*/
10387static int checkTreePage(
10388 IntegrityCk *pCheck, /* Context for the sanity check */
10389 Pgno iPage, /* Page number of the page to check */
10390 i64 *piMinKey, /* Write minimum integer primary key here */
10391 i64 maxKey /* Error if integer primary key greater than this */
10392){
10393 MemPage *pPage = 0; /* The page being analyzed */
10394 int i; /* Loop counter */
10395 int rc; /* Result code from subroutine call */
10396 int depth = -1, d2; /* Depth of a subtree */
10397 int pgno; /* Page number */
10398 int nFrag; /* Number of fragmented bytes on the page */
10399 int hdr; /* Offset to the page header */
10400 int cellStart; /* Offset to the start of the cell pointer array */
10401 int nCell; /* Number of cells */
10402 int doCoverageCheck = 1; /* True if cell coverage checking should be done */
10403 int keyCanBeEqual = 1; /* True if IPK can be equal to maxKey
10404 ** False if IPK must be strictly less than maxKey */
10405 u8 *data; /* Page content */
10406 u8 *pCell; /* Cell content */
10407 u8 *pCellIdx; /* Next element of the cell pointer array */
10408 BtShared *pBt; /* The BtShared object that owns pPage */
10409 u32 pc; /* Address of a cell */
10410 u32 usableSize; /* Usable size of the page */
10411 u32 contentOffset; /* Offset to the start of the cell content area */
10412 u32 *heap = 0; /* Min-heap used for checking cell coverage */
10413 u32 x, prev = 0; /* Next and previous entry on the min-heap */
10414 const char *saved_zPfx = pCheck->zPfx;
10415 int saved_v1 = pCheck->v1;
10416 int saved_v2 = pCheck->v2;
10417 u8 savedIsInit = 0;
10418
10419 /* Check that the page exists
10420 */
10421 pBt = pCheck->pBt;
10422 usableSize = pBt->usableSize;
10423 if( iPage==0 ) return 0;
10424 if( checkRef(pCheck, iPage) ) return 0;
10425 pCheck->zPfx = "Page %u: ";
10426 pCheck->v1 = iPage;
10427 if( (rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0 ){
10428 checkAppendMsg(pCheck,
10429 "unable to get the page. error code=%d", rc);
10430 goto end_of_check;
10431 }
10432
10433 /* Clear MemPage.isInit to make sure the corruption detection code in
10434 ** btreeInitPage() is executed. */
10435 savedIsInit = pPage->isInit;
10436 pPage->isInit = 0;
10437 if( (rc = btreeInitPage(pPage))!=0 ){
10438 assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */
10439 checkAppendMsg(pCheck,
10440 "btreeInitPage() returns error code %d", rc);
10441 goto end_of_check;
10442 }
10443 if( (rc = btreeComputeFreeSpace(pPage))!=0 ){
10444 assert( rc==SQLITE_CORRUPT );
10445 checkAppendMsg(pCheck, "free space corruption", rc);
10446 goto end_of_check;
10447 }
10448 data = pPage->aData;
10449 hdr = pPage->hdrOffset;
10450
10451 /* Set up for cell analysis */
10452 pCheck->zPfx = "On tree page %u cell %d: ";
10453 contentOffset = get2byteNotZero(&data[hdr+5]);
10454 assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */
10455
10456 /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
10457 ** number of cells on the page. */
10458 nCell = get2byte(&data[hdr+3]);
10459 assert( pPage->nCell==nCell );
10460
10461 /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page
10462 ** immediately follows the b-tree page header. */
10463 cellStart = hdr + 12 - 4*pPage->leaf;
10464 assert( pPage->aCellIdx==&data[cellStart] );
10465 pCellIdx = &data[cellStart + 2*(nCell-1)];
10466
10467 if( !pPage->leaf ){
10468 /* Analyze the right-child page of internal pages */
10469 pgno = get4byte(&data[hdr+8]);
10470#ifndef SQLITE_OMIT_AUTOVACUUM
10471 if( pBt->autoVacuum ){
10472 pCheck->zPfx = "On page %u at right child: ";
10473 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage);
10474 }
10475#endif
10476 depth = checkTreePage(pCheck, pgno, &maxKey, maxKey);
10477 keyCanBeEqual = 0;
10478 }else{
10479 /* For leaf pages, the coverage check will occur in the same loop
10480 ** as the other cell checks, so initialize the heap. */
10481 heap = pCheck->heap;
10482 heap[0] = 0;
10483 }
10484
10485 /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte
10486 ** integer offsets to the cell contents. */
10487 for(i=nCell-1; i>=0 && pCheck->mxErr; i--){
10488 CellInfo info;
10489
10490 /* Check cell size */
10491 pCheck->v2 = i;
10492 assert( pCellIdx==&data[cellStart + i*2] );
10493 pc = get2byteAligned(pCellIdx);
10494 pCellIdx -= 2;
10495 if( pc<contentOffset || pc>usableSize-4 ){
10496 checkAppendMsg(pCheck, "Offset %d out of range %d..%d",
10497 pc, contentOffset, usableSize-4);
10498 doCoverageCheck = 0;
10499 continue;
10500 }
10501 pCell = &data[pc];
10502 pPage->xParseCell(pPage, pCell, &info);
10503 if( pc+info.nSize>usableSize ){
10504 checkAppendMsg(pCheck, "Extends off end of page");
10505 doCoverageCheck = 0;
10506 continue;
10507 }
10508
10509 /* Check for integer primary key out of range */
10510 if( pPage->intKey ){
10511 if( keyCanBeEqual ? (info.nKey > maxKey) : (info.nKey >= maxKey) ){
10512 checkAppendMsg(pCheck, "Rowid %lld out of order", info.nKey);
10513 }
10514 maxKey = info.nKey;
10515 keyCanBeEqual = 0; /* Only the first key on the page may ==maxKey */
10516 }
10517
10518 /* Check the content overflow list */
10519 if( info.nPayload>info.nLocal ){
10520 u32 nPage; /* Number of pages on the overflow chain */
10521 Pgno pgnoOvfl; /* First page of the overflow chain */
10522 assert( pc + info.nSize - 4 <= usableSize );
10523 nPage = (info.nPayload - info.nLocal + usableSize - 5)/(usableSize - 4);
10524 pgnoOvfl = get4byte(&pCell[info.nSize - 4]);
10525#ifndef SQLITE_OMIT_AUTOVACUUM
10526 if( pBt->autoVacuum ){
10527 checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage);
10528 }
10529#endif
10530 checkList(pCheck, 0, pgnoOvfl, nPage);
10531 }
10532
10533 if( !pPage->leaf ){
10534 /* Check sanity of left child page for internal pages */
10535 pgno = get4byte(pCell);
10536#ifndef SQLITE_OMIT_AUTOVACUUM
10537 if( pBt->autoVacuum ){
10538 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage);
10539 }
10540#endif
10541 d2 = checkTreePage(pCheck, pgno, &maxKey, maxKey);
10542 keyCanBeEqual = 0;
10543 if( d2!=depth ){
10544 checkAppendMsg(pCheck, "Child page depth differs");
10545 depth = d2;
10546 }
10547 }else{
10548 /* Populate the coverage-checking heap for leaf pages */
10549 btreeHeapInsert(heap, (pc<<16)|(pc+info.nSize-1));
10550 }
10551 }
10552 *piMinKey = maxKey;
10553
10554 /* Check for complete coverage of the page
10555 */
10556 pCheck->zPfx = 0;
10557 if( doCoverageCheck && pCheck->mxErr>0 ){
10558 /* For leaf pages, the min-heap has already been initialized and the
10559 ** cells have already been inserted. But for internal pages, that has
10560 ** not yet been done, so do it now */
10561 if( !pPage->leaf ){
10562 heap = pCheck->heap;
10563 heap[0] = 0;
10564 for(i=nCell-1; i>=0; i--){
10565 u32 size;
10566 pc = get2byteAligned(&data[cellStart+i*2]);
10567 size = pPage->xCellSize(pPage, &data[pc]);
10568 btreeHeapInsert(heap, (pc<<16)|(pc+size-1));
10569 }
10570 }
10571 /* Add the freeblocks to the min-heap
10572 **
10573 ** EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header
10574 ** is the offset of the first freeblock, or zero if there are no
10575 ** freeblocks on the page.
10576 */
10577 i = get2byte(&data[hdr+1]);
10578 while( i>0 ){
10579 int size, j;
10580 assert( (u32)i<=usableSize-4 ); /* Enforced by btreeComputeFreeSpace() */
10581 size = get2byte(&data[i+2]);
10582 assert( (u32)(i+size)<=usableSize ); /* due to btreeComputeFreeSpace() */
10583 btreeHeapInsert(heap, (((u32)i)<<16)|(i+size-1));
10584 /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a
10585 ** big-endian integer which is the offset in the b-tree page of the next
10586 ** freeblock in the chain, or zero if the freeblock is the last on the
10587 ** chain. */
10588 j = get2byte(&data[i]);
10589 /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of
10590 ** increasing offset. */
10591 assert( j==0 || j>i+size ); /* Enforced by btreeComputeFreeSpace() */
10592 assert( (u32)j<=usableSize-4 ); /* Enforced by btreeComputeFreeSpace() */
10593 i = j;
10594 }
10595 /* Analyze the min-heap looking for overlap between cells and/or
10596 ** freeblocks, and counting the number of untracked bytes in nFrag.
10597 **
10598 ** Each min-heap entry is of the form: (start_address<<16)|end_address.
10599 ** There is an implied first entry that covers the page header, the cell
10600 ** pointer index, and the gap between the cell pointer index and the start
10601 ** of cell content.
10602 **
10603 ** The loop below pulls entries from the min-heap in order and compares
10604 ** the start_address against the previous end_address. If there is an
10605 ** overlap, that means bytes are used multiple times. If there is a gap,
10606 ** that gap is added to the fragmentation count.
10607 */
10608 nFrag = 0;
10609 prev = contentOffset - 1; /* Implied first min-heap entry */
10610 while( btreeHeapPull(heap,&x) ){
10611 if( (prev&0xffff)>=(x>>16) ){
10612 checkAppendMsg(pCheck,
10613 "Multiple uses for byte %u of page %u", x>>16, iPage);
10614 break;
10615 }else{
10616 nFrag += (x>>16) - (prev&0xffff) - 1;
10617 prev = x;
10618 }
10619 }
10620 nFrag += usableSize - (prev&0xffff) - 1;
10621 /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments
10622 ** is stored in the fifth field of the b-tree page header.
10623 ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the
10624 ** number of fragmented free bytes within the cell content area.
10625 */
10626 if( heap[0]==0 && nFrag!=data[hdr+7] ){
10627 checkAppendMsg(pCheck,
10628 "Fragmentation of %d bytes reported as %d on page %u",
10629 nFrag, data[hdr+7], iPage);
10630 }
10631 }
10632
10633end_of_check:
10634 if( !doCoverageCheck ) pPage->isInit = savedIsInit;
10635 releasePage(pPage);
10636 pCheck->zPfx = saved_zPfx;
10637 pCheck->v1 = saved_v1;
10638 pCheck->v2 = saved_v2;
10639 return depth+1;
10640}
10641#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
10642
10643#ifndef SQLITE_OMIT_INTEGRITY_CHECK
10644/*
10645** This routine does a complete check of the given BTree file. aRoot[] is
10646** an array of page numbers where each page number is the root page of
10647** a table. nRoot is the number of entries in aRoot.
10648**
10649** A read-only or read-write transaction must be opened before calling
10650** this function.
10651**
10652** Write the number of errors seen in *pnErr. Except for some memory
10653** allocation errors, an error message held in memory obtained from
10654** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is
10655** returned. If a memory allocation error occurs, NULL is returned.
10656**
10657** If the first entry in aRoot[] is 0, that indicates that the list of
10658** root pages is incomplete. This is a "partial integrity-check". This
10659** happens when performing an integrity check on a single table. The
10660** zero is skipped, of course. But in addition, the freelist checks
10661** and the checks to make sure every page is referenced are also skipped,
10662** since obviously it is not possible to know which pages are covered by
10663** the unverified btrees. Except, if aRoot[1] is 1, then the freelist
10664** checks are still performed.
10665*/
10666char *sqlite3BtreeIntegrityCheck(
10667 sqlite3 *db, /* Database connection that is running the check */
10668 Btree *p, /* The btree to be checked */
10669 Pgno *aRoot, /* An array of root page numbers for individual trees */
10670 int nRoot, /* Number of entries in aRoot[] */
10671 int mxErr, /* Stop reporting errors after this many */
10672 int *pnErr /* Write number of errors seen to this variable */
10673){
10674 Pgno i;
10675 IntegrityCk sCheck;
10676 BtShared *pBt = p->pBt;
10677 u64 savedDbFlags = pBt->db->flags;
10678 char zErr[100];
10679 int bPartial = 0; /* True if not checking all btrees */
10680 int bCkFreelist = 1; /* True to scan the freelist */
10681 VVA_ONLY( int nRef );
10682 assert( nRoot>0 );
10683
10684 /* aRoot[0]==0 means this is a partial check */
10685 if( aRoot[0]==0 ){
10686 assert( nRoot>1 );
10687 bPartial = 1;
10688 if( aRoot[1]!=1 ) bCkFreelist = 0;
10689 }
10690
10691 sqlite3BtreeEnter(p);
10692 assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE );
10693 VVA_ONLY( nRef = sqlite3PagerRefcount(pBt->pPager) );
10694 assert( nRef>=0 );
10695 sCheck.db = db;
10696 sCheck.pBt = pBt;
10697 sCheck.pPager = pBt->pPager;
10698 sCheck.nPage = btreePagecount(sCheck.pBt);
10699 sCheck.mxErr = mxErr;
10700 sCheck.nErr = 0;
10701 sCheck.bOomFault = 0;
10702 sCheck.zPfx = 0;
10703 sCheck.v1 = 0;
10704 sCheck.v2 = 0;
10705 sCheck.aPgRef = 0;
10706 sCheck.heap = 0;
10707 sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH);
10708 sCheck.errMsg.printfFlags = SQLITE_PRINTF_INTERNAL;
10709 if( sCheck.nPage==0 ){
10710 goto integrity_ck_cleanup;
10711 }
10712
10713 sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1);
10714 if( !sCheck.aPgRef ){
10715 sCheck.bOomFault = 1;
10716 goto integrity_ck_cleanup;
10717 }
10718 sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize );
10719 if( sCheck.heap==0 ){
10720 sCheck.bOomFault = 1;
10721 goto integrity_ck_cleanup;
10722 }
10723
10724 i = PENDING_BYTE_PAGE(pBt);
10725 if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i);
10726
10727 /* Check the integrity of the freelist
10728 */
10729 if( bCkFreelist ){
10730 sCheck.zPfx = "Main freelist: ";
10731 checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
10732 get4byte(&pBt->pPage1->aData[36]));
10733 sCheck.zPfx = 0;
10734 }
10735
10736 /* Check all the tables.
10737 */
10738#ifndef SQLITE_OMIT_AUTOVACUUM
10739 if( !bPartial ){
10740 if( pBt->autoVacuum ){
10741 Pgno mx = 0;
10742 Pgno mxInHdr;
10743 for(i=0; (int)i<nRoot; i++) if( mx<aRoot[i] ) mx = aRoot[i];
10744 mxInHdr = get4byte(&pBt->pPage1->aData[52]);
10745 if( mx!=mxInHdr ){
10746 checkAppendMsg(&sCheck,
10747 "max rootpage (%d) disagrees with header (%d)",
10748 mx, mxInHdr
10749 );
10750 }
10751 }else if( get4byte(&pBt->pPage1->aData[64])!=0 ){
10752 checkAppendMsg(&sCheck,
10753 "incremental_vacuum enabled with a max rootpage of zero"
10754 );
10755 }
10756 }
10757#endif
10758 testcase( pBt->db->flags & SQLITE_CellSizeCk );
10759 pBt->db->flags &= ~(u64)SQLITE_CellSizeCk;
10760 for(i=0; (int)i<nRoot && sCheck.mxErr; i++){
10761 i64 notUsed;
10762 if( aRoot[i]==0 ) continue;
10763#ifndef SQLITE_OMIT_AUTOVACUUM
10764 if( pBt->autoVacuum && aRoot[i]>1 && !bPartial ){
10765 checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0);
10766 }
10767#endif
10768 checkTreePage(&sCheck, aRoot[i], &notUsed, LARGEST_INT64);
10769 }
10770 pBt->db->flags = savedDbFlags;
10771
10772 /* Make sure every page in the file is referenced
10773 */
10774 if( !bPartial ){
10775 for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){
10776#ifdef SQLITE_OMIT_AUTOVACUUM
10777 if( getPageReferenced(&sCheck, i)==0 ){
10778 checkAppendMsg(&sCheck, "Page %d is never used", i);
10779 }
10780#else
10781 /* If the database supports auto-vacuum, make sure no tables contain
10782 ** references to pointer-map pages.
10783 */
10784 if( getPageReferenced(&sCheck, i)==0 &&
10785 (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
10786 checkAppendMsg(&sCheck, "Page %d is never used", i);
10787 }
10788 if( getPageReferenced(&sCheck, i)!=0 &&
10789 (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
10790 checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i);
10791 }
10792#endif
10793 }
10794 }
10795
10796 /* Clean up and report errors.
10797 */
10798integrity_ck_cleanup:
10799 sqlite3PageFree(sCheck.heap);
10800 sqlite3_free(sCheck.aPgRef);
10801 if( sCheck.bOomFault ){
10802 sqlite3_str_reset(&sCheck.errMsg);
10803 sCheck.nErr++;
10804 }
10805 *pnErr = sCheck.nErr;
10806 if( sCheck.nErr==0 ) sqlite3_str_reset(&sCheck.errMsg);
10807 /* Make sure this analysis did not leave any unref() pages. */
10808 assert( nRef==sqlite3PagerRefcount(pBt->pPager) );
10809 sqlite3BtreeLeave(p);
10810 return sqlite3StrAccumFinish(&sCheck.errMsg);
10811}
10812#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
10813
10814/*
10815** Return the full pathname of the underlying database file. Return
10816** an empty string if the database is in-memory or a TEMP database.
10817**
10818** The pager filename is invariant as long as the pager is
10819** open so it is safe to access without the BtShared mutex.
10820*/
10821const char *sqlite3BtreeGetFilename(Btree *p){
10822 assert( p->pBt->pPager!=0 );
10823 return sqlite3PagerFilename(p->pBt->pPager, 1);
10824}
10825
10826/*
10827** Return the pathname of the journal file for this database. The return
10828** value of this routine is the same regardless of whether the journal file
10829** has been created or not.
10830**
10831** The pager journal filename is invariant as long as the pager is
10832** open so it is safe to access without the BtShared mutex.
10833*/
10834const char *sqlite3BtreeGetJournalname(Btree *p){
10835 assert( p->pBt->pPager!=0 );
10836 return sqlite3PagerJournalname(p->pBt->pPager);
10837}
10838
10839/*
10840** Return one of SQLITE_TXN_NONE, SQLITE_TXN_READ, or SQLITE_TXN_WRITE
10841** to describe the current transaction state of Btree p.
10842*/
10843int sqlite3BtreeTxnState(Btree *p){
10844 assert( p==0 || sqlite3_mutex_held(p->db->mutex) );
10845 return p ? p->inTrans : 0;
10846}
10847
10848#ifndef SQLITE_OMIT_WAL
10849/*
10850** Run a checkpoint on the Btree passed as the first argument.
10851**
10852** Return SQLITE_LOCKED if this or any other connection has an open
10853** transaction on the shared-cache the argument Btree is connected to.
10854**
10855** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
10856*/
10857int sqlite3BtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){
10858 int rc = SQLITE_OK;
10859 if( p ){
10860 BtShared *pBt = p->pBt;
10861 sqlite3BtreeEnter(p);
10862 if( pBt->inTransaction!=TRANS_NONE ){
10863 rc = SQLITE_LOCKED;
10864 }else{
10865 rc = sqlite3PagerCheckpoint(pBt->pPager, p->db, eMode, pnLog, pnCkpt);
10866 }
10867 sqlite3BtreeLeave(p);
10868 }
10869 return rc;
10870}
10871#endif
10872
10873/*
10874** Return true if there is currently a backup running on Btree p.
10875*/
10876int sqlite3BtreeIsInBackup(Btree *p){
10877 assert( p );
10878 assert( sqlite3_mutex_held(p->db->mutex) );
10879 return p->nBackup!=0;
10880}
10881
10882/*
10883** This function returns a pointer to a blob of memory associated with
10884** a single shared-btree. The memory is used by client code for its own
10885** purposes (for example, to store a high-level schema associated with
10886** the shared-btree). The btree layer manages reference counting issues.
10887**
10888** The first time this is called on a shared-btree, nBytes bytes of memory
10889** are allocated, zeroed, and returned to the caller. For each subsequent
10890** call the nBytes parameter is ignored and a pointer to the same blob
10891** of memory returned.
10892**
10893** If the nBytes parameter is 0 and the blob of memory has not yet been
10894** allocated, a null pointer is returned. If the blob has already been
10895** allocated, it is returned as normal.
10896**
10897** Just before the shared-btree is closed, the function passed as the
10898** xFree argument when the memory allocation was made is invoked on the
10899** blob of allocated memory. The xFree function should not call sqlite3_free()
10900** on the memory, the btree layer does that.
10901*/
10902void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){
10903 BtShared *pBt = p->pBt;
10904 sqlite3BtreeEnter(p);
10905 if( !pBt->pSchema && nBytes ){
10906 pBt->pSchema = sqlite3DbMallocZero(0, nBytes);
10907 pBt->xFreeSchema = xFree;
10908 }
10909 sqlite3BtreeLeave(p);
10910 return pBt->pSchema;
10911}
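/*
** Illustrative sketch (not part of the build): typical use by a higher
** layer.  The Schema type and schemaFree() callback are hypothetical
** stand-ins for whatever the caller stores; only the
** allocate-on-first-call behaviour comes from the header comment above.
**
**   Schema *pSchema = (Schema*)sqlite3BtreeSchema(p, sizeof(Schema),
**                                                 schemaFree);
**   if( pSchema==0 ) return SQLITE_NOMEM;
*/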
10912
10913/*
10914** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared
10915** btree as the argument handle holds an exclusive lock on the
10916** sqlite_schema table. Otherwise SQLITE_OK.
10917*/
10918int sqlite3BtreeSchemaLocked(Btree *p){
10919 int rc;
10920 assert( sqlite3_mutex_held(p->db->mutex) );
10921 sqlite3BtreeEnter(p);
10922 rc = querySharedCacheTableLock(p, SCHEMA_ROOT, READ_LOCK);
10923 assert( rc==SQLITE_OK || rc==SQLITE_LOCKED_SHAREDCACHE );
10924 sqlite3BtreeLeave(p);
10925 return rc;
10926}
10927
10928
10929#ifndef SQLITE_OMIT_SHARED_CACHE
10930/*
10931** Obtain a lock on the table whose root page is iTab. The
10932** lock is a write lock if isWriteLock is true or a read lock
10933** if it is false.
10934*/
10935int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){
10936 int rc = SQLITE_OK;
10937 assert( p->inTrans!=TRANS_NONE );
10938 if( p->sharable ){
10939 u8 lockType = READ_LOCK + isWriteLock;
10940 assert( READ_LOCK+1==WRITE_LOCK );
10941 assert( isWriteLock==0 || isWriteLock==1 );
10942
10943 sqlite3BtreeEnter(p);
10944 rc = querySharedCacheTableLock(p, iTab, lockType);
10945 if( rc==SQLITE_OK ){
10946 rc = setSharedCacheTableLock(p, iTab, lockType);
10947 }
10948 sqlite3BtreeLeave(p);
10949 }
10950 return rc;
10951}
10952#endif
10953
10954#ifndef SQLITE_OMIT_INCRBLOB
10955/*
10956** Argument pCsr must be a cursor opened for writing on an
10957** INTKEY table currently pointing at a valid table entry.
10958** This function modifies the data stored as part of that entry.
10959**
10960** Only the data content may be modified; it is not possible to
10961** change the length of the data stored. If this function is called with
10962** parameters that attempt to write past the end of the existing data,
10963** no modifications are made and SQLITE_CORRUPT is returned.
10964*/
10965int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
10966 int rc;
10967 assert( cursorOwnsBtShared(pCsr) );
10968 assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) );
10969 assert( pCsr->curFlags & BTCF_Incrblob );
10970
10971 rc = restoreCursorPosition(pCsr);
10972 if( rc!=SQLITE_OK ){
10973 return rc;
10974 }
10975 assert( pCsr->eState!=CURSOR_REQUIRESEEK );
10976 if( pCsr->eState!=CURSOR_VALID ){
10977 return SQLITE_ABORT;
10978 }
10979
10980 /* Save the positions of all other cursors open on this table. This is
10981 ** required in case any of them are holding references to an xFetch
10982 ** version of the b-tree page modified by the accessPayload call below.
10983 **
10984 ** Note that pCsr must be open on an INTKEY table, so saveCursorPosition()
10985 ** and hence saveAllCursors() cannot fail here. As a result,
10986 ** saveAllCursors() can only return SQLITE_OK.
  */
  VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr);
  assert( rc==SQLITE_OK );

  /* Check some assumptions:
  **   (a) the cursor is open for writing,
  **   (b) there is a read/write transaction open,
  **   (c) the connection holds a write-lock on the table (if required),
  **   (d) there are no conflicting read-locks, and
  **   (e) the cursor points at a valid row of an intKey table.
  */
  if( (pCsr->curFlags & BTCF_WriteFlag)==0 ){
    return SQLITE_READONLY;
  }
  assert( (pCsr->pBt->btsFlags & BTS_READ_ONLY)==0
       && pCsr->pBt->inTransaction==TRANS_WRITE );
  assert( hasSharedCacheTableLock(pCsr->pBtree, pCsr->pgnoRoot, 0, 2) );
  assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) );
  assert( pCsr->pPage->intKey );

  return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1);
}

/*
** Mark this cursor as an incremental blob cursor.
*/
void sqlite3BtreeIncrblobCursor(BtCursor *pCur){
  pCur->curFlags |= BTCF_Incrblob;
  pCur->pBtree->hasIncrblobCur = 1;
}
#endif

/*
** Set both the "read version" (single byte at byte offset 18) and
** "write version" (single byte at byte offset 19) fields in the database
** header to iVersion.
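**
** A hypothetical call sketch: version 2 marks the file as usable in WAL
** mode, while version 1 restores the legacy rollback-journal format:
**
**   rc = sqlite3BtreeSetVersion(pBtree, 2);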
*/
int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){
  BtShared *pBt = pBtree->pBt;
  int rc;                         /* Return code */

  assert( iVersion==1 || iVersion==2 );

  /* If setting the version fields to 1, do not automatically open the
  ** WAL connection, even if the version fields are currently set to 2.
  */
  pBt->btsFlags &= ~BTS_NO_WAL;
  if( iVersion==1 ) pBt->btsFlags |= BTS_NO_WAL;

  rc = sqlite3BtreeBeginTrans(pBtree, 0, 0);
  if( rc==SQLITE_OK ){
    u8 *aData = pBt->pPage1->aData;
    if( aData[18]!=(u8)iVersion || aData[19]!=(u8)iVersion ){
      rc = sqlite3BtreeBeginTrans(pBtree, 2, 0);
      if( rc==SQLITE_OK ){
        rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
        if( rc==SQLITE_OK ){
          aData[18] = (u8)iVersion;
          aData[19] = (u8)iVersion;
        }
      }
    }
  }

  pBt->btsFlags &= ~BTS_NO_WAL;
  return rc;
}

/*
** Return true if the cursor has a hint specified. This routine is
** only used from within assert() statements.
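**
** A hypothetical assert() sketch (BTREE_SEEK_EQ is a cursor hint mask
** defined in btree.h, not in this file):
**
**   assert( !sqlite3BtreeCursorHasHint(pCsr, BTREE_SEEK_EQ) );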
*/
int sqlite3BtreeCursorHasHint(BtCursor *pCsr, unsigned int mask){
  return (pCsr->hints & mask)!=0;
}

/*
** Return true if the given Btree is read-only.
*/
int sqlite3BtreeIsReadonly(Btree *p){
  return (p->pBt->btsFlags & BTS_READ_ONLY)!=0;
}

/*
** Return the size of the header added to each page by this module.
*/
int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); }

/*
** If no transaction is active and the database is not a temp-db, clear
** the in-memory pager cache.
*/
void sqlite3BtreeClearCache(Btree *p){
  BtShared *pBt = p->pBt;
  if( pBt->inTransaction==TRANS_NONE ){
    sqlite3PagerClearCache(pBt->pPager);
  }
}

#if !defined(SQLITE_OMIT_SHARED_CACHE)
/*
** Return true if the Btree passed as the only argument is sharable.
*/
int sqlite3BtreeSharable(Btree *p){
  return p->sharable;
}

/*
** Return the number of connections to the BtShared object accessed by
** the Btree handle passed as the only argument. For private caches
** this is always 1. For shared caches it may be 1 or greater.
*/
int sqlite3BtreeConnectionCount(Btree *p){
  testcase( p->sharable );
  return p->pBt->nRef;
}
#endif
