1/*
2** 2008 October 7
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12**
** This file contains code used to implement an in-memory rollback journal.
14** The in-memory rollback journal is used to journal transactions for
15** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
16**
17** Update: The in-memory journal is also used to temporarily cache
18** smaller journals that are not critical for power-loss recovery.
19** For example, statement journals that are not too big will be held
20** entirely in memory, thus reducing the number of file I/O calls, and
21** more importantly, reducing temporary file creation events. If these
22** journals become too large for memory, they are spilled to disk. But
23** in the common case, they are usually small and no file I/O needs to
24** occur.
25*/
26#include "sqliteInt.h"
27
/* Forward references to internal structures.  The typedefs let each
** structure below refer to the others by its short name before the full
** definition has been seen. */
typedef struct MemJournal MemJournal;
typedef struct FilePoint FilePoint;
typedef struct FileChunk FileChunk;
32
/*
** The rollback journal is composed of a linked list of these structures.
**
** The zChunk array is always at least 8 bytes in size - usually much more.
** Its actual size is stored in the MemJournal.nChunkSize variable.  The
** declared size of 8 is only a placeholder: chunks are allocated with
** fileChunkSize(nChunkSize) bytes so that zChunk[] really holds nChunkSize
** bytes of journal content.
*/
struct FileChunk {
  FileChunk *pNext;               /* Next chunk in the journal */
  u8 zChunk[8];                   /* Content of this chunk */
};
43
/*
** By default, allocate this many bytes of memory for each FileChunk object.
** This is the size of the whole allocation (header plus payload), not of
** the payload alone: sqlite3JournalOpen() derives the payload size from it
** when no explicit spill threshold is given.
*/
#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024
48
/*
** For chunk size nChunkSize, return the number of bytes that should
** be allocated for each FileChunk structure.  The 8 that is subtracted
** accounts for the placeholder zChunk[8] array that is already included
** in sizeof(FileChunk).
*/
#define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8))
54
/*
** An instance of this object serves as a cursor into the rollback journal.
** The cursor can be either for reading or writing.  MemJournal keeps one
** of each: "endpoint" marks the end of the journal (where appends go) and
** "readpoint" remembers where the previous read finished.
*/
struct FilePoint {
  sqlite3_int64 iOffset;          /* Offset from the beginning of the file */
  FileChunk *pChunk;              /* Specific chunk into which cursor points */
};
63
/*
** This structure is a subclass of sqlite3_file. Each open memory-journal
** is an instance of this class.
**
** The code in this file casts freely between sqlite3_file* and MemJournal*,
** which is why the method-table pointer has to be the first member.
*/
struct MemJournal {
  const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */
  int nChunkSize;                 /* In-memory chunk-size */

  int nSpill;                     /* Bytes of data before flushing */
  FileChunk *pFirst;              /* Head of in-memory chunk-list */
  FilePoint endpoint;             /* Pointer to the end of the file */
  FilePoint readpoint;            /* Pointer to the end of the last xRead() */

  int flags;                      /* xOpen flags */
  sqlite3_vfs *pVfs;              /* The "real" underlying VFS */
  const char *zJournal;           /* Name of the journal file */
};
81
82/*
83** Read data from the in-memory journal file. This is the implementation
84** of the sqlite3_vfs.xRead method.
85*/
86static int memjrnlRead(
87 sqlite3_file *pJfd, /* The journal file from which to read */
88 void *zBuf, /* Put the results here */
89 int iAmt, /* Number of bytes to read */
90 sqlite_int64 iOfst /* Begin reading at this offset */
91){
92 MemJournal *p = (MemJournal *)pJfd;
93 u8 *zOut = zBuf;
94 int nRead = iAmt;
95 int iChunkOffset;
96 FileChunk *pChunk;
97
98 if( (iAmt+iOfst)>p->endpoint.iOffset ){
99 return SQLITE_IOERR_SHORT_READ;
100 }
101 assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 );
102 if( p->readpoint.iOffset!=iOfst || iOfst==0 ){
103 sqlite3_int64 iOff = 0;
104 for(pChunk=p->pFirst;
105 ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst;
106 pChunk=pChunk->pNext
107 ){
108 iOff += p->nChunkSize;
109 }
110 }else{
111 pChunk = p->readpoint.pChunk;
112 assert( pChunk!=0 );
113 }
114
115 iChunkOffset = (int)(iOfst%p->nChunkSize);
116 do {
117 int iSpace = p->nChunkSize - iChunkOffset;
118 int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset));
119 memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy);
120 zOut += nCopy;
121 nRead -= iSpace;
122 iChunkOffset = 0;
123 } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 );
124 p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0;
125 p->readpoint.pChunk = pChunk;
126
127 return SQLITE_OK;
128}
129
130/*
131** Free the list of FileChunk structures headed at MemJournal.pFirst.
132*/
133static void memjrnlFreeChunks(FileChunk *pFirst){
134 FileChunk *pIter;
135 FileChunk *pNext;
136 for(pIter=pFirst; pIter; pIter=pNext){
137 pNext = pIter->pNext;
138 sqlite3_free(pIter);
139 }
140}
141
142/*
143** Flush the contents of memory to a real file on disk.
144*/
145static int memjrnlCreateFile(MemJournal *p){
146 int rc;
147 sqlite3_file *pReal = (sqlite3_file*)p;
148 MemJournal copy = *p;
149
150 memset(p, 0, sizeof(MemJournal));
151 rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0);
152 if( rc==SQLITE_OK ){
153 int nChunk = copy.nChunkSize;
154 i64 iOff = 0;
155 FileChunk *pIter;
156 for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){
157 if( iOff + nChunk > copy.endpoint.iOffset ){
158 nChunk = copy.endpoint.iOffset - iOff;
159 }
160 rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff);
161 if( rc ) break;
162 iOff += nChunk;
163 }
164 if( rc==SQLITE_OK ){
165 /* No error has occurred. Free the in-memory buffers. */
166 memjrnlFreeChunks(copy.pFirst);
167 }
168 }
169 if( rc!=SQLITE_OK ){
170 /* If an error occurred while creating or writing to the file, restore
171 ** the original before returning. This way, SQLite uses the in-memory
172 ** journal data to roll back changes made to the internal page-cache
173 ** before this function was called. */
174 sqlite3OsClose(pReal);
175 *p = copy;
176 }
177 return rc;
178}
179
180
/* Forward reference.  memjrnlWrite() calls memjrnlTruncate(), which is
** defined further down in this file. */
static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size);
183
184/*
185** Write data to the file.
186*/
187static int memjrnlWrite(
188 sqlite3_file *pJfd, /* The journal file into which to write */
189 const void *zBuf, /* Take data to be written from here */
190 int iAmt, /* Number of bytes to write */
191 sqlite_int64 iOfst /* Begin writing at this offset into the file */
192){
193 MemJournal *p = (MemJournal *)pJfd;
194 int nWrite = iAmt;
195 u8 *zWrite = (u8 *)zBuf;
196
197 /* If the file should be created now, create it and write the new data
198 ** into the file on disk. */
199 if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){
200 int rc = memjrnlCreateFile(p);
201 if( rc==SQLITE_OK ){
202 rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst);
203 }
204 return rc;
205 }
206
207 /* If the contents of this write should be stored in memory */
208 else{
209 /* An in-memory journal file should only ever be appended to. Random
210 ** access writes are not required. The only exception to this is when
211 ** the in-memory journal is being used by a connection using the
212 ** atomic-write optimization. In this case the first 28 bytes of the
213 ** journal file may be written as part of committing the transaction. */
214 assert( iOfst<=p->endpoint.iOffset );
215 if( iOfst>0 && iOfst!=p->endpoint.iOffset ){
216 memjrnlTruncate(pJfd, iOfst);
217 }
218 if( iOfst==0 && p->pFirst ){
219 assert( p->nChunkSize>iAmt );
220 memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt);
221 }else{
222 while( nWrite>0 ){
223 FileChunk *pChunk = p->endpoint.pChunk;
224 int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize);
225 int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset);
226
227 assert( pChunk!=0 || iChunkOffset==0 );
228 if( iChunkOffset==0 ){
229 /* New chunk is required to extend the file. */
230 FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize));
231 if( !pNew ){
232 return SQLITE_IOERR_NOMEM_BKPT;
233 }
234 pNew->pNext = 0;
235 if( pChunk ){
236 assert( p->pFirst );
237 pChunk->pNext = pNew;
238 }else{
239 assert( !p->pFirst );
240 p->pFirst = pNew;
241 }
242 pChunk = p->endpoint.pChunk = pNew;
243 }
244
245 assert( pChunk!=0 );
246 memcpy((u8*)pChunk->zChunk + iChunkOffset, zWrite, iSpace);
247 zWrite += iSpace;
248 nWrite -= iSpace;
249 p->endpoint.iOffset += iSpace;
250 }
251 }
252 }
253
254 return SQLITE_OK;
255}
256
257/*
258** Truncate the in-memory file.
259*/
260static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
261 MemJournal *p = (MemJournal *)pJfd;
262 assert( p->endpoint.pChunk==0 || p->endpoint.pChunk->pNext==0 );
263 if( size<p->endpoint.iOffset ){
264 FileChunk *pIter = 0;
265 if( size==0 ){
266 memjrnlFreeChunks(p->pFirst);
267 p->pFirst = 0;
268 }else{
269 i64 iOff = p->nChunkSize;
270 for(pIter=p->pFirst; ALWAYS(pIter) && iOff<size; pIter=pIter->pNext){
271 iOff += p->nChunkSize;
272 }
273 if( ALWAYS(pIter) ){
274 memjrnlFreeChunks(pIter->pNext);
275 pIter->pNext = 0;
276 }
277 }
278
279 p->endpoint.pChunk = pIter;
280 p->endpoint.iOffset = size;
281 p->readpoint.pChunk = 0;
282 p->readpoint.iOffset = 0;
283 }
284 return SQLITE_OK;
285}
286
287/*
288** Close the file.
289*/
290static int memjrnlClose(sqlite3_file *pJfd){
291 MemJournal *p = (MemJournal *)pJfd;
292 memjrnlFreeChunks(p->pFirst);
293 return SQLITE_OK;
294}
295
296/*
297** Sync the file.
298**
299** If the real file has been created, call its xSync method. Otherwise,
300** syncing an in-memory journal is a no-op.
301*/
302static int memjrnlSync(sqlite3_file *pJfd, int flags){
303 UNUSED_PARAMETER2(pJfd, flags);
304 return SQLITE_OK;
305}
306
307/*
308** Query the size of the file in bytes.
309*/
310static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
311 MemJournal *p = (MemJournal *)pJfd;
312 *pSize = (sqlite_int64) p->endpoint.iOffset;
313 return SQLITE_OK;
314}
315
/*
** Table of methods for MemJournal sqlite3_file object.
**
** Only close, read, write, truncate, sync and file-size are implemented.
** Every other slot is left as a NULL pointer.  The address of this table
** also serves as the tag that identifies a handle as an in-memory journal
** (see sqlite3JournalIsInMemory()).
*/
static const struct sqlite3_io_methods MemJournalMethods = {
  1,                /* iVersion */
  memjrnlClose,     /* xClose */
  memjrnlRead,      /* xRead */
  memjrnlWrite,     /* xWrite */
  memjrnlTruncate,  /* xTruncate */
  memjrnlSync,      /* xSync */
  memjrnlFileSize,  /* xFileSize */
  0,                /* xLock */
  0,                /* xUnlock */
  0,                /* xCheckReservedLock */
  0,                /* xFileControl */
  0,                /* xSectorSize */
  0,                /* xDeviceCharacteristics */
  0,                /* xShmMap */
  0,                /* xShmLock */
  0,                /* xShmBarrier */
  0,                /* xShmUnmap */
  0,                /* xFetch */
  0                 /* xUnfetch */
};
340
341/*
342** Open a journal file.
343**
344** The behaviour of the journal file depends on the value of parameter
345** nSpill. If nSpill is 0, then the journal file is always create and
346** accessed using the underlying VFS. If nSpill is less than zero, then
347** all content is always stored in main-memory. Finally, if nSpill is a
348** positive value, then the journal file is initially created in-memory
349** but may be flushed to disk later on. In this case the journal file is
350** flushed to disk either when it grows larger than nSpill bytes in size,
351** or when sqlite3JournalCreate() is called.
352*/
353int sqlite3JournalOpen(
354 sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */
355 const char *zName, /* Name of the journal file */
356 sqlite3_file *pJfd, /* Preallocated, blank file handle */
357 int flags, /* Opening flags */
358 int nSpill /* Bytes buffered before opening the file */
359){
360 MemJournal *p = (MemJournal*)pJfd;
361
362 assert( zName || nSpill<0 || (flags & SQLITE_OPEN_EXCLUSIVE) );
363
364 /* Zero the file-handle object. If nSpill was passed zero, initialize
365 ** it using the sqlite3OsOpen() function of the underlying VFS. In this
366 ** case none of the code in this module is executed as a result of calls
367 ** made on the journal file-handle. */
368 memset(p, 0, sizeof(MemJournal));
369 if( nSpill==0 ){
370 return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0);
371 }
372
373 if( nSpill>0 ){
374 p->nChunkSize = nSpill;
375 }else{
376 p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk);
377 assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) );
378 }
379
380 pJfd->pMethods = (const sqlite3_io_methods*)&MemJournalMethods;
381 p->nSpill = nSpill;
382 p->flags = flags;
383 p->zJournal = zName;
384 p->pVfs = pVfs;
385 return SQLITE_OK;
386}
387
388/*
389** Open an in-memory journal file.
390*/
391void sqlite3MemJournalOpen(sqlite3_file *pJfd){
392 sqlite3JournalOpen(0, 0, pJfd, 0, -1);
393}
394
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
/*
** If the argument p points to a MemJournal structure that is not an
** in-memory-only journal file (i.e. is one that was opened with a +ve
** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying
** file has not yet been created, create it now.
**
** Returns SQLITE_OK if there was nothing to do, otherwise the result of
** memjrnlCreateFile().
*/
int sqlite3JournalCreate(sqlite3_file *pJfd){
  MemJournal *p = (MemJournal*)pJfd;
  int bCreate;               /* True if the file has to be created now */

  /* A handle that is already backed by a real file needs no work. */
  if( pJfd->pMethods!=&MemJournalMethods ){
    return SQLITE_OK;
  }

#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  bCreate = p->nSpill>0;
#else
  /* While this appears to not be possible without ATOMIC_WRITE, the
  ** paths are complex, so it seems prudent to leave the test in as
  ** a NEVER(), in case our analysis is subtly flawed. */
  bCreate = NEVER(p->nSpill>0);
#endif
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
  if( !bCreate ){
    bCreate = (p->flags & SQLITE_OPEN_MAIN_JOURNAL)!=0;
  }
#endif

  return bCreate ? memjrnlCreateFile(p) : SQLITE_OK;
}
#endif
424
425/*
426** The file-handle passed as the only argument is open on a journal file.
427** Return true if this "journal file" is currently stored in heap memory,
428** or false otherwise.
429*/
430int sqlite3JournalIsInMemory(sqlite3_file *p){
431 return p->pMethods==&MemJournalMethods;
432}
433
434/*
435** Return the number of bytes required to store a JournalFile that uses vfs
436** pVfs to create the underlying on-disk files.
437*/
438int sqlite3JournalSize(sqlite3_vfs *pVfs){
439 return MAX(pVfs->szOsFile, (int)sizeof(MemJournal));
440}
441