1 | /* |
2 | ** 2008 October 7 |
3 | ** |
4 | ** The author disclaims copyright to this source code. In place of |
5 | ** a legal notice, here is a blessing: |
6 | ** |
7 | ** May you do good and not evil. |
8 | ** May you find forgiveness for yourself and forgive others. |
9 | ** May you share freely, never taking more than you give. |
10 | ** |
11 | ************************************************************************* |
12 | ** |
** This file contains code used to implement an in-memory rollback journal.
14 | ** The in-memory rollback journal is used to journal transactions for |
15 | ** ":memory:" databases and when the journal_mode=MEMORY pragma is used. |
16 | ** |
17 | ** Update: The in-memory journal is also used to temporarily cache |
18 | ** smaller journals that are not critical for power-loss recovery. |
19 | ** For example, statement journals that are not too big will be held |
20 | ** entirely in memory, thus reducing the number of file I/O calls, and |
21 | ** more importantly, reducing temporary file creation events. If these |
22 | ** journals become too large for memory, they are spilled to disk. But |
23 | ** in the common case, they are usually small and no file I/O needs to |
24 | ** occur. |
25 | */ |
26 | #include "sqliteInt.h" |
27 | |
/*
** Forward references to internal structures.  Declaring the typedefs up
** front lets the struct definitions refer to each other by short name
** (for example, FileChunk.pNext is a "FileChunk*").
*/
typedef struct MemJournal MemJournal;
typedef struct FilePoint FilePoint;
typedef struct FileChunk FileChunk;
32 | |
/*
** The rollback journal is composed of a linked list of these structures.
**
** The zChunk array is always at least 8 bytes in size - usually much more.
** Its actual size is stored in the MemJournal.nChunkSize variable.  The
** declared length of 8 is only a placeholder; the number of bytes really
** allocated for one of these objects is given by fileChunkSize().
*/
struct FileChunk {
  FileChunk *pNext;               /* Next chunk in the journal */
  u8 zChunk[8];                   /* Content of this chunk */
};
43 | |
/*
** By default, allocate this many bytes of memory for each FileChunk object.
** This is the size of the whole allocation (list pointer plus content),
** not of the content area alone.
*/
#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024
48 | |
/*
** For chunk size nChunkSize, return the number of bytes that should
** be allocated for each FileChunk structure.
**
** sizeof(FileChunk) already accounts for the 8 placeholder bytes of
** FileChunk.zChunk[], hence the "-8" adjustment.
*/
#define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8))
54 | |
/*
** An instance of this object serves as a cursor into the rollback journal.
** The cursor can be either for reading or writing.
**
** MemJournal holds two of these: "endpoint" marks the end of the journal
** (where appended data goes) and "readpoint" remembers where the most
** recent read finished.
*/
struct FilePoint {
  sqlite3_int64 iOffset;          /* Offset from the beginning of the file */
  FileChunk *pChunk;              /* Specific chunk into which cursor points */
};
63 | |
/*
** This structure is a subclass of sqlite3_file. Each open memory-journal
** is an instance of this class.
**
** The first group of fields describes the in-memory content.  The second
** group (flags, pVfs, zJournal) records the arguments that were passed to
** sqlite3JournalOpen() so that a real file can be opened later if the
** journal has to be moved to disk.
*/
struct MemJournal {
  const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */
  int nChunkSize;                 /* In-memory chunk-size */

  int nSpill;                     /* Bytes of data before flushing.  A value
                                  ** that is not positive never triggers a
                                  ** flush from within memjrnlWrite(). */
  FileChunk *pFirst;              /* Head of in-memory chunk-list */
  FilePoint endpoint;             /* Pointer to the end of the file */
  FilePoint readpoint;            /* Pointer to the end of the last xRead() */

  int flags;                      /* xOpen flags */
  sqlite3_vfs *pVfs;              /* The "real" underlying VFS */
  const char *zJournal;           /* Name of the journal file */
};
81 | |
82 | /* |
83 | ** Read data from the in-memory journal file. This is the implementation |
84 | ** of the sqlite3_vfs.xRead method. |
85 | */ |
86 | static int memjrnlRead( |
87 | sqlite3_file *pJfd, /* The journal file from which to read */ |
88 | void *zBuf, /* Put the results here */ |
89 | int iAmt, /* Number of bytes to read */ |
90 | sqlite_int64 iOfst /* Begin reading at this offset */ |
91 | ){ |
92 | MemJournal *p = (MemJournal *)pJfd; |
93 | u8 *zOut = zBuf; |
94 | int nRead = iAmt; |
95 | int iChunkOffset; |
96 | FileChunk *pChunk; |
97 | |
98 | if( (iAmt+iOfst)>p->endpoint.iOffset ){ |
99 | return SQLITE_IOERR_SHORT_READ; |
100 | } |
101 | assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 ); |
102 | if( p->readpoint.iOffset!=iOfst || iOfst==0 ){ |
103 | sqlite3_int64 iOff = 0; |
104 | for(pChunk=p->pFirst; |
105 | ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst; |
106 | pChunk=pChunk->pNext |
107 | ){ |
108 | iOff += p->nChunkSize; |
109 | } |
110 | }else{ |
111 | pChunk = p->readpoint.pChunk; |
112 | assert( pChunk!=0 ); |
113 | } |
114 | |
115 | iChunkOffset = (int)(iOfst%p->nChunkSize); |
116 | do { |
117 | int iSpace = p->nChunkSize - iChunkOffset; |
118 | int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset)); |
119 | memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy); |
120 | zOut += nCopy; |
121 | nRead -= iSpace; |
122 | iChunkOffset = 0; |
123 | } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 ); |
124 | p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0; |
125 | p->readpoint.pChunk = pChunk; |
126 | |
127 | return SQLITE_OK; |
128 | } |
129 | |
130 | /* |
131 | ** Free the list of FileChunk structures headed at MemJournal.pFirst. |
132 | */ |
133 | static void memjrnlFreeChunks(FileChunk *pFirst){ |
134 | FileChunk *pIter; |
135 | FileChunk *pNext; |
136 | for(pIter=pFirst; pIter; pIter=pNext){ |
137 | pNext = pIter->pNext; |
138 | sqlite3_free(pIter); |
139 | } |
140 | } |
141 | |
142 | /* |
143 | ** Flush the contents of memory to a real file on disk. |
144 | */ |
145 | static int memjrnlCreateFile(MemJournal *p){ |
146 | int rc; |
147 | sqlite3_file *pReal = (sqlite3_file*)p; |
148 | MemJournal copy = *p; |
149 | |
150 | memset(p, 0, sizeof(MemJournal)); |
151 | rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0); |
152 | if( rc==SQLITE_OK ){ |
153 | int nChunk = copy.nChunkSize; |
154 | i64 iOff = 0; |
155 | FileChunk *pIter; |
156 | for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){ |
157 | if( iOff + nChunk > copy.endpoint.iOffset ){ |
158 | nChunk = copy.endpoint.iOffset - iOff; |
159 | } |
160 | rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff); |
161 | if( rc ) break; |
162 | iOff += nChunk; |
163 | } |
164 | if( rc==SQLITE_OK ){ |
165 | /* No error has occurred. Free the in-memory buffers. */ |
166 | memjrnlFreeChunks(copy.pFirst); |
167 | } |
168 | } |
169 | if( rc!=SQLITE_OK ){ |
170 | /* If an error occurred while creating or writing to the file, restore |
171 | ** the original before returning. This way, SQLite uses the in-memory |
172 | ** journal data to roll back changes made to the internal page-cache |
173 | ** before this function was called. */ |
174 | sqlite3OsClose(pReal); |
175 | *p = copy; |
176 | } |
177 | return rc; |
178 | } |
179 | |
180 | |
/* Forward reference.  memjrnlWrite() calls memjrnlTruncate(), which is
** defined after it in this file. */
static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size);
183 | |
184 | /* |
185 | ** Write data to the file. |
186 | */ |
187 | static int memjrnlWrite( |
188 | sqlite3_file *pJfd, /* The journal file into which to write */ |
189 | const void *zBuf, /* Take data to be written from here */ |
190 | int iAmt, /* Number of bytes to write */ |
191 | sqlite_int64 iOfst /* Begin writing at this offset into the file */ |
192 | ){ |
193 | MemJournal *p = (MemJournal *)pJfd; |
194 | int nWrite = iAmt; |
195 | u8 *zWrite = (u8 *)zBuf; |
196 | |
197 | /* If the file should be created now, create it and write the new data |
198 | ** into the file on disk. */ |
199 | if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){ |
200 | int rc = memjrnlCreateFile(p); |
201 | if( rc==SQLITE_OK ){ |
202 | rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst); |
203 | } |
204 | return rc; |
205 | } |
206 | |
207 | /* If the contents of this write should be stored in memory */ |
208 | else{ |
209 | /* An in-memory journal file should only ever be appended to. Random |
210 | ** access writes are not required. The only exception to this is when |
211 | ** the in-memory journal is being used by a connection using the |
212 | ** atomic-write optimization. In this case the first 28 bytes of the |
213 | ** journal file may be written as part of committing the transaction. */ |
214 | assert( iOfst<=p->endpoint.iOffset ); |
215 | if( iOfst>0 && iOfst!=p->endpoint.iOffset ){ |
216 | memjrnlTruncate(pJfd, iOfst); |
217 | } |
218 | if( iOfst==0 && p->pFirst ){ |
219 | assert( p->nChunkSize>iAmt ); |
220 | memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt); |
221 | }else{ |
222 | while( nWrite>0 ){ |
223 | FileChunk *pChunk = p->endpoint.pChunk; |
224 | int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize); |
225 | int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset); |
226 | |
227 | assert( pChunk!=0 || iChunkOffset==0 ); |
228 | if( iChunkOffset==0 ){ |
229 | /* New chunk is required to extend the file. */ |
230 | FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize)); |
231 | if( !pNew ){ |
232 | return SQLITE_IOERR_NOMEM_BKPT; |
233 | } |
234 | pNew->pNext = 0; |
235 | if( pChunk ){ |
236 | assert( p->pFirst ); |
237 | pChunk->pNext = pNew; |
238 | }else{ |
239 | assert( !p->pFirst ); |
240 | p->pFirst = pNew; |
241 | } |
242 | pChunk = p->endpoint.pChunk = pNew; |
243 | } |
244 | |
245 | assert( pChunk!=0 ); |
246 | memcpy((u8*)pChunk->zChunk + iChunkOffset, zWrite, iSpace); |
247 | zWrite += iSpace; |
248 | nWrite -= iSpace; |
249 | p->endpoint.iOffset += iSpace; |
250 | } |
251 | } |
252 | } |
253 | |
254 | return SQLITE_OK; |
255 | } |
256 | |
257 | /* |
258 | ** Truncate the in-memory file. |
259 | */ |
260 | static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ |
261 | MemJournal *p = (MemJournal *)pJfd; |
262 | assert( p->endpoint.pChunk==0 || p->endpoint.pChunk->pNext==0 ); |
263 | if( size<p->endpoint.iOffset ){ |
264 | FileChunk *pIter = 0; |
265 | if( size==0 ){ |
266 | memjrnlFreeChunks(p->pFirst); |
267 | p->pFirst = 0; |
268 | }else{ |
269 | i64 iOff = p->nChunkSize; |
270 | for(pIter=p->pFirst; ALWAYS(pIter) && iOff<size; pIter=pIter->pNext){ |
271 | iOff += p->nChunkSize; |
272 | } |
273 | if( ALWAYS(pIter) ){ |
274 | memjrnlFreeChunks(pIter->pNext); |
275 | pIter->pNext = 0; |
276 | } |
277 | } |
278 | |
279 | p->endpoint.pChunk = pIter; |
280 | p->endpoint.iOffset = size; |
281 | p->readpoint.pChunk = 0; |
282 | p->readpoint.iOffset = 0; |
283 | } |
284 | return SQLITE_OK; |
285 | } |
286 | |
287 | /* |
288 | ** Close the file. |
289 | */ |
290 | static int memjrnlClose(sqlite3_file *pJfd){ |
291 | MemJournal *p = (MemJournal *)pJfd; |
292 | memjrnlFreeChunks(p->pFirst); |
293 | return SQLITE_OK; |
294 | } |
295 | |
296 | /* |
297 | ** Sync the file. |
298 | ** |
299 | ** If the real file has been created, call its xSync method. Otherwise, |
300 | ** syncing an in-memory journal is a no-op. |
301 | */ |
302 | static int memjrnlSync(sqlite3_file *pJfd, int flags){ |
303 | UNUSED_PARAMETER2(pJfd, flags); |
304 | return SQLITE_OK; |
305 | } |
306 | |
307 | /* |
308 | ** Query the size of the file in bytes. |
309 | */ |
310 | static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ |
311 | MemJournal *p = (MemJournal *)pJfd; |
312 | *pSize = (sqlite_int64) p->endpoint.iOffset; |
313 | return SQLITE_OK; |
314 | } |
315 | |
/*
** Table of methods for MemJournal sqlite3_file object.
**
** Only close, read, write, truncate, sync and file-size are implemented.
** Every other slot is left as 0.  The address of this table is also what
** sqlite3JournalIsInMemory() compares against to recognise a handle that
** is still an in-memory journal.
*/
static const struct sqlite3_io_methods MemJournalMethods = {
  1,                /* iVersion */
  memjrnlClose,     /* xClose */
  memjrnlRead,      /* xRead */
  memjrnlWrite,     /* xWrite */
  memjrnlTruncate,  /* xTruncate */
  memjrnlSync,      /* xSync */
  memjrnlFileSize,  /* xFileSize */
  0,                /* xLock */
  0,                /* xUnlock */
  0,                /* xCheckReservedLock */
  0,                /* xFileControl */
  0,                /* xSectorSize */
  0,                /* xDeviceCharacteristics */
  0,                /* xShmMap */
  0,                /* xShmLock */
  0,                /* xShmBarrier */
  0,                /* xShmUnmap */
  0,                /* xFetch */
  0                 /* xUnfetch */
};
340 | |
341 | /* |
342 | ** Open a journal file. |
343 | ** |
344 | ** The behaviour of the journal file depends on the value of parameter |
345 | ** nSpill. If nSpill is 0, then the journal file is always create and |
346 | ** accessed using the underlying VFS. If nSpill is less than zero, then |
347 | ** all content is always stored in main-memory. Finally, if nSpill is a |
348 | ** positive value, then the journal file is initially created in-memory |
349 | ** but may be flushed to disk later on. In this case the journal file is |
350 | ** flushed to disk either when it grows larger than nSpill bytes in size, |
351 | ** or when sqlite3JournalCreate() is called. |
352 | */ |
353 | int sqlite3JournalOpen( |
354 | sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */ |
355 | const char *zName, /* Name of the journal file */ |
356 | sqlite3_file *pJfd, /* Preallocated, blank file handle */ |
357 | int flags, /* Opening flags */ |
358 | int nSpill /* Bytes buffered before opening the file */ |
359 | ){ |
360 | MemJournal *p = (MemJournal*)pJfd; |
361 | |
362 | assert( zName || nSpill<0 || (flags & SQLITE_OPEN_EXCLUSIVE) ); |
363 | |
364 | /* Zero the file-handle object. If nSpill was passed zero, initialize |
365 | ** it using the sqlite3OsOpen() function of the underlying VFS. In this |
366 | ** case none of the code in this module is executed as a result of calls |
367 | ** made on the journal file-handle. */ |
368 | memset(p, 0, sizeof(MemJournal)); |
369 | if( nSpill==0 ){ |
370 | return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0); |
371 | } |
372 | |
373 | if( nSpill>0 ){ |
374 | p->nChunkSize = nSpill; |
375 | }else{ |
376 | p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk); |
377 | assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) ); |
378 | } |
379 | |
380 | pJfd->pMethods = (const sqlite3_io_methods*)&MemJournalMethods; |
381 | p->nSpill = nSpill; |
382 | p->flags = flags; |
383 | p->zJournal = zName; |
384 | p->pVfs = pVfs; |
385 | return SQLITE_OK; |
386 | } |
387 | |
388 | /* |
389 | ** Open an in-memory journal file. |
390 | */ |
391 | void sqlite3MemJournalOpen(sqlite3_file *pJfd){ |
392 | sqlite3JournalOpen(0, 0, pJfd, 0, -1); |
393 | } |
394 | |
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
/*
** If the argument p points to a MemJournal structure that is not an
** in-memory-only journal file (i.e. is one that was opened with a +ve
** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying
** file has not yet been created, create it now.
**
** "Not yet created" is detected by the handle still using the
** MemJournalMethods table.  Returns SQLITE_OK if there was nothing to do,
** otherwise the result of memjrnlCreateFile().
**
** The condition below is assembled by the preprocessor: the nSpill test
** is a plain test when SQLITE_ENABLE_ATOMIC_WRITE is defined and is
** wrapped in NEVER() otherwise; the SQLITE_OPEN_MAIN_JOURNAL test is
** present only when SQLITE_ENABLE_BATCH_ATOMIC_WRITE is defined.
*/
int sqlite3JournalCreate(sqlite3_file *pJfd){
  int rc = SQLITE_OK;
  MemJournal *p = (MemJournal*)pJfd;
  if( pJfd->pMethods==&MemJournalMethods && (
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
     p->nSpill>0
#else
     /* While this appears to not be possible without ATOMIC_WRITE, the
     ** paths are complex, so it seems prudent to leave the test in as
     ** a NEVER(), in case our analysis is subtly flawed. */
     NEVER(p->nSpill>0)
#endif
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
     || (p->flags & SQLITE_OPEN_MAIN_JOURNAL)
#endif
  )){
    rc = memjrnlCreateFile(p);
  }
  return rc;
}
#endif
424 | |
425 | /* |
426 | ** The file-handle passed as the only argument is open on a journal file. |
427 | ** Return true if this "journal file" is currently stored in heap memory, |
428 | ** or false otherwise. |
429 | */ |
430 | int sqlite3JournalIsInMemory(sqlite3_file *p){ |
431 | return p->pMethods==&MemJournalMethods; |
432 | } |
433 | |
434 | /* |
435 | ** Return the number of bytes required to store a JournalFile that uses vfs |
436 | ** pVfs to create the underlying on-disk files. |
437 | */ |
438 | int sqlite3JournalSize(sqlite3_vfs *pVfs){ |
439 | return MAX(pVfs->szOsFile, (int)sizeof(MemJournal)); |
440 | } |
441 | |