1 | /* |
2 | ** 2004 May 22 |
3 | ** |
4 | ** The author disclaims copyright to this source code. In place of |
5 | ** a legal notice, here is a blessing: |
6 | ** |
7 | ** May you do good and not evil. |
8 | ** May you find forgiveness for yourself and forgive others. |
9 | ** May you share freely, never taking more than you give. |
10 | ** |
11 | ****************************************************************************** |
12 | ** |
13 | ** This file contains the VFS implementation for unix-like operating systems |
14 | ** including Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others. |
15 | ** |
16 | ** There are actually several different VFS implementations in this file. |
17 | ** The differences are in the way that file locking is done. The default |
18 | ** implementation uses Posix Advisory Locks. Alternative implementations |
19 | ** use flock(), dot-files, various proprietary locking schemas, or simply |
20 | ** skip locking altogether. |
21 | ** |
22 | ** This source file is organized into divisions where the logic for various |
23 | ** subfunctions is contained within the appropriate division. PLEASE |
24 | ** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed |
25 | ** in the correct division and should be clearly labeled. |
26 | ** |
27 | ** The layout of divisions is as follows: |
28 | ** |
29 | ** * General-purpose declarations and utility functions. |
30 | ** * Unique file ID logic used by VxWorks. |
31 | ** * Various locking primitive implementations (all except proxy locking): |
32 | ** + for Posix Advisory Locks |
33 | ** + for no-op locks |
34 | ** + for dot-file locks |
35 | ** + for flock() locking |
36 | ** + for named semaphore locks (VxWorks only) |
37 | ** + for AFP filesystem locks (MacOSX only) |
38 | ** * sqlite3_file methods not associated with locking. |
39 | ** * Definitions of sqlite3_io_methods objects for all locking |
40 | ** methods plus "finder" functions for each locking method. |
41 | ** * sqlite3_vfs method implementations. |
42 | ** * Locking primitives for the proxy uber-locking-method. (MacOSX only) |
43 | ** * Definitions of sqlite3_vfs objects for all locking methods |
44 | ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). |
45 | */ |
46 | #include "sqliteInt.h" |
47 | #if SQLITE_OS_UNIX /* This file is used on unix only */ |
48 | |
49 | /* |
50 | ** There are various methods for file locking used for concurrency |
51 | ** control: |
52 | ** |
53 | ** 1. POSIX locking (the default), |
54 | ** 2. No locking, |
55 | ** 3. Dot-file locking, |
56 | ** 4. flock() locking, |
57 | ** 5. AFP locking (OSX only), |
58 | ** 6. Named POSIX semaphores (VXWorks only), |
59 | ** 7. proxy locking. (OSX only) |
60 | ** |
61 | ** Styles 4, 5, and 7 are only available if SQLITE_ENABLE_LOCKING_STYLE |
62 | ** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic |
63 | ** selection of the appropriate locking style based on the filesystem |
64 | ** where the database is located. |
65 | */ |
66 | #if !defined(SQLITE_ENABLE_LOCKING_STYLE) |
67 | # if defined(__APPLE__) |
68 | # define SQLITE_ENABLE_LOCKING_STYLE 1 |
69 | # else |
70 | # define SQLITE_ENABLE_LOCKING_STYLE 0 |
71 | # endif |
72 | #endif |
73 | |
74 | /* Use pread() and pwrite() if they are available */ |
75 | #if defined(__APPLE__) |
76 | # define HAVE_PREAD 1 |
77 | # define HAVE_PWRITE 1 |
78 | #endif |
79 | #if defined(HAVE_PREAD64) && defined(HAVE_PWRITE64) |
80 | # undef USE_PREAD |
81 | # define USE_PREAD64 1 |
82 | #elif defined(HAVE_PREAD) && defined(HAVE_PWRITE) |
83 | # undef USE_PREAD64 |
84 | # define USE_PREAD 1 |
85 | #endif |
86 | |
87 | /* |
88 | ** standard include files. |
89 | */ |
90 | #include <sys/types.h> /* amalgamator: keep */ |
91 | #include <sys/stat.h> /* amalgamator: keep */ |
92 | #include <fcntl.h> |
93 | #include <sys/ioctl.h> |
94 | #include <unistd.h> /* amalgamator: keep */ |
95 | #include <time.h> |
96 | #include <sys/time.h> /* amalgamator: keep */ |
97 | #include <errno.h> |
98 | #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 |
99 | # include <sys/mman.h> |
100 | #endif |
101 | |
102 | #if SQLITE_ENABLE_LOCKING_STYLE |
103 | # include <sys/ioctl.h> |
104 | # include <sys/file.h> |
105 | # include <sys/param.h> |
106 | #endif /* SQLITE_ENABLE_LOCKING_STYLE */ |
107 | |
108 | /* |
109 | ** Try to determine if gethostuuid() is available based on standard |
110 | ** macros. This might sometimes compute the wrong value for some |
111 | ** obscure platforms. For those cases, simply compile with one of |
112 | ** the following: |
113 | ** |
114 | ** -DHAVE_GETHOSTUUID=0 |
115 | ** -DHAVE_GETHOSTUUID=1 |
116 | ** |
117 | ** None of this matters except when building on Apple products with |
118 | ** -DSQLITE_ENABLE_LOCKING_STYLE. |
119 | */ |
120 | #ifndef HAVE_GETHOSTUUID |
121 | # define HAVE_GETHOSTUUID 0 |
122 | # if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ |
123 | (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) |
124 | # if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \ |
125 | && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0))\ |
126 | && (!defined(TARGET_OS_MACCATALYST) || (TARGET_OS_MACCATALYST==0)) |
127 | # undef HAVE_GETHOSTUUID |
128 | # define HAVE_GETHOSTUUID 1 |
129 | # else |
130 | # warning "gethostuuid() is disabled." |
131 | # endif |
132 | # endif |
133 | #endif |
134 | |
135 | |
136 | #if OS_VXWORKS |
137 | # include <sys/ioctl.h> |
138 | # include <semaphore.h> |
139 | # include <limits.h> |
140 | #endif /* OS_VXWORKS */ |
141 | |
142 | #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE |
143 | # include <sys/mount.h> |
144 | #endif |
145 | |
146 | #ifdef HAVE_UTIME |
147 | # include <utime.h> |
148 | #endif |
149 | |
150 | /* |
151 | ** Allowed values of unixFile.fsFlags |
152 | */ |
153 | #define SQLITE_FSFLAGS_IS_MSDOS 0x1 |
154 | |
155 | /* |
156 | ** If we are to be thread-safe, include the pthreads header. |
157 | */ |
158 | #if SQLITE_THREADSAFE |
159 | # include <pthread.h> |
160 | #endif |
161 | |
162 | /* |
163 | ** Default permissions when creating a new file |
164 | */ |
165 | #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS |
166 | # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644 |
167 | #endif |
168 | |
169 | /* |
170 | ** Default permissions when creating auto proxy dir |
171 | */ |
172 | #ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS |
173 | # define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 |
174 | #endif |
175 | |
176 | /* |
177 | ** Maximum supported path-length. |
178 | */ |
179 | #define MAX_PATHNAME 512 |
180 | |
181 | /* |
182 | ** Maximum supported symbolic links |
183 | */ |
184 | #define SQLITE_MAX_SYMLINKS 100 |
185 | |
186 | /* Always cast the getpid() return type for compatibility with |
187 | ** kernel modules in VxWorks. */ |
188 | #define osGetpid(X) (pid_t)getpid() |
189 | |
190 | /* |
191 | ** Only set the lastErrno if the error code is a real error and not |
192 | ** a normal expected return code of SQLITE_BUSY or SQLITE_OK |
193 | */ |
/* The argument is fully parenthesized so that compound expressions
** (for example "a & b") are compared as a whole. Note that x is
** evaluated twice, so it must not have side effects. */
#define IS_LOCK_ERROR(x) (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))
195 | |
196 | /* Forward references */ |
197 | typedef struct unixShm unixShm; /* Connection shared memory */ |
198 | typedef struct unixShmNode unixShmNode; /* Shared memory instance */ |
199 | typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ |
200 | typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ |
201 | |
202 | /* |
203 | ** Sometimes, after a file handle is closed by SQLite, the file descriptor |
204 | ** cannot be closed immediately. In these cases, instances of the following |
205 | ** structure are used to store the file descriptor while waiting for an |
206 | ** opportunity to either close or reuse it. |
207 | */ |
208 | struct UnixUnusedFd { |
209 | int fd; /* File descriptor to close */ |
210 | int flags; /* Flags this file descriptor was opened with */ |
211 | UnixUnusedFd *pNext; /* Next unused file descriptor on same file */ |
212 | }; |
213 | |
214 | /* |
215 | ** The unixFile structure is subclass of sqlite3_file specific to the unix |
216 | ** VFS implementations. |
217 | */ |
218 | typedef struct unixFile unixFile; |
219 | struct unixFile { |
220 | sqlite3_io_methods const *pMethod; /* Always the first entry */ |
221 | sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ |
222 | unixInodeInfo *pInode; /* Info about locks on this inode */ |
223 | int h; /* The file descriptor */ |
224 | unsigned char eFileLock; /* The type of lock held on this fd */ |
225 | unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ |
226 | int lastErrno; /* The unix errno from last I/O error */ |
227 | void *lockingContext; /* Locking style specific state */ |
228 | UnixUnusedFd *pPreallocatedUnused; /* Pre-allocated UnixUnusedFd */ |
229 | const char *zPath; /* Name of the file */ |
230 | unixShm *pShm; /* Shared memory segment information */ |
231 | int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ |
232 | #if SQLITE_MAX_MMAP_SIZE>0 |
233 | int nFetchOut; /* Number of outstanding xFetch refs */ |
234 | sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ |
235 | sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ |
236 | sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ |
237 | void *pMapRegion; /* Memory mapped region */ |
238 | #endif |
239 | int sectorSize; /* Device sector size */ |
240 | int deviceCharacteristics; /* Precomputed device characteristics */ |
241 | #if SQLITE_ENABLE_LOCKING_STYLE |
242 | int openFlags; /* The flags specified at open() */ |
243 | #endif |
244 | #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) |
245 | unsigned fsFlags; /* cached details from statfs() */ |
246 | #endif |
247 | #ifdef SQLITE_ENABLE_SETLK_TIMEOUT |
248 | unsigned iBusyTimeout; /* Wait this many millisec on locks */ |
249 | #endif |
250 | #if OS_VXWORKS |
251 | struct vxworksFileId *pId; /* Unique file ID */ |
252 | #endif |
253 | #ifdef SQLITE_DEBUG |
254 | /* The next group of variables are used to track whether or not the |
255 | ** transaction counter in bytes 24-27 of database files are updated |
256 | ** whenever any part of the database changes. An assertion fault will |
257 | ** occur if a file is updated without also updating the transaction |
258 | ** counter. This test is made to avoid new problems similar to the |
259 | ** one described by ticket #3584. |
260 | */ |
261 | unsigned char transCntrChng; /* True if the transaction counter changed */ |
262 | unsigned char dbUpdate; /* True if any part of database file changed */ |
263 | unsigned char inNormalWrite; /* True if in a normal write operation */ |
264 | |
265 | #endif |
266 | |
267 | #ifdef SQLITE_TEST |
268 | /* In test mode, increase the size of this structure a bit so that |
269 | ** it is larger than the struct CrashFile defined in test6.c. |
270 | */ |
271 | char aPadding[32]; |
272 | #endif |
273 | }; |
274 | |
275 | /* This variable holds the process id (pid) from when the xRandomness() |
276 | ** method was called. If xOpen() is called from a different process id, |
277 | ** indicating that a fork() has occurred, the PRNG will be reset. |
278 | */ |
279 | static pid_t randomnessPid = 0; |
280 | |
281 | /* |
282 | ** Allowed values for the unixFile.ctrlFlags bitmask: |
283 | */ |
284 | #define UNIXFILE_EXCL 0x01 /* Connections from one process only */ |
285 | #define UNIXFILE_RDONLY 0x02 /* Connection is read only */ |
286 | #define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ |
287 | #ifndef SQLITE_DISABLE_DIRSYNC |
288 | # define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ |
289 | #else |
290 | # define UNIXFILE_DIRSYNC 0x00 |
291 | #endif |
292 | #define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ |
293 | #define UNIXFILE_DELETE 0x20 /* Delete on close */ |
294 | #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ |
295 | #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ |
296 | |
297 | /* |
298 | ** Include code that is common to all os_*.c files |
299 | */ |
300 | #include "os_common.h" |
301 | |
302 | /* |
303 | ** Define various macros that are missing from some systems. |
304 | */ |
305 | #ifndef O_LARGEFILE |
306 | # define O_LARGEFILE 0 |
307 | #endif |
308 | #ifdef SQLITE_DISABLE_LFS |
309 | # undef O_LARGEFILE |
310 | # define O_LARGEFILE 0 |
311 | #endif |
312 | #ifndef O_NOFOLLOW |
313 | # define O_NOFOLLOW 0 |
314 | #endif |
315 | #ifndef O_BINARY |
316 | # define O_BINARY 0 |
317 | #endif |
318 | |
319 | /* |
320 | ** The threadid macro resolves to the thread-id or to 0. Used for |
321 | ** testing and debugging only. |
322 | */ |
323 | #if SQLITE_THREADSAFE |
324 | #define threadid pthread_self() |
325 | #else |
326 | #define threadid 0 |
327 | #endif |
328 | |
329 | /* |
330 | ** HAVE_MREMAP defaults to true on Linux and false everywhere else. |
331 | */ |
332 | #if !defined(HAVE_MREMAP) |
333 | # if defined(__linux__) && defined(_GNU_SOURCE) |
334 | # define HAVE_MREMAP 1 |
335 | # else |
336 | # define HAVE_MREMAP 0 |
337 | # endif |
338 | #endif |
339 | |
340 | /* |
341 | ** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek() |
342 | ** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined. |
343 | */ |
344 | #ifdef __ANDROID__ |
345 | # define lseek lseek64 |
346 | #endif |
347 | |
348 | #ifdef __linux__ |
349 | /* |
350 | ** Linux-specific IOCTL magic numbers used for controlling F2FS |
351 | */ |
352 | #define F2FS_IOCTL_MAGIC 0xf5 |
353 | #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) |
354 | #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) |
355 | #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) |
356 | #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) |
357 | #define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32) |
358 | #define F2FS_FEATURE_ATOMIC_WRITE 0x0004 |
359 | #endif /* __linux__ */ |
360 | |
361 | |
362 | /* |
363 | ** Different Unix systems declare open() in different ways. Some use |
364 | ** open(const char*,int,mode_t). Others use open(const char*,int,...). |
365 | ** The difference is important when using a pointer to the function. |
366 | ** |
367 | ** The safest way to deal with the problem is to always use this wrapper |
368 | ** which always has the same well-defined interface. |
369 | */ |
/*
** Fixed-signature wrapper around open(). The result of open() is
** returned unchanged, so a negative value means failure and errno is
** whatever open() left behind.
*/
static int posixOpen(const char *zFile, int flags, int mode){
  int fd;
  fd = open(zFile, flags, mode);
  return fd;
}
373 | |
374 | /* Forward reference */ |
375 | static int openDirectory(const char*, int*); |
376 | static int unixGetpagesize(void); |
377 | |
378 | /* |
379 | ** Many system calls are accessed through pointer-to-functions so that |
380 | ** they may be overridden at runtime to facilitate fault injection during |
381 | ** testing and sandboxing. The following array holds the names and pointers |
382 | ** to all overrideable system calls. |
383 | */ |
/*
** Table of overrideable system calls.
**
** Each entry pairs a name with the function pointer currently in use
** (pCurrent) and a saved default (pDefault). Every pDefault starts out
** as 0; unixSetSystemCall() fills it in the first time an entry is
** overridden.
**
** The osXxx macros that follow each entry cast the pCurrent slot at a
** hard-coded index back to the proper function type. Because the
** indexes are literal constants, entries must never be reordered,
** inserted in the middle, or removed without renumbering the macros.
**
** An entry whose pointer is 0 is a call that is not available in this
** build configuration; unixNextSystemCall() skips such entries.
*/
static struct unix_syscall {
  const char *zName;            /* Name of the system call */
  sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
  sqlite3_syscall_ptr pDefault; /* Default value */
} aSyscall[] = {
  { "open",         (sqlite3_syscall_ptr)posixOpen,  0  },
#define osOpen      ((int(*)(const char*,int,int))aSyscall[0].pCurrent)

  { "close",        (sqlite3_syscall_ptr)close,      0  },
#define osClose     ((int(*)(int))aSyscall[1].pCurrent)

  { "access",       (sqlite3_syscall_ptr)access,     0  },
#define osAccess    ((int(*)(const char*,int))aSyscall[2].pCurrent)

  { "getcwd",       (sqlite3_syscall_ptr)getcwd,     0  },
#define osGetcwd    ((char*(*)(char*,size_t))aSyscall[3].pCurrent)

  { "stat",         (sqlite3_syscall_ptr)stat,       0  },
#define osStat      ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent)

/*
** The DJGPP compiler environment looks mostly like Unix, but it
** lacks the fcntl() system call.  So redefine fcntl() to be something
** that always succeeds.  This means that locking does not occur under
** DJGPP.  But it is DOS - what did you expect?
**
** NOTE(review): the paragraph above talks about fcntl(), but the code
** below stubs out the "fstat" entry. The stub macro also takes three
** arguments, whereas the non-DJGPP osFstat is a two-argument function
** pointer and robust_open() calls it with two arguments. Confirm
** whether the DJGPP branch is still expected to compile.
*/
#ifdef __DJGPP__
  { "fstat",        0,                 0  },
#define osFstat(a,b,c)    0
#else
  { "fstat",        (sqlite3_syscall_ptr)fstat,      0  },
#define osFstat     ((int(*)(int,struct stat*))aSyscall[5].pCurrent)
#endif

  { "ftruncate",    (sqlite3_syscall_ptr)ftruncate,  0  },
#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent)

  { "fcntl",        (sqlite3_syscall_ptr)fcntl,      0  },
#define osFcntl     ((int(*)(int,int,...))aSyscall[7].pCurrent)

  { "read",         (sqlite3_syscall_ptr)read,       0  },
#define osRead      ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent)

  /* pread/pwrite are populated when USE_PREAD is set or when the
  ** alternative locking styles are compiled in. */
#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
  { "pread",        (sqlite3_syscall_ptr)pread,      0  },
#else
  { "pread",        (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPread     ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent)

#if defined(USE_PREAD64)
  { "pread64",      (sqlite3_syscall_ptr)pread64,    0  },
#else
  { "pread64",      (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPread64 ((ssize_t(*)(int,void*,size_t,off64_t))aSyscall[10].pCurrent)

  { "write",        (sqlite3_syscall_ptr)write,      0  },
#define osWrite     ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)

#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
  { "pwrite",       (sqlite3_syscall_ptr)pwrite,     0  },
#else
  { "pwrite",       (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPwrite    ((ssize_t(*)(int,const void*,size_t,off_t))\
                    aSyscall[12].pCurrent)

#if defined(USE_PREAD64)
  { "pwrite64",     (sqlite3_syscall_ptr)pwrite64,   0  },
#else
  { "pwrite64",     (sqlite3_syscall_ptr)0,          0  },
#endif
#define osPwrite64  ((ssize_t(*)(int,const void*,size_t,off64_t))\
                    aSyscall[13].pCurrent)

  { "fchmod",       (sqlite3_syscall_ptr)fchmod,          0  },
#define osFchmod    ((int(*)(int,mode_t))aSyscall[14].pCurrent)

  /* The "fallocate" slot is backed by posix_fallocate() when available */
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
  { "fallocate",    (sqlite3_syscall_ptr)posix_fallocate,  0 },
#else
  { "fallocate",    (sqlite3_syscall_ptr)0,                0 },
#endif
#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)

  { "unlink",       (sqlite3_syscall_ptr)unlink,           0 },
#define osUnlink    ((int(*)(const char*))aSyscall[16].pCurrent)

  /* openDirectory is a function of this file (forward-declared above),
  ** exposed here so that it can be overridden like a system call. */
  { "openDirectory",    (sqlite3_syscall_ptr)openDirectory,      0 },
#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)

  { "mkdir",        (sqlite3_syscall_ptr)mkdir,           0 },
#define osMkdir     ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)

  { "rmdir",        (sqlite3_syscall_ptr)rmdir,           0 },
#define osRmdir     ((int(*)(const char*))aSyscall[19].pCurrent)

#if defined(HAVE_FCHOWN)
  { "fchown",       (sqlite3_syscall_ptr)fchown,          0 },
#else
  { "fchown",       (sqlite3_syscall_ptr)0,               0 },
#endif
#define osFchown    ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)

  /* geteuid is only needed by robustFchown(), hence the same guard */
#if defined(HAVE_FCHOWN)
  { "geteuid",      (sqlite3_syscall_ptr)geteuid,         0 },
#else
  { "geteuid",      (sqlite3_syscall_ptr)0,               0 },
#endif
#define osGeteuid   ((uid_t(*)(void))aSyscall[21].pCurrent)

#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
  { "mmap",         (sqlite3_syscall_ptr)mmap,            0 },
#else
  { "mmap",         (sqlite3_syscall_ptr)0,               0 },
#endif
#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent)

#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
  { "munmap",       (sqlite3_syscall_ptr)munmap,          0 },
#else
  { "munmap",       (sqlite3_syscall_ptr)0,               0 },
#endif
#define osMunmap ((int(*)(void*,size_t))aSyscall[23].pCurrent)

#if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)
  { "mremap",       (sqlite3_syscall_ptr)mremap,          0 },
#else
  { "mremap",       (sqlite3_syscall_ptr)0,               0 },
#endif
#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent)

  /* The "getpagesize" slot is backed by unixGetpagesize(), a function of
  ** this file that is forward-declared above. */
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
  { "getpagesize",  (sqlite3_syscall_ptr)unixGetpagesize, 0 },
#else
  { "getpagesize",  (sqlite3_syscall_ptr)0,               0 },
#endif
#define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent)

#if defined(HAVE_READLINK)
  { "readlink",     (sqlite3_syscall_ptr)readlink,        0 },
#else
  { "readlink",     (sqlite3_syscall_ptr)0,               0 },
#endif
#define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent)

#if defined(HAVE_LSTAT)
  { "lstat",         (sqlite3_syscall_ptr)lstat,          0 },
#else
  { "lstat",         (sqlite3_syscall_ptr)0,              0 },
#endif
#define osLstat      ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent)

  /* ioctl is only wired up for Linux builds with batch-atomic-write
  ** support. The request argument is cast as int on Android and as
  ** unsigned long elsewhere. In every other configuration the slot is
  ** 0 and the osIoctl macro is not defined at all. */
#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
# ifdef __ANDROID__
  { "ioctl", (sqlite3_syscall_ptr)(int(*)(int, int, ...))ioctl, 0 },
#define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent)
# else
  { "ioctl",         (sqlite3_syscall_ptr)ioctl,          0 },
#define osIoctl ((int(*)(int,unsigned long,...))aSyscall[28].pCurrent)
# endif
#else
  { "ioctl",         (sqlite3_syscall_ptr)0,              0 },
#endif

}; /* End of the overrideable system calls */
551 | |
552 | |
553 | /* |
554 | ** On some systems, calls to fchown() will trigger a message in a security |
555 | ** log if they come from non-root processes. So avoid calling fchown() if |
556 | ** we are not running as root. |
557 | */ |
/*
** Change the owner of fd, but only when the effective user id is 0.
** For any other effective uid the fchown() call is skipped and 0 is
** returned. When HAVE_FCHOWN is not defined this is a no-op that
** returns 0.
*/
static int robustFchown(int fd, uid_t uid, gid_t gid){
#if defined(HAVE_FCHOWN)
  if( osGeteuid()!=0 ){
    /* Not root: do nothing and report success */
    return 0;
  }
  return osFchown(fd, uid, gid);
#else
  return 0;
#endif
}
565 | |
566 | /* |
567 | ** This is the xSetSystemCall() method of sqlite3_vfs for all of the |
568 | ** "unix" VFSes. Return SQLITE_OK upon successfully updating the |
569 | ** system call pointer, or SQLITE_NOTFOUND if there is no configurable |
570 | ** system call named zName. |
571 | */ |
572 | static int unixSetSystemCall( |
573 | sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */ |
574 | const char *zName, /* Name of system call to override */ |
575 | sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ |
576 | ){ |
577 | unsigned int i; |
578 | int rc = SQLITE_NOTFOUND; |
579 | |
580 | UNUSED_PARAMETER(pNotUsed); |
581 | if( zName==0 ){ |
582 | /* If no zName is given, restore all system calls to their default |
583 | ** settings and return NULL |
584 | */ |
585 | rc = SQLITE_OK; |
586 | for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ |
587 | if( aSyscall[i].pDefault ){ |
588 | aSyscall[i].pCurrent = aSyscall[i].pDefault; |
589 | } |
590 | } |
591 | }else{ |
592 | /* If zName is specified, operate on only the one system call |
593 | ** specified. |
594 | */ |
595 | for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ |
596 | if( strcmp(zName, aSyscall[i].zName)==0 ){ |
597 | if( aSyscall[i].pDefault==0 ){ |
598 | aSyscall[i].pDefault = aSyscall[i].pCurrent; |
599 | } |
600 | rc = SQLITE_OK; |
601 | if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault; |
602 | aSyscall[i].pCurrent = pNewFunc; |
603 | break; |
604 | } |
605 | } |
606 | } |
607 | return rc; |
608 | } |
609 | |
610 | /* |
611 | ** Return the value of a system call. Return NULL if zName is not a |
612 | ** recognized system call name. NULL is also returned if the system call |
613 | ** is currently undefined. |
614 | */ |
615 | static sqlite3_syscall_ptr unixGetSystemCall( |
616 | sqlite3_vfs *pNotUsed, |
617 | const char *zName |
618 | ){ |
619 | unsigned int i; |
620 | |
621 | UNUSED_PARAMETER(pNotUsed); |
622 | for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){ |
623 | if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent; |
624 | } |
625 | return 0; |
626 | } |
627 | |
628 | /* |
629 | ** Return the name of the first system call after zName. If zName==NULL |
630 | ** then return the name of the first system call. Return NULL if zName |
631 | ** is the last system call or if zName is not the name of a valid |
632 | ** system call. |
633 | */ |
634 | static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ |
635 | int i = -1; |
636 | |
637 | UNUSED_PARAMETER(p); |
638 | if( zName ){ |
639 | for(i=0; i<ArraySize(aSyscall)-1; i++){ |
640 | if( strcmp(zName, aSyscall[i].zName)==0 ) break; |
641 | } |
642 | } |
643 | for(i++; i<ArraySize(aSyscall); i++){ |
644 | if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName; |
645 | } |
646 | return 0; |
647 | } |
648 | |
649 | /* |
650 | ** Do not accept any file descriptor less than this value, in order to avoid |
651 | ** opening database file using file descriptors that are commonly used for |
652 | ** standard input, output, and error. |
653 | */ |
654 | #ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR |
655 | # define SQLITE_MINIMUM_FILE_DESCRIPTOR 3 |
656 | #endif |
657 | |
658 | /* |
659 | ** Invoke open(). Do so multiple times, until it either succeeds or |
660 | ** fails for some reason other than EINTR. |
661 | ** |
662 | ** If the file creation mode "m" is 0 then set it to the default for |
663 | ** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally |
664 | ** 0644) as modified by the system umask. If m is not 0, then |
665 | ** make the file creation mode be exactly m ignoring the umask. |
666 | ** |
667 | ** The m parameter will be non-zero only when creating -wal, -journal, |
668 | ** and -shm files. We want those files to have *exactly* the same |
669 | ** permissions as their original database, unadulterated by the umask. |
670 | ** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a |
671 | ** transaction crashes and leaves behind hot journals, then any |
672 | ** process that is able to write to the database will also be able to |
673 | ** recover the hot journals. |
674 | */ |
675 | static int robust_open(const char *z, int f, mode_t m){ |
676 | int fd; |
677 | mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS; |
678 | while(1){ |
679 | #if defined(O_CLOEXEC) |
680 | fd = osOpen(z,f|O_CLOEXEC,m2); |
681 | #else |
682 | fd = osOpen(z,f,m2); |
683 | #endif |
684 | if( fd<0 ){ |
685 | if( errno==EINTR ) continue; |
686 | break; |
687 | } |
688 | if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; |
689 | if( (f & (O_EXCL|O_CREAT))==(O_EXCL|O_CREAT) ){ |
690 | (void)osUnlink(z); |
691 | } |
692 | osClose(fd); |
693 | sqlite3_log(SQLITE_WARNING, |
694 | "attempt to open \"%s\" as file descriptor %d" , z, fd); |
695 | fd = -1; |
696 | if( osOpen("/dev/null" , O_RDONLY, m)<0 ) break; |
697 | } |
698 | if( fd>=0 ){ |
699 | if( m!=0 ){ |
700 | struct stat statbuf; |
701 | if( osFstat(fd, &statbuf)==0 |
702 | && statbuf.st_size==0 |
703 | && (statbuf.st_mode&0777)!=m |
704 | ){ |
705 | osFchmod(fd, m); |
706 | } |
707 | } |
708 | #if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) |
709 | osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); |
710 | #endif |
711 | } |
712 | return fd; |
713 | } |
714 | |
715 | /* |
716 | ** Helper functions to obtain and relinquish the global mutex. The |
717 | ** global mutex is used to protect the unixInodeInfo and |
718 | ** vxworksFileId objects used by this file, all of which may be |
719 | ** shared by multiple threads. |
720 | ** |
721 | ** Function unixMutexHeld() is used to assert() that the global mutex |
722 | ** is held when required. This function is only used as part of assert() |
723 | ** statements. e.g. |
724 | ** |
725 | ** unixEnterMutex() |
726 | ** assert( unixMutexHeld() ); |
727 | ** unixLeaveMutex() |
728 | ** |
729 | ** To prevent deadlock, the global unixBigLock must be acquired |
730 | ** before the unixInodeInfo.pLockMutex mutex, if both are held. It is |
731 | ** OK to get the pLockMutex without holding unixBigLock first, but if |
732 | ** that happens, the unixBigLock mutex must not be acquired until after |
733 | ** pLockMutex is released. |
734 | ** |
735 | ** OK: enter(unixBigLock), enter(pLockInfo) |
736 | ** OK: enter(unixBigLock) |
737 | ** OK: enter(pLockInfo) |
738 | ** ERROR: enter(pLockInfo), enter(unixBigLock) |
739 | */ |
740 | static sqlite3_mutex *unixBigLock = 0; |
741 | static void unixEnterMutex(void){ |
742 | assert( sqlite3_mutex_notheld(unixBigLock) ); /* Not a recursive mutex */ |
743 | sqlite3_mutex_enter(unixBigLock); |
744 | } |
745 | static void unixLeaveMutex(void){ |
746 | assert( sqlite3_mutex_held(unixBigLock) ); |
747 | sqlite3_mutex_leave(unixBigLock); |
748 | } |
749 | #ifdef SQLITE_DEBUG |
750 | static int unixMutexHeld(void) { |
751 | return sqlite3_mutex_held(unixBigLock); |
752 | } |
753 | #endif |
754 | |
755 | |
756 | #ifdef SQLITE_HAVE_OS_TRACE |
757 | /* |
758 | ** Helper function for printing out trace information from debugging |
759 | ** binaries. This returns the string representation of the supplied |
760 | ** integer lock-type. |
761 | */ |
762 | static const char *azFileLock(int eFileLock){ |
763 | switch( eFileLock ){ |
764 | case NO_LOCK: return "NONE" ; |
765 | case SHARED_LOCK: return "SHARED" ; |
766 | case RESERVED_LOCK: return "RESERVED" ; |
767 | case PENDING_LOCK: return "PENDING" ; |
768 | case EXCLUSIVE_LOCK: return "EXCLUSIVE" ; |
769 | } |
770 | return "ERROR" ; |
771 | } |
772 | #endif |
773 | |
774 | #ifdef SQLITE_LOCK_TRACE |
775 | /* |
776 | ** Print out information about all locking operations. |
777 | ** |
778 | ** This routine is used for troubleshooting locks on multithreaded |
779 | ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE |
780 | ** command-line option on the compiler. This code is normally |
781 | ** turned off. |
782 | */ |
783 | static int lockTrace(int fd, int op, struct flock *p){ |
784 | char *zOpName, *zType; |
785 | int s; |
786 | int savedErrno; |
787 | if( op==F_GETLK ){ |
788 | zOpName = "GETLK" ; |
789 | }else if( op==F_SETLK ){ |
790 | zOpName = "SETLK" ; |
791 | }else{ |
792 | s = osFcntl(fd, op, p); |
793 | sqlite3DebugPrintf("fcntl unknown %d %d %d\n" , fd, op, s); |
794 | return s; |
795 | } |
796 | if( p->l_type==F_RDLCK ){ |
797 | zType = "RDLCK" ; |
798 | }else if( p->l_type==F_WRLCK ){ |
799 | zType = "WRLCK" ; |
800 | }else if( p->l_type==F_UNLCK ){ |
801 | zType = "UNLCK" ; |
802 | }else{ |
803 | assert( 0 ); |
804 | } |
805 | assert( p->l_whence==SEEK_SET ); |
806 | s = osFcntl(fd, op, p); |
807 | savedErrno = errno; |
808 | sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n" , |
809 | threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, |
810 | (int)p->l_pid, s); |
811 | if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ |
812 | struct flock l2; |
813 | l2 = *p; |
814 | osFcntl(fd, F_GETLK, &l2); |
815 | if( l2.l_type==F_RDLCK ){ |
816 | zType = "RDLCK" ; |
817 | }else if( l2.l_type==F_WRLCK ){ |
818 | zType = "WRLCK" ; |
819 | }else if( l2.l_type==F_UNLCK ){ |
820 | zType = "UNLCK" ; |
821 | }else{ |
822 | assert( 0 ); |
823 | } |
824 | sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n" , |
825 | zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); |
826 | } |
827 | errno = savedErrno; |
828 | return s; |
829 | } |
830 | #undef osFcntl |
831 | #define osFcntl lockTrace |
832 | #endif /* SQLITE_LOCK_TRACE */ |
833 | |
834 | /* |
835 | ** Retry ftruncate() calls that fail due to EINTR |
836 | ** |
837 | ** All calls to ftruncate() within this file should be made through |
838 | ** this wrapper. On the Android platform, bypassing the logic below |
839 | ** could lead to a corrupt database. |
840 | */ |
841 | static int robust_ftruncate(int h, sqlite3_int64 sz){ |
842 | int rc; |
843 | #ifdef __ANDROID__ |
844 | /* On Android, ftruncate() always uses 32-bit offsets, even if |
845 | ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to |
846 | ** truncate a file to any size larger than 2GiB. Silently ignore any |
847 | ** such attempts. */ |
848 | if( sz>(sqlite3_int64)0x7FFFFFFF ){ |
849 | rc = SQLITE_OK; |
850 | }else |
851 | #endif |
852 | do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR ); |
853 | return rc; |
854 | } |
855 | |
856 | /* |
857 | ** This routine translates a standard POSIX errno code into something |
858 | ** useful to the clients of the sqlite3 functions. Specifically, it is |
859 | ** intended to translate a variety of "try again" errors into SQLITE_BUSY |
860 | ** and a variety of "please close the file descriptor NOW" errors into |
861 | ** SQLITE_IOERR |
862 | ** |
863 | ** Errors during initialization of locks, or file system support for locks, |
864 | ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. |
865 | */ |
866 | static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { |
867 | assert( (sqliteIOErr == SQLITE_IOERR_LOCK) || |
868 | (sqliteIOErr == SQLITE_IOERR_UNLOCK) || |
869 | (sqliteIOErr == SQLITE_IOERR_RDLOCK) || |
870 | (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ); |
871 | switch (posixError) { |
872 | case EACCES: |
873 | case EAGAIN: |
874 | case ETIMEDOUT: |
875 | case EBUSY: |
876 | case EINTR: |
877 | case ENOLCK: |
878 | /* random NFS retry error, unless during file system support |
879 | * introspection, in which it actually means what it says */ |
880 | return SQLITE_BUSY; |
881 | |
882 | case EPERM: |
883 | return SQLITE_PERM; |
884 | |
885 | default: |
886 | return sqliteIOErr; |
887 | } |
888 | } |
889 | |
890 | |
891 | /****************************************************************************** |
892 | ****************** Begin Unique File ID Utility Used By VxWorks *************** |
893 | ** |
894 | ** On most versions of unix, we can get a unique ID for a file by concatenating |
895 | ** the device number and the inode number. But this does not work on VxWorks. |
896 | ** On VxWorks, a unique file id must be based on the canonical filename. |
897 | ** |
898 | ** A pointer to an instance of the following structure can be used as a |
899 | ** unique file ID in VxWorks. Each instance of this structure contains |
900 | ** a copy of the canonical filename. There is also a reference count. |
901 | ** The structure is reclaimed when the number of pointers to it drops to |
902 | ** zero. |
903 | ** |
904 | ** There are never very many files open at one time and lookups are not |
905 | ** a performance-critical path, so it is sufficient to put these |
906 | ** structures on a linked list. |
907 | */ |
struct vxworksFileId {
  /* Link to the next entry on the list of all file IDs */
  struct vxworksFileId *pNext;
  /* Count of outstanding references to this entry */
  int nRef;
  /* Number of bytes in zCanonicalName[], not counting the terminator */
  int nName;
  /* The canonical (simplified) filename that identifies the file */
  char *zCanonicalName;
};
914 | |
915 | #if OS_VXWORKS |
916 | /* |
917 | ** All unique filenames are held on a linked list headed by this |
918 | ** variable: |
919 | */ |
920 | static struct vxworksFileId *vxworksFileList = 0; |
921 | |
922 | /* |
923 | ** Simplify a filename into its canonical form |
924 | ** by making the following changes: |
925 | ** |
926 | ** * removing any trailing and duplicate / |
927 | ** * convert /./ into just / |
928 | ** * convert /A/../ where A is any simple name into just / |
929 | ** |
930 | ** Changes are made in-place. Return the new name length. |
931 | ** |
932 | ** The original filename is in z[0..n-1]. Return the number of |
933 | ** characters in the simplified name. |
934 | */ |
static int vxworksSimplifyName(char *z, int n){
  /*
  ** z[0..n-1] holds the name on entry.  The name is rewritten in place,
  ** a zero terminator is stored after it, and the new length is returned.
  ** i is the read position and j (always <= i) is the write position.
  */
  int i, j;

  /* Drop trailing slashes, but keep a single leading one. */
  while( n>1 && z[n-1]=='/' ){ n--; }

  for(i=j=0; i<n; i++){
    /* Only look ahead when there is a next character inside the name.
    ** Once trailing slashes have been dropped, z[n] may still hold one of
    ** them; examining it made a name made only of slashes (such as "//")
    ** collapse to an empty string rather than "/". */
    if( z[i]=='/' && i+1<n ){
      /* "//" - skip this slash, the next one will be kept */
      if( z[i+1]=='/' ) continue;
      /* "/./" - skip "/." and resume at the slash that follows */
      if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
        i += 1;
        continue;
      }
      /* "/A/../" - discard the previously written component, then skip
      ** "/.." and resume at the slash that follows */
      if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
        while( j>0 && z[j-1]!='/' ){ j--; }
        if( j>0 ){ j--; }
        i += 2;
        continue;
      }
    }
    z[j++] = z[i];
  }
  z[j] = 0;
  return j;
}
957 | |
958 | /* |
959 | ** Find a unique file ID for the given absolute pathname. Return |
960 | ** a pointer to the vxworksFileId object. This pointer is the unique |
961 | ** file ID. |
962 | ** |
963 | ** The nRef field of the vxworksFileId object is incremented before |
964 | ** the object is returned. A new vxworksFileId object is created |
965 | ** and added to the global list if necessary. |
966 | ** |
967 | ** If a memory allocation error occurs, return NULL. |
968 | */ |
969 | static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){ |
970 | struct vxworksFileId *pNew; /* search key and new file ID */ |
971 | struct vxworksFileId *pCandidate; /* For looping over existing file IDs */ |
972 | int n; /* Length of zAbsoluteName string */ |
973 | |
974 | assert( zAbsoluteName[0]=='/' ); |
975 | n = (int)strlen(zAbsoluteName); |
976 | pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) ); |
977 | if( pNew==0 ) return 0; |
978 | pNew->zCanonicalName = (char*)&pNew[1]; |
979 | memcpy(pNew->zCanonicalName, zAbsoluteName, n+1); |
980 | n = vxworksSimplifyName(pNew->zCanonicalName, n); |
981 | |
982 | /* Search for an existing entry that matching the canonical name. |
983 | ** If found, increment the reference count and return a pointer to |
984 | ** the existing file ID. |
985 | */ |
986 | unixEnterMutex(); |
987 | for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){ |
988 | if( pCandidate->nName==n |
989 | && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0 |
990 | ){ |
991 | sqlite3_free(pNew); |
992 | pCandidate->nRef++; |
993 | unixLeaveMutex(); |
994 | return pCandidate; |
995 | } |
996 | } |
997 | |
998 | /* No match was found. We will make a new file ID */ |
999 | pNew->nRef = 1; |
1000 | pNew->nName = n; |
1001 | pNew->pNext = vxworksFileList; |
1002 | vxworksFileList = pNew; |
1003 | unixLeaveMutex(); |
1004 | return pNew; |
1005 | } |
1006 | |
1007 | /* |
1008 | ** Decrement the reference count on a vxworksFileId object. Free |
1009 | ** the object when the reference count reaches zero. |
1010 | */ |
1011 | static void vxworksReleaseFileId(struct vxworksFileId *pId){ |
1012 | unixEnterMutex(); |
1013 | assert( pId->nRef>0 ); |
1014 | pId->nRef--; |
1015 | if( pId->nRef==0 ){ |
1016 | struct vxworksFileId **pp; |
1017 | for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){} |
1018 | assert( *pp==pId ); |
1019 | *pp = pId->pNext; |
1020 | sqlite3_free(pId); |
1021 | } |
1022 | unixLeaveMutex(); |
1023 | } |
1024 | #endif /* OS_VXWORKS */ |
1025 | /*************** End of Unique File ID Utility Used By VxWorks **************** |
1026 | ******************************************************************************/ |
1027 | |
1028 | |
1029 | /****************************************************************************** |
1030 | *************************** Posix Advisory Locking **************************** |
1031 | ** |
1032 | ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) |
1033 | ** section 6.5.2.2 lines 483 through 490 specify that when a process |
1034 | ** sets or clears a lock, that operation overrides any prior locks set |
1035 | ** by the same process. It does not explicitly say so, but this implies |
1036 | ** that it overrides locks set by the same process using a different |
1037 | ** file descriptor. Consider this test case: |
1038 | ** |
1039 | ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); |
1040 | ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); |
1041 | ** |
1042 | ** Suppose ./file1 and ./file2 are really the same file (because |
1043 | ** one is a hard or symbolic link to the other) then if you set |
1044 | ** an exclusive lock on fd1, then try to get an exclusive lock |
1045 | ** on fd2, it works. I would have expected the second lock to |
1046 | ** fail since there was already a lock on the file due to fd1. |
1047 | ** But not so. Since both locks came from the same process, the |
1048 | ** second overrides the first, even though they were on different |
1049 | ** file descriptors opened on different file names. |
1050 | ** |
1051 | ** This means that we cannot use POSIX locks to synchronize file access |
1052 | ** among competing threads of the same process. POSIX locks will work fine |
1053 | ** to synchronize access for threads in separate processes, but not |
1054 | ** threads within the same process. |
1055 | ** |
1056 | ** To work around the problem, SQLite has to manage file locks internally |
1057 | ** on its own. Whenever a new database is opened, we have to find the |
1058 | ** specific inode of the database file (the inode is determined by the |
1059 | ** st_dev and st_ino fields of the stat structure that fstat() fills in) |
1060 | ** and check for locks already existing on that inode. When locks are |
1061 | ** created or removed, we have to look at our own internal record of the |
1062 | ** locks to see if another thread has previously set a lock on that same |
1063 | ** inode. |
1064 | ** |
1065 | ** (Aside: The use of inode numbers as unique IDs does not work on VxWorks. |
1066 | ** For VxWorks, we have to use the alternative unique ID system based on |
1067 | ** canonical filename and implemented in the previous division.) |
1068 | ** |
1069 | ** The sqlite3_file structure for POSIX is no longer just an integer file |
1070 | ** descriptor. It is now a structure that holds the integer file |
1071 | ** descriptor and a pointer to a structure that describes the internal |
1072 | ** locks on the corresponding inode. There is one locking structure |
1073 | ** per inode, so if the same inode is opened twice, both unixFile structures |
1074 | ** point to the same locking structure. The locking structure keeps |
1075 | ** a reference count (so we will know when to delete it) and a "cnt" |
1076 | ** field that tells us its internal lock status. cnt==0 means the |
1077 | ** file is unlocked. cnt==-1 means the file has an exclusive lock. |
1078 | ** cnt>0 means there are cnt shared locks on the file. |
1079 | ** |
1080 | ** Any attempt to lock or unlock a file first checks the locking |
1081 | ** structure. The fcntl() system call is only invoked to set a |
1082 | ** POSIX lock if the internal lock structure transitions between |
1083 | ** a locked and an unlocked state. |
1084 | ** |
1085 | ** But wait: there are yet more problems with POSIX advisory locks. |
1086 | ** |
1087 | ** If you close a file descriptor that points to a file that has locks, |
1088 | ** all locks on that file that are owned by the current process are |
1089 | ** released. To work around this problem, each unixInodeInfo object |
1090 | ** maintains a count of the number of pending locks on that inode. |
1091 | ** When an attempt is made to close an unixFile, if there are |
1092 | ** other unixFile open on the same inode that are holding locks, the call |
1093 | ** to close() the file descriptor is deferred until all of the locks clear. |
1094 | ** The unixInodeInfo structure keeps a list of file descriptors that need to |
1095 | ** be closed and that list is walked (and cleared) when the last lock |
1096 | ** clears. |
1097 | ** |
1098 | ** Yet another problem: LinuxThreads do not play well with posix locks. |
1099 | ** |
1100 | ** Many older versions of linux use the LinuxThreads library which is |
1101 | ** not posix compliant. Under LinuxThreads, a lock created by thread |
1102 | ** A cannot be modified or overridden by a different thread B. |
1103 | ** Only thread A can modify the lock. Locking behavior is correct |
1104 | ** if the application uses the newer Native Posix Thread Library (NPTL) |
1105 | ** on linux - with NPTL a lock created by thread A can override locks |
1106 | ** in thread B. But there is no way to know at compile-time which |
1107 | ** threading library is being used. So there is no way to know at |
1108 | ** compile-time whether or not thread A can override locks on thread B. |
1109 | ** One has to do a run-time check to discover the behavior of the |
1110 | ** current process. |
1111 | ** |
1112 | ** SQLite used to support LinuxThreads. But support for LinuxThreads |
1113 | ** was dropped beginning with version 3.7.0. SQLite will still work with |
1114 | ** LinuxThreads provided that (1) there is no more than one connection |
1115 | ** per database file in the same process and (2) database connections |
1116 | ** do not move across threads. |
1117 | */ |
1118 | |
1119 | /* |
1120 | ** An instance of the following structure serves as the key used |
1121 | ** to locate a particular unixInodeInfo object. |
1122 | */ |
1123 | struct unixFileId { |
1124 | dev_t dev; /* Device number */ |
1125 | #if OS_VXWORKS |
1126 | struct vxworksFileId *pId; /* Unique file ID for vxworks. */ |
1127 | #else |
1128 | /* We are told that some versions of Android contain a bug that |
1129 | ** sizes ino_t at only 32-bits instead of 64-bits. (See |
1130 | ** https://android-review.googlesource.com/#/c/115351/3/dist/sqlite3.c) |
1131 | ** To work around this, always allocate 64-bits for the inode number. |
1132 | ** On small machines that only have 32-bit inodes, this wastes 4 bytes, |
1133 | ** but that should not be a big deal. */ |
1134 | /* WAS: ino_t ino; */ |
1135 | u64 ino; /* Inode number */ |
1136 | #endif |
1137 | }; |
1138 | |
1139 | /* |
1140 | ** An instance of the following structure is allocated for each open |
1141 | ** inode. |
1142 | ** |
1143 | ** A single inode can have multiple file descriptors, so each unixFile |
1144 | ** structure contains a pointer to an instance of this object and this |
1145 | ** object keeps a count of the number of unixFile pointing to it. |
1146 | ** |
1147 | ** Mutex rules: |
1148 | ** |
1149 | ** (1) Only the pLockMutex mutex must be held in order to read or write |
1150 | ** any of the locking fields: |
1151 | ** nShared, nLock, eFileLock, bProcessLock, pUnused |
1152 | ** |
1153 | ** (2) When nRef>0, then the following fields are unchanging and can |
1154 | ** be read (but not written) without holding any mutex: |
1155 | ** fileId, pLockMutex |
1156 | ** |
1157 | ** (3) With the exceptions above, all the fields may only be read |
1158 | ** or written while holding the global unixBigLock mutex. |
1159 | ** |
1160 | ** Deadlock prevention: The global unixBigLock mutex may not |
1161 | ** be acquired while holding the pLockMutex mutex. If both unixBigLock |
1162 | ** and pLockMutex are needed, then unixBigLock must be acquired first. |
1163 | */ |
1164 | struct unixInodeInfo { |
1165 | struct unixFileId fileId; /* The lookup key */ |
1166 | sqlite3_mutex *pLockMutex; /* Hold this mutex for... */ |
1167 | int nShared; /* Number of SHARED locks held */ |
1168 | int nLock; /* Number of outstanding file locks */ |
1169 | unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ |
1170 | unsigned char bProcessLock; /* An exclusive process lock is held */ |
1171 | UnixUnusedFd *pUnused; /* Unused file descriptors to close */ |
1172 | int nRef; /* Number of pointers to this structure */ |
1173 | unixShmNode *pShmNode; /* Shared memory associated with this inode */ |
1174 | unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ |
1175 | unixInodeInfo *pPrev; /* .... doubly linked */ |
1176 | #if SQLITE_ENABLE_LOCKING_STYLE |
1177 | unsigned long long sharedByte; /* for AFP simulated shared lock */ |
1178 | #endif |
1179 | #if OS_VXWORKS |
1180 | sem_t *pSem; /* Named POSIX semaphore */ |
1181 | char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */ |
1182 | #endif |
1183 | }; |
1184 | |
1185 | /* |
1186 | ** A list of all unixInodeInfo objects. |
1187 | ** |
1188 | ** Must hold unixBigLock in order to read or write this variable. |
1189 | */ |
1190 | static unixInodeInfo *inodeList = 0; /* All unixInodeInfo objects */ |
1191 | |
1192 | #ifdef SQLITE_DEBUG |
1193 | /* |
1194 | ** True if the inode mutex (the pLockMutex of the file's unixInodeInfo) is held, or not. |
1195 | ** This routine is used only within assert() to help verify correct mutex |
1196 | ** usage. |
1197 | */ |
1198 | int unixFileMutexHeld(unixFile *pFile){ |
1199 | assert( pFile->pInode ); |
1200 | return sqlite3_mutex_held(pFile->pInode->pLockMutex); |
1201 | } |
1202 | int unixFileMutexNotheld(unixFile *pFile){ |
1203 | assert( pFile->pInode ); |
1204 | return sqlite3_mutex_notheld(pFile->pInode->pLockMutex); |
1205 | } |
1206 | #endif |
1207 | |
1208 | /* |
1209 | ** |
1210 | ** This function - unixLogErrorAtLine(), is only ever called via the macro |
1211 | ** unixLogError(). |
1212 | ** |
1213 | ** It is invoked after an error occurs in an OS function and errno has been |
1214 | ** set. It logs a message using sqlite3_log() containing the current value of |
1215 | ** errno and, if possible, the human-readable equivalent from strerror() or |
1216 | ** strerror_r(). |
1217 | ** |
1218 | ** The first argument passed to the macro should be the error code that |
1219 | ** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). |
1220 | ** The two subsequent arguments should be the name of the OS function that |
1221 | ** failed (e.g. "unlink", "open") and the associated file-system path, |
1222 | ** if any. |
1223 | */ |
#define unixLogError(a,b,c)       unixLogErrorAtLine(a,b,c,__LINE__)
static int unixLogErrorAtLine(
  int errcode,                    /* SQLite error code */
  const char *zFunc,              /* Name of OS function that failed */
  const char *zPath,              /* File path associated with error */
  int iLine                       /* Source line number where error occurred */
){
  char *zErr;                     /* Human-readable text for the errno value */
  int iErrno = errno;             /* errno captured on entry */

  /* Pick the way the message text is obtained:
  **
  **   * threadsafe build with strerror_r() available:  strerror_r()
  **   * threadsafe build without strerror_r():         empty message
  **   * non-threadsafe build (SQLITE_THREADSAFE==0):   strerror()
  */
#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R)
  char aErr[80];
  memset(aErr, 0, sizeof(aErr));

  /* When STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is
  ** defined, the GNU flavour of strerror_r() is assumed.  It returns a
  ** pointer to the message, which may be aErr[] or may be static storage
  ** somewhere else.  Otherwise the POSIX flavour is assumed, which always
  ** writes the message into aErr[].
  **
  ** Wrongly assuming the POSIX flavour will often just yield an empty
  ** message, which is not a huge problem.  Wrongly assuming the GNU
  ** flavour could lead to a segfault though.
  */
# if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU)
  zErr = strerror_r(iErrno, aErr, sizeof(aErr)-1);
# else
  strerror_r(iErrno, aErr, sizeof(aErr)-1);
  zErr = aErr;
# endif

#elif SQLITE_THREADSAFE
  /* Threadsafe build without strerror_r(): no message text. */
  zErr = "";
#else
  /* Non-threadsafe build: plain strerror() can be used. */
  zErr = strerror(iErrno);
#endif

  /* A missing path is logged as an empty string. */
  if( zPath==0 ){
    zPath = "";
  }
  sqlite3_log(errcode, "os_unix.c:%d: (%d) %s(%s) - %s",
              iLine, iErrno, zFunc, zPath, zErr);

  return errcode;
}
1276 | |
1277 | /* |
1278 | ** Close a file descriptor. |
1279 | ** |
1280 | ** We assume that close() almost always works, since it is only in a |
1281 | ** very sick application or on a very sick platform that it might fail. |
1282 | ** If it does fail, simply leak the file descriptor, but do log the |
1283 | ** error. |
1284 | ** |
1285 | ** Note that it is not safe to retry close() after EINTR since the |
1286 | ** file descriptor might have already been reused by another thread. |
1287 | ** So we don't even try to recover from an EINTR. Just log the error |
1288 | ** and move on. |
1289 | */ |
1290 | static void robust_close(unixFile *pFile, int h, int lineno){ |
1291 | if( osClose(h) ){ |
1292 | unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close" , |
1293 | pFile ? pFile->zPath : 0, lineno); |
1294 | } |
1295 | } |
1296 | |
1297 | /* |
1298 | ** Set the pFile->lastErrno. Do this in a subroutine as that provides |
1299 | ** a convenient place to set a breakpoint. |
1300 | */ |
1301 | static void storeLastErrno(unixFile *pFile, int error){ |
1302 | pFile->lastErrno = error; |
1303 | } |
1304 | |
1305 | /* |
1306 | ** Close all file descriptors accumulated in the unixInodeInfo->pUnused list. |
1307 | */ |
1308 | static void closePendingFds(unixFile *pFile){ |
1309 | unixInodeInfo *pInode = pFile->pInode; |
1310 | UnixUnusedFd *p; |
1311 | UnixUnusedFd *pNext; |
1312 | assert( unixFileMutexHeld(pFile) ); |
1313 | for(p=pInode->pUnused; p; p=pNext){ |
1314 | pNext = p->pNext; |
1315 | robust_close(pFile, p->fd, __LINE__); |
1316 | sqlite3_free(p); |
1317 | } |
1318 | pInode->pUnused = 0; |
1319 | } |
1320 | |
1321 | /* |
1322 | ** Release a unixInodeInfo structure previously allocated by findInodeInfo(). |
1323 | ** |
1324 | ** The global mutex must be held when this routine is called, but the mutex |
1325 | ** on the inode being deleted must NOT be held. |
1326 | */ |
1327 | static void releaseInodeInfo(unixFile *pFile){ |
1328 | unixInodeInfo *pInode = pFile->pInode; |
1329 | assert( unixMutexHeld() ); |
1330 | assert( unixFileMutexNotheld(pFile) ); |
1331 | if( ALWAYS(pInode) ){ |
1332 | pInode->nRef--; |
1333 | if( pInode->nRef==0 ){ |
1334 | assert( pInode->pShmNode==0 ); |
1335 | sqlite3_mutex_enter(pInode->pLockMutex); |
1336 | closePendingFds(pFile); |
1337 | sqlite3_mutex_leave(pInode->pLockMutex); |
1338 | if( pInode->pPrev ){ |
1339 | assert( pInode->pPrev->pNext==pInode ); |
1340 | pInode->pPrev->pNext = pInode->pNext; |
1341 | }else{ |
1342 | assert( inodeList==pInode ); |
1343 | inodeList = pInode->pNext; |
1344 | } |
1345 | if( pInode->pNext ){ |
1346 | assert( pInode->pNext->pPrev==pInode ); |
1347 | pInode->pNext->pPrev = pInode->pPrev; |
1348 | } |
1349 | sqlite3_mutex_free(pInode->pLockMutex); |
1350 | sqlite3_free(pInode); |
1351 | } |
1352 | } |
1353 | } |
1354 | |
1355 | /* |
1356 | ** Given a file descriptor, locate the unixInodeInfo object that |
1357 | ** describes that file descriptor. Create a new one if necessary. The |
1358 | ** return value might be uninitialized if an error occurs. |
1359 | ** |
1360 | ** The global mutex must be held when calling this routine. |
1361 | ** |
1362 | ** Return an appropriate error code. |
1363 | */ |
1364 | static int findInodeInfo( |
1365 | unixFile *pFile, /* Unix file with file desc used in the key */ |
1366 | unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ |
1367 | ){ |
1368 | int rc; /* System call return code */ |
1369 | int fd; /* The file descriptor for pFile */ |
1370 | struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ |
1371 | struct stat statbuf; /* Low-level file information */ |
1372 | unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ |
1373 | |
1374 | assert( unixMutexHeld() ); |
1375 | |
1376 | /* Get low-level information about the file that we can used to |
1377 | ** create a unique name for the file. |
1378 | */ |
1379 | fd = pFile->h; |
1380 | rc = osFstat(fd, &statbuf); |
1381 | if( rc!=0 ){ |
1382 | storeLastErrno(pFile, errno); |
1383 | #if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS) |
1384 | if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS; |
1385 | #endif |
1386 | return SQLITE_IOERR; |
1387 | } |
1388 | |
1389 | #ifdef __APPLE__ |
1390 | /* On OS X on an msdos filesystem, the inode number is reported |
1391 | ** incorrectly for zero-size files. See ticket #3260. To work |
1392 | ** around this problem (we consider it a bug in OS X, not SQLite) |
1393 | ** we always increase the file size to 1 by writing a single byte |
1394 | ** prior to accessing the inode number. The one byte written is |
1395 | ** an ASCII 'S' character which also happens to be the first byte |
1396 | ** in the header of every SQLite database. In this way, if there |
1397 | ** is a race condition such that another thread has already populated |
1398 | ** the first page of the database, no damage is done. |
1399 | */ |
1400 | if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ |
1401 | do{ rc = osWrite(fd, "S" , 1); }while( rc<0 && errno==EINTR ); |
1402 | if( rc!=1 ){ |
1403 | storeLastErrno(pFile, errno); |
1404 | return SQLITE_IOERR; |
1405 | } |
1406 | rc = osFstat(fd, &statbuf); |
1407 | if( rc!=0 ){ |
1408 | storeLastErrno(pFile, errno); |
1409 | return SQLITE_IOERR; |
1410 | } |
1411 | } |
1412 | #endif |
1413 | |
1414 | memset(&fileId, 0, sizeof(fileId)); |
1415 | fileId.dev = statbuf.st_dev; |
1416 | #if OS_VXWORKS |
1417 | fileId.pId = pFile->pId; |
1418 | #else |
1419 | fileId.ino = (u64)statbuf.st_ino; |
1420 | #endif |
1421 | assert( unixMutexHeld() ); |
1422 | pInode = inodeList; |
1423 | while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){ |
1424 | pInode = pInode->pNext; |
1425 | } |
1426 | if( pInode==0 ){ |
1427 | pInode = sqlite3_malloc64( sizeof(*pInode) ); |
1428 | if( pInode==0 ){ |
1429 | return SQLITE_NOMEM_BKPT; |
1430 | } |
1431 | memset(pInode, 0, sizeof(*pInode)); |
1432 | memcpy(&pInode->fileId, &fileId, sizeof(fileId)); |
1433 | if( sqlite3GlobalConfig.bCoreMutex ){ |
1434 | pInode->pLockMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); |
1435 | if( pInode->pLockMutex==0 ){ |
1436 | sqlite3_free(pInode); |
1437 | return SQLITE_NOMEM_BKPT; |
1438 | } |
1439 | } |
1440 | pInode->nRef = 1; |
1441 | assert( unixMutexHeld() ); |
1442 | pInode->pNext = inodeList; |
1443 | pInode->pPrev = 0; |
1444 | if( inodeList ) inodeList->pPrev = pInode; |
1445 | inodeList = pInode; |
1446 | }else{ |
1447 | pInode->nRef++; |
1448 | } |
1449 | *ppInode = pInode; |
1450 | return SQLITE_OK; |
1451 | } |
1452 | |
1453 | /* |
1454 | ** Return TRUE if pFile has been renamed or unlinked since it was first opened. |
1455 | */ |
1456 | static int fileHasMoved(unixFile *pFile){ |
1457 | #if OS_VXWORKS |
1458 | return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId; |
1459 | #else |
1460 | struct stat buf; |
1461 | return pFile->pInode!=0 && |
1462 | (osStat(pFile->zPath, &buf)!=0 |
1463 | || (u64)buf.st_ino!=pFile->pInode->fileId.ino); |
1464 | #endif |
1465 | } |
1466 | |
1467 | |
1468 | /* |
1469 | ** Check a unixFile that is a database. Verify the following: |
1470 | ** |
1471 | ** (1) There is exactly one hard link on the file |
1472 | ** (2) The file is not a symbolic link |
1473 | ** (3) The file has not been renamed or unlinked |
1474 | ** |
1475 | ** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. |
1476 | */ |
1477 | static void verifyDbFile(unixFile *pFile){ |
1478 | struct stat buf; |
1479 | int rc; |
1480 | |
1481 | /* These verifications occurs for the main database only */ |
1482 | if( pFile->ctrlFlags & UNIXFILE_NOLOCK ) return; |
1483 | |
1484 | rc = osFstat(pFile->h, &buf); |
1485 | if( rc!=0 ){ |
1486 | sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s" , pFile->zPath); |
1487 | return; |
1488 | } |
1489 | if( buf.st_nlink==0 ){ |
1490 | sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s" , pFile->zPath); |
1491 | return; |
1492 | } |
1493 | if( buf.st_nlink>1 ){ |
1494 | sqlite3_log(SQLITE_WARNING, "multiple links to file: %s" , pFile->zPath); |
1495 | return; |
1496 | } |
1497 | if( fileHasMoved(pFile) ){ |
1498 | sqlite3_log(SQLITE_WARNING, "file renamed while open: %s" , pFile->zPath); |
1499 | return; |
1500 | } |
1501 | } |
1502 | |
1503 | |
1504 | /* |
1505 | ** This routine checks if there is a RESERVED lock held on the specified |
1506 | ** file by this or any other process. If such a lock is held, set *pResOut |
1507 | ** to a non-zero value otherwise *pResOut is set to zero. The return value |
1508 | ** is set to SQLITE_OK unless an I/O error occurs during lock checking. |
1509 | */ |
1510 | static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ |
1511 | int rc = SQLITE_OK; |
1512 | int reserved = 0; |
1513 | unixFile *pFile = (unixFile*)id; |
1514 | |
1515 | SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); |
1516 | |
1517 | assert( pFile ); |
1518 | assert( pFile->eFileLock<=SHARED_LOCK ); |
1519 | sqlite3_mutex_enter(pFile->pInode->pLockMutex); |
1520 | |
1521 | /* Check if a thread in this process holds such a lock */ |
1522 | if( pFile->pInode->eFileLock>SHARED_LOCK ){ |
1523 | reserved = 1; |
1524 | } |
1525 | |
1526 | /* Otherwise see if some other process holds it. |
1527 | */ |
1528 | #ifndef __DJGPP__ |
1529 | if( !reserved && !pFile->pInode->bProcessLock ){ |
1530 | struct flock lock; |
1531 | lock.l_whence = SEEK_SET; |
1532 | lock.l_start = RESERVED_BYTE; |
1533 | lock.l_len = 1; |
1534 | lock.l_type = F_WRLCK; |
1535 | if( osFcntl(pFile->h, F_GETLK, &lock) ){ |
1536 | rc = SQLITE_IOERR_CHECKRESERVEDLOCK; |
1537 | storeLastErrno(pFile, errno); |
1538 | } else if( lock.l_type!=F_UNLCK ){ |
1539 | reserved = 1; |
1540 | } |
1541 | } |
1542 | #endif |
1543 | |
1544 | sqlite3_mutex_leave(pFile->pInode->pLockMutex); |
1545 | OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n" , pFile->h, rc, reserved)); |
1546 | |
1547 | *pResOut = reserved; |
1548 | return rc; |
1549 | } |
1550 | |
1551 | /* Forward declaration*/ |
1552 | static int unixSleep(sqlite3_vfs*,int); |
1553 | |
1554 | /* |
1555 | ** Set a posix-advisory-lock. |
1556 | ** |
1557 | ** There are two versions of this routine. If compiled with |
1558 | ** SQLITE_ENABLE_SETLK_TIMEOUT then the routine has an extra parameter |
1559 | ** which is a pointer to a unixFile. If the unixFile->iBusyTimeout |
1560 | ** value is set, then it is the number of milliseconds to wait before |
1561 | ** failing the lock. The iBusyTimeout value is always reset back to |
1562 | ** zero on each call. |
1563 | ** |
1564 | ** If SQLITE_ENABLE_SETLK_TIMEOUT is not defined, then do a non-blocking |
1565 | ** attempt to set the lock. |
1566 | */ |
#ifndef SQLITE_ENABLE_SETLK_TIMEOUT
  /* No timeout support: a single non-blocking attempt; the third
  ** argument is ignored. */
# define osSetPosixAdvisoryLock(h,x,t) osFcntl(h,F_SETLK,x)
#else
static int osSetPosixAdvisoryLock(
  int h,                /* The file descriptor on which to take the lock */
  struct flock *pLock,  /* The description of the lock */
  unixFile *pFile       /* Structure holding timeout value */
){
  int nRetry = pFile->iBusyTimeout;     /* Retries that remain */
  int rc = osFcntl(h,F_SETLK,pLock);    /* Result of the latest attempt */

  /* Generic posix offers no blocking file lock with a timeout, so the
  ** lock is simply attempted again once every millisecond until it is
  ** obtained or the retries run out.  On a system that does provide such
  ** a blocking lock, this is the place to invoke it instead. */
  for(; rc<0 && nRetry>0; nRetry--){
    unixSleep(0,1000);
    rc = osFcntl(h,F_SETLK,pLock);
  }
  return rc;
}
#endif /* SQLITE_ENABLE_SETLK_TIMEOUT */
1590 | |
1591 | |
1592 | /* |
1593 | ** Attempt to set a system-lock on the file pFile. The lock is |
1594 | ** described by pLock. |
1595 | ** |
1596 | ** If the pFile was opened read/write from unix-excl, then the only lock |
1597 | ** ever obtained is an exclusive lock, and it is obtained exactly once |
1598 | ** the first time any lock is attempted. All subsequent system locking |
1599 | ** operations become no-ops. Locking operations still happen internally, |
1600 | ** in order to coordinate access between separate database connections |
1601 | ** within this process, but all of that is handled in memory and the |
1602 | ** operating system does not participate. |
1603 | ** |
1604 | ** This function is a pass-through to fcntl(F_SETLK) if pFile is using |
1605 | ** any VFS other than "unix-excl" or if pFile is opened on "unix-excl" |
1606 | ** and is read-only. |
1607 | ** |
1608 | ** Zero is returned if the call completes successfully, or -1 if a call |
1609 | ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). |
1610 | */ |
1611 | static int unixFileLock(unixFile *pFile, struct flock *pLock){ |
1612 | int rc; |
1613 | unixInodeInfo *pInode = pFile->pInode; |
1614 | assert( pInode!=0 ); |
1615 | assert( sqlite3_mutex_held(pInode->pLockMutex) ); |
1616 | if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){ |
1617 | if( pInode->bProcessLock==0 ){ |
1618 | struct flock lock; |
1619 | assert( pInode->nLock==0 ); |
1620 | lock.l_whence = SEEK_SET; |
1621 | lock.l_start = SHARED_FIRST; |
1622 | lock.l_len = SHARED_SIZE; |
1623 | lock.l_type = F_WRLCK; |
1624 | rc = osSetPosixAdvisoryLock(pFile->h, &lock, pFile); |
1625 | if( rc<0 ) return rc; |
1626 | pInode->bProcessLock = 1; |
1627 | pInode->nLock++; |
1628 | }else{ |
1629 | rc = 0; |
1630 | } |
1631 | }else{ |
1632 | rc = osSetPosixAdvisoryLock(pFile->h, pLock, pFile); |
1633 | } |
1634 | return rc; |
1635 | } |
1636 | |
/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
**     (1) SHARED_LOCK
**     (2) RESERVED_LOCK
**     (3) PENDING_LOCK
**     (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between.  The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal.  The following chart shows the allowed
** transitions and the inserted intermediate states:
**
**    UNLOCKED -> SHARED
**    SHARED -> RESERVED
**    SHARED -> (PENDING) -> EXCLUSIVE
**    RESERVED -> (PENDING) -> EXCLUSIVE
**    PENDING -> EXCLUSIVE
**
** This routine will only increase a lock.  Use the sqlite3OsUnlock()
** routine to lower a locking level.
**
** Parameters:
**   id         The file to lock; it is cast to a unixFile.
**   eFileLock  The requested lock level.  PENDING_LOCK is never requested
**              directly (this is asserted).
**
** Return value:
**   SQLITE_OK     The requested level (or a more restrictive one) is held.
**   SQLITE_BUSY   Another handle on the same inode within this process
**                 holds a conflicting lock, or a failed fcntl() errno was
**                 translated to SQLITE_BUSY by sqliteErrorFromPosixError().
**   other         The code produced by sqliteErrorFromPosixError() for a
**                 failed lock call, or SQLITE_IOERR_UNLOCK if the temporary
**                 PENDING lock could not be dropped.  For results other
**                 than SQLITE_BUSY the errno is recorded with
**                 storeLastErrno().
**
** If an EXCLUSIVE request fails after the PENDING step, the handle and the
** inode are both left at PENDING_LOCK.
*/
static int unixLock(sqlite3_file *id, int eFileLock){
  /* The following describes the implementation of the various locks and
  ** lock transitions in terms of the POSIX advisory shared and exclusive
  ** lock primitives (called read-locks and write-locks below, to avoid
  ** confusion with SQLite lock names). The algorithms are complicated
  ** slightly in order to be compatible with Windows95 systems simultaneously
  ** accessing the same database file, in case that is ever required.
  **
  ** Symbols defined in os.h identify the 'pending byte' and the 'reserved
  ** byte', each single bytes at well known offsets, and the 'shared byte
  ** range', a range of 510 bytes at a well known offset.
  **
  ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
  ** byte'.  If this is successful, 'shared byte range' is read-locked
  ** and the lock on the 'pending byte' released.  (Legacy note:  When
  ** SQLite was first developed, Windows95 systems were still very common,
  ** and Windows95 lacks a shared-lock capability.  So on Windows95, a
  ** single randomly selected byte from the 'shared byte range' is locked.
  ** Windows95 is now pretty much extinct, but this work-around for the
  ** lack of shared-locks on Windows95 lives on, for backwards
  ** compatibility.)
  **
  ** A process may only obtain a RESERVED lock after it has a SHARED lock.
  ** A RESERVED lock is implemented by grabbing a write-lock on the
  ** 'reserved byte'.
  **
  ** A process may only obtain a PENDING lock after it has obtained a
  ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
  ** on the 'pending byte'. This ensures that no new SHARED locks can be
  ** obtained, but existing SHARED locks are allowed to persist. A process
  ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
  ** This property is used by the algorithm for rolling back a journal file
  ** after a crash.
  **
  ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
  ** implemented by obtaining a write-lock on the entire 'shared byte
  ** range'. Since all other locks require a read-lock on one of the bytes
  ** within this range, this ensures that no other locks are held on the
  ** database.
  */
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  struct flock lock;
  int tErrno = 0;

  assert( pFile );
  OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n" , pFile->h,
      azFileLock(eFileLock), azFileLock(pFile->eFileLock),
      azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared,
      osGetpid(0)));

  /* If there is already a lock of this type or more restrictive on the
  ** unixFile, do nothing. Don't use the end_lock: exit path, as
  ** pInode->pLockMutex has not been entered yet.
  */
  if( pFile->eFileLock>=eFileLock ){
    OSTRACE(("LOCK %d %s ok (already held) (unix)\n" , pFile->h,
            azFileLock(eFileLock)));
    return SQLITE_OK;
  }

  /* Make sure the locking sequence is correct.
  **  (1) We never move from unlocked to anything higher than shared lock.
  **  (2) SQLite never explicitly requests a pending lock.
  **  (3) A shared lock is always held when a reserve lock is requested.
  */
  assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  assert( eFileLock!=PENDING_LOCK );
  assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );

  /* This mutex is needed because pFile->pInode is shared across threads
  */
  pInode = pFile->pInode;
  sqlite3_mutex_enter(pInode->pLockMutex);

  /* If some thread using this PID has a lock via a different unixFile*
  ** handle that precludes the requested lock, return BUSY.
  */
  if( (pFile->eFileLock!=pInode->eFileLock &&
          (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
  ){
    rc = SQLITE_BUSY;
    goto end_lock;
  }

  /* If a SHARED lock is requested, and some thread using this PID already
  ** has a SHARED or RESERVED lock, then increment reference counts and
  ** return SQLITE_OK.  No system call is made on this path.
  */
  if( eFileLock==SHARED_LOCK &&
      (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
    assert( eFileLock==SHARED_LOCK );
    assert( pFile->eFileLock==0 );
    assert( pInode->nShared>0 );
    pFile->eFileLock = SHARED_LOCK;
    pInode->nShared++;
    pInode->nLock++;
    goto end_lock;
  }


  /* A PENDING lock is needed before acquiring a SHARED lock and before
  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
  ** be released.
  */
  lock.l_len = 1L;
  lock.l_whence = SEEK_SET;
  if( eFileLock==SHARED_LOCK
      || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
  ){
    /* Read-lock the pending byte on the way to SHARED, write-lock it on
    ** the way to EXCLUSIVE. */
    lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);
    lock.l_start = PENDING_BYTE;
    if( unixFileLock(pFile, &lock) ){
      /* Save errno at once, before any later call can overwrite it */
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
      goto end_lock;
    }
  }


  /* If control gets to this point, then actually go ahead and make
  ** operating system calls for the specified lock.
  */
  if( eFileLock==SHARED_LOCK ){
    assert( pInode->nShared==0 );
    assert( pInode->eFileLock==0 );
    assert( rc==SQLITE_OK );

    /* Now get the read-lock.  lock.l_type is still F_RDLCK from the
    ** PENDING step above. */
    lock.l_start = SHARED_FIRST;
    lock.l_len = SHARED_SIZE;
    if( unixFileLock(pFile, &lock) ){
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
    }

    /* Drop the temporary PENDING lock.  This is attempted even when the
    ** read-lock above failed; an unlock failure is only reported if no
    ** earlier error is already recorded in rc. */
    lock.l_start = PENDING_BYTE;
    lock.l_len = 1L;
    lock.l_type = F_UNLCK;
    if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){
      /* This could happen with a network mount */
      tErrno = errno;
      rc = SQLITE_IOERR_UNLOCK;
    }

    if( rc ){
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
      goto end_lock;
    }else{
      /* First SHARED lock on this inode within the process */
      pFile->eFileLock = SHARED_LOCK;
      pInode->nLock++;
      pInode->nShared = 1;
    }
  }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
    /* We are trying for an exclusive lock but another thread in this
    ** same process is still holding a shared lock. */
    rc = SQLITE_BUSY;
  }else{
    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
    ** assumed that there is a SHARED or greater lock on the file
    ** already.
    */
    assert( 0!=pFile->eFileLock );
    lock.l_type = F_WRLCK;

    assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK );
    if( eFileLock==RESERVED_LOCK ){
      lock.l_start = RESERVED_BYTE;
      lock.l_len = 1L;
    }else{
      lock.l_start = SHARED_FIRST;
      lock.l_len = SHARED_SIZE;
    }

    if( unixFileLock(pFile, &lock) ){
      tErrno = errno;
      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
      if( rc!=SQLITE_BUSY ){
        storeLastErrno(pFile, tErrno);
      }
    }
  }


#ifdef SQLITE_DEBUG
  /* Set up the transaction-counter change checking flags when
  ** transitioning from a SHARED to a RESERVED lock.  The change
  ** from SHARED to RESERVED marks the beginning of a normal
  ** write operation (not a hot journal rollback).
  */
  if( rc==SQLITE_OK
   && pFile->eFileLock<=SHARED_LOCK
   && eFileLock==RESERVED_LOCK
  ){
    pFile->transCntrChng = 0;
    pFile->dbUpdate = 0;
    pFile->inNormalWrite = 1;
  }
#endif


  /* Record the new lock level.  A failed EXCLUSIVE request leaves both the
  ** handle and the inode at PENDING_LOCK. */
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
    pInode->eFileLock = eFileLock;
  }else if( eFileLock==EXCLUSIVE_LOCK ){
    pFile->eFileLock = PENDING_LOCK;
    pInode->eFileLock = PENDING_LOCK;
  }

end_lock:
  sqlite3_mutex_leave(pInode->pLockMutex);
  OSTRACE(("LOCK %d %s %s (unix)\n" , pFile->h, azFileLock(eFileLock),
      rc==SQLITE_OK ? "ok" : "failed" ));
  return rc;
}
1883 | |
1884 | /* |
1885 | ** Add the file descriptor used by file handle pFile to the corresponding |
1886 | ** pUnused list. |
1887 | */ |
1888 | static void setPendingFd(unixFile *pFile){ |
1889 | unixInodeInfo *pInode = pFile->pInode; |
1890 | UnixUnusedFd *p = pFile->pPreallocatedUnused; |
1891 | assert( unixFileMutexHeld(pFile) ); |
1892 | p->pNext = pInode->pUnused; |
1893 | pInode->pUnused = p; |
1894 | pFile->h = -1; |
1895 | pFile->pPreallocatedUnused = 0; |
1896 | } |
1897 | |
/*
** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
**
** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED
** the byte range is divided into 2 parts and the first part is unlocked then
** set to a read lock, then the other part is simply unlocked.  This works
** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to
** remove the write lock on a region when a read lock is set.  That code is
** only compiled when __APPLE__ is defined and SQLITE_ENABLE_LOCKING_STYLE is
** true; in every other build handleNFSUnlock is asserted to be zero.
**
** Return value:
**   SQLITE_OK             The lock was lowered (or nothing needed doing).
**                         pFile->eFileLock is set to eFileLock.
**   SQLITE_IOERR_UNLOCK   An unlock call failed.
**   SQLITE_IOERR_RDLOCK   Re-establishing the read-lock on the shared range
**                         failed (on the NFS path the code comes from
**                         sqliteErrorFromPosixError() instead).
** On the error paths the errno is recorded with storeLastErrno(), except
** that the NFS read-lock path records it only when IS_LOCK_ERROR(rc) holds.
*/
static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  struct flock lock;
  int rc = SQLITE_OK;

  assert( pFile );
  OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n" , pFile->h, eFileLock,
      pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
      osGetpid(0)));

  assert( eFileLock<=SHARED_LOCK );
  /* Already at or below the requested level: nothing to do.  The mutex has
  ** not been entered yet, so return directly rather than via end_unlock. */
  if( pFile->eFileLock<=eFileLock ){
    return SQLITE_OK;
  }
  pInode = pFile->pInode;
  sqlite3_mutex_enter(pInode->pLockMutex);
  assert( pInode->nShared!=0 );
  if( pFile->eFileLock>SHARED_LOCK ){
    assert( pInode->eFileLock==pFile->eFileLock );

#ifdef SQLITE_DEBUG
    /* The lock is being reduced such that other processes can start
    ** reading the database file again, so the handle is no longer in the
    ** "normal write" state that unixLock() flags on the SHARED to RESERVED
    ** transition.  NOTE(review): the flag is presumably consulted by
    ** write-path checks elsewhere in this file that verify the transaction
    ** counter was updated whenever the database file changed (a stale
    ** counter can leave other connections with a stale cache and lead to
    ** database corruption) - confirm against those checks.
    */
    pFile->inNormalWrite = 0;
#endif

    /* downgrading to a shared lock on NFS involves clearing the write lock
    ** before establishing the readlock - to avoid a race condition we downgrade
    ** the lock in 2 blocks, so that part of the range will be covered by a
    ** write lock until the rest is covered by a read lock:
    **  1:   [WWWWW]
    **  2:   [....W]
    **  3:   [RRRRW]
    **  4:   [RRRR.]
    */
    if( eFileLock==SHARED_LOCK ){
#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE
      (void)handleNFSUnlock;
      assert( handleNFSUnlock==0 );
#endif
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
      if( handleNFSUnlock ){
        int tErrno;               /* Error code from system call errors */
        off_t divSize = SHARED_SIZE - 1;

        /* Step 2 of the diagram: unlock all but the last byte */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          storeLastErrno(pFile, tErrno);
          goto end_unlock;
        }
        /* Step 3: read-lock the part that was just unlocked */
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
          if( IS_LOCK_ERROR(rc) ){
            storeLastErrno(pFile, tErrno);
          }
          goto end_unlock;
        }
        /* Step 4: release the write-lock on the remaining byte */
        lock.l_type = F_UNLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST+divSize;
        lock.l_len = SHARED_SIZE-divSize;
        if( unixFileLock(pFile, &lock)==(-1) ){
          tErrno = errno;
          rc = SQLITE_IOERR_UNLOCK;
          storeLastErrno(pFile, tErrno);
          goto end_unlock;
        }
      }else
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
      {
        /* Ordinary downgrade: replace the write-lock on the shared range
        ** with a read-lock in a single call. */
        lock.l_type = F_RDLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start = SHARED_FIRST;
        lock.l_len = SHARED_SIZE;
        if( unixFileLock(pFile, &lock) ){
          /* In theory, the call to unixFileLock() cannot fail because another
          ** process is holding an incompatible lock. If it does, this
          ** indicates that the other process is not following the locking
          ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning
          ** SQLITE_BUSY would confuse the upper layer (in practice it causes
          ** an assert to fail). */
          rc = SQLITE_IOERR_RDLOCK;
          storeLastErrno(pFile, errno);
          goto end_unlock;
        }
      }
    }
    /* Release the PENDING and RESERVED bytes with one two-byte unlock; the
    ** assert documents that the two bytes are adjacent. */
    lock.l_type = F_UNLCK;
    lock.l_whence = SEEK_SET;
    lock.l_start = PENDING_BYTE;
    lock.l_len = 2L;  assert( PENDING_BYTE+1==RESERVED_BYTE );
    if( unixFileLock(pFile, &lock)==0 ){
      pInode->eFileLock = SHARED_LOCK;
    }else{
      rc = SQLITE_IOERR_UNLOCK;
      storeLastErrno(pFile, errno);
      goto end_unlock;
    }
  }
  if( eFileLock==NO_LOCK ){
    /* Decrement the shared lock counter.  Release the lock using an
    ** OS call only when all threads in this same process have released
    ** the lock.
    */
    pInode->nShared--;
    if( pInode->nShared==0 ){
      /* l_start==0 and l_len==0 describe the whole file */
      lock.l_type = F_UNLCK;
      lock.l_whence = SEEK_SET;
      lock.l_start = lock.l_len = 0L;
      if( unixFileLock(pFile, &lock)==0 ){
        pInode->eFileLock = NO_LOCK;
      }else{
        /* The unlock failed, but both the inode and the handle are still
        ** recorded as unlocked; the error is reported through rc. */
        rc = SQLITE_IOERR_UNLOCK;
        storeLastErrno(pFile, errno);
        pInode->eFileLock = NO_LOCK;
        pFile->eFileLock = NO_LOCK;
      }
    }

    /* Decrement the count of locks against this same file.  When the
    ** count reaches zero, close any other file descriptors whose close
    ** was deferred because of outstanding locks.
    */
    pInode->nLock--;
    assert( pInode->nLock>=0 );
    if( pInode->nLock==0 ) closePendingFds(pFile);
  }

end_unlock:
  sqlite3_mutex_leave(pInode->pLockMutex);
  /* The handle's recorded level only changes here when everything worked */
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
  }
  return rc;
}
2063 | |
2064 | /* |
2065 | ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock |
2066 | ** must be either NO_LOCK or SHARED_LOCK. |
2067 | ** |
2068 | ** If the locking level of the file descriptor is already at or below |
2069 | ** the requested locking level, this routine is a no-op. |
2070 | */ |
2071 | static int unixUnlock(sqlite3_file *id, int eFileLock){ |
2072 | #if SQLITE_MAX_MMAP_SIZE>0 |
2073 | assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); |
2074 | #endif |
2075 | return posixUnlock(id, eFileLock, 0); |
2076 | } |
2077 | |
2078 | #if SQLITE_MAX_MMAP_SIZE>0 |
2079 | static int unixMapfile(unixFile *pFd, i64 nByte); |
2080 | static void unixUnmapfile(unixFile *pFd); |
2081 | #endif |
2082 | |
2083 | /* |
2084 | ** This function performs the parts of the "close file" operation |
2085 | ** common to all locking schemes. It closes the directory and file |
2086 | ** handles, if they are valid, and sets all fields of the unixFile |
2087 | ** structure to 0. |
2088 | ** |
2089 | ** It is *not* necessary to hold the mutex when this routine is called, |
2090 | ** even on VxWorks. A mutex will be acquired on VxWorks by the |
2091 | ** vxworksReleaseFileId() routine. |
2092 | */ |
2093 | static int closeUnixFile(sqlite3_file *id){ |
2094 | unixFile *pFile = (unixFile*)id; |
2095 | #if SQLITE_MAX_MMAP_SIZE>0 |
2096 | unixUnmapfile(pFile); |
2097 | #endif |
2098 | if( pFile->h>=0 ){ |
2099 | robust_close(pFile, pFile->h, __LINE__); |
2100 | pFile->h = -1; |
2101 | } |
2102 | #if OS_VXWORKS |
2103 | if( pFile->pId ){ |
2104 | if( pFile->ctrlFlags & UNIXFILE_DELETE ){ |
2105 | osUnlink(pFile->pId->zCanonicalName); |
2106 | } |
2107 | vxworksReleaseFileId(pFile->pId); |
2108 | pFile->pId = 0; |
2109 | } |
2110 | #endif |
2111 | #ifdef SQLITE_UNLINK_AFTER_CLOSE |
2112 | if( pFile->ctrlFlags & UNIXFILE_DELETE ){ |
2113 | osUnlink(pFile->zPath); |
2114 | sqlite3_free(*(char**)&pFile->zPath); |
2115 | pFile->zPath = 0; |
2116 | } |
2117 | #endif |
2118 | OSTRACE(("CLOSE %-3d\n" , pFile->h)); |
2119 | OpenCounter(-1); |
2120 | sqlite3_free(pFile->pPreallocatedUnused); |
2121 | memset(pFile, 0, sizeof(unixFile)); |
2122 | return SQLITE_OK; |
2123 | } |
2124 | |
2125 | /* |
2126 | ** Close a file. |
2127 | */ |
2128 | static int unixClose(sqlite3_file *id){ |
2129 | int rc = SQLITE_OK; |
2130 | unixFile *pFile = (unixFile *)id; |
2131 | unixInodeInfo *pInode = pFile->pInode; |
2132 | |
2133 | assert( pInode!=0 ); |
2134 | verifyDbFile(pFile); |
2135 | unixUnlock(id, NO_LOCK); |
2136 | assert( unixFileMutexNotheld(pFile) ); |
2137 | unixEnterMutex(); |
2138 | |
2139 | /* unixFile.pInode is always valid here. Otherwise, a different close |
2140 | ** routine (e.g. nolockClose()) would be called instead. |
2141 | */ |
2142 | assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); |
2143 | sqlite3_mutex_enter(pInode->pLockMutex); |
2144 | if( pInode->nLock ){ |
2145 | /* If there are outstanding locks, do not actually close the file just |
2146 | ** yet because that would clear those locks. Instead, add the file |
2147 | ** descriptor to pInode->pUnused list. It will be automatically closed |
2148 | ** when the last lock is cleared. |
2149 | */ |
2150 | setPendingFd(pFile); |
2151 | } |
2152 | sqlite3_mutex_leave(pInode->pLockMutex); |
2153 | releaseInodeInfo(pFile); |
2154 | assert( pFile->pShm==0 ); |
2155 | rc = closeUnixFile(id); |
2156 | unixLeaveMutex(); |
2157 | return rc; |
2158 | } |
2159 | |
2160 | /************** End of the posix advisory lock implementation ***************** |
2161 | ******************************************************************************/ |
2162 | |
2163 | /****************************************************************************** |
2164 | ****************************** No-op Locking ********************************** |
2165 | ** |
2166 | ** Of the various locking implementations available, this is by far the |
2167 | ** simplest: locking is ignored. No attempt is made to lock the database |
2168 | ** file for reading or writing. |
2169 | ** |
** This locking mode is appropriate for use on read-only databases
** (for example, databases that are burned onto a CD-ROM).  It can
2172 | ** also be used if the application employs some external mechanism to |
2173 | ** prevent simultaneous access of the same database by two or more |
2174 | ** database connections. But there is a serious risk of database |
2175 | ** corruption if this locking mode is used in situations where multiple |
2176 | ** database connections are accessing the same database file at the same |
2177 | ** time and one or more of those connections are writing. |
2178 | */ |
2179 | |
2180 | static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ |
2181 | UNUSED_PARAMETER(NotUsed); |
2182 | *pResOut = 0; |
2183 | return SQLITE_OK; |
2184 | } |
2185 | static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ |
2186 | UNUSED_PARAMETER2(NotUsed, NotUsed2); |
2187 | return SQLITE_OK; |
2188 | } |
2189 | static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ |
2190 | UNUSED_PARAMETER2(NotUsed, NotUsed2); |
2191 | return SQLITE_OK; |
2192 | } |
2193 | |
2194 | /* |
2195 | ** Close the file. |
2196 | */ |
2197 | static int nolockClose(sqlite3_file *id) { |
2198 | return closeUnixFile(id); |
2199 | } |
2200 | |
2201 | /******************* End of the no-op lock implementation ********************* |
2202 | ******************************************************************************/ |
2203 | |
2204 | /****************************************************************************** |
2205 | ************************* Begin dot-file Locking ****************************** |
2206 | ** |
2207 | ** The dotfile locking implementation uses the existence of separate lock |
2208 | ** files (really a directory) to control access to the database. This works |
2209 | ** on just about every filesystem imaginable. But there are serious downsides: |
2210 | ** |
2211 | ** (1) There is zero concurrency. A single reader blocks all other |
2212 | ** connections from reading or writing the database. |
2213 | ** |
2214 | ** (2) An application crash or power loss can leave stale lock files |
2215 | ** sitting around that need to be cleared manually. |
2216 | ** |
2217 | ** Nevertheless, a dotlock is an appropriate locking mode for use if no |
2218 | ** other locking strategy is available. |
2219 | ** |
2220 | ** Dotfile locking works by creating a subdirectory in the same directory as |
2221 | ** the database and with the same name but with a ".lock" extension added. |
2222 | ** The existence of a lock directory implies an EXCLUSIVE lock. All other |
2223 | ** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. |
2224 | */ |
2225 | |
2226 | /* |
** The file suffix added to the database filename in order to create the
2228 | ** lock directory. |
2229 | */ |
2230 | #define DOTLOCK_SUFFIX ".lock" |
2231 | |
2232 | /* |
2233 | ** This routine checks if there is a RESERVED lock held on the specified |
2234 | ** file by this or any other process. If such a lock is held, set *pResOut |
2235 | ** to a non-zero value otherwise *pResOut is set to zero. The return value |
2236 | ** is set to SQLITE_OK unless an I/O error occurs during lock checking. |
2237 | ** |
2238 | ** In dotfile locking, either a lock exists or it does not. So in this |
2239 | ** variation of CheckReservedLock(), *pResOut is set to true if any lock |
2240 | ** is held on the file and false if the file is unlocked. |
2241 | */ |
2242 | static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { |
2243 | int rc = SQLITE_OK; |
2244 | int reserved = 0; |
2245 | unixFile *pFile = (unixFile*)id; |
2246 | |
2247 | SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); |
2248 | |
2249 | assert( pFile ); |
2250 | reserved = osAccess((const char*)pFile->lockingContext, 0)==0; |
2251 | OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n" , pFile->h, rc, reserved)); |
2252 | *pResOut = reserved; |
2253 | return rc; |
2254 | } |
2255 | |
2256 | /* |
2257 | ** Lock the file with the lock specified by parameter eFileLock - one |
2258 | ** of the following: |
2259 | ** |
2260 | ** (1) SHARED_LOCK |
2261 | ** (2) RESERVED_LOCK |
2262 | ** (3) PENDING_LOCK |
2263 | ** (4) EXCLUSIVE_LOCK |
2264 | ** |
2265 | ** Sometimes when requesting one lock state, additional lock states |
2266 | ** are inserted in between. The locking might fail on one of the later |
2267 | ** transitions leaving the lock state different from what it started but |
2268 | ** still short of its goal. The following chart shows the allowed |
2269 | ** transitions and the inserted intermediate states: |
2270 | ** |
2271 | ** UNLOCKED -> SHARED |
2272 | ** SHARED -> RESERVED |
2273 | ** SHARED -> (PENDING) -> EXCLUSIVE |
2274 | ** RESERVED -> (PENDING) -> EXCLUSIVE |
2275 | ** PENDING -> EXCLUSIVE |
2276 | ** |
2277 | ** This routine will only increase a lock. Use the sqlite3OsUnlock() |
2278 | ** routine to lower a locking level. |
2279 | ** |
2280 | ** With dotfile locking, we really only support state (4): EXCLUSIVE. |
2281 | ** But we track the other locking levels internally. |
2282 | */ |
2283 | static int dotlockLock(sqlite3_file *id, int eFileLock) { |
2284 | unixFile *pFile = (unixFile*)id; |
2285 | char *zLockFile = (char *)pFile->lockingContext; |
2286 | int rc = SQLITE_OK; |
2287 | |
2288 | |
2289 | /* If we have any lock, then the lock file already exists. All we have |
2290 | ** to do is adjust our internal record of the lock level. |
2291 | */ |
2292 | if( pFile->eFileLock > NO_LOCK ){ |
2293 | pFile->eFileLock = eFileLock; |
2294 | /* Always update the timestamp on the old file */ |
2295 | #ifdef HAVE_UTIME |
2296 | utime(zLockFile, NULL); |
2297 | #else |
2298 | utimes(zLockFile, NULL); |
2299 | #endif |
2300 | return SQLITE_OK; |
2301 | } |
2302 | |
2303 | /* grab an exclusive lock */ |
2304 | rc = osMkdir(zLockFile, 0777); |
2305 | if( rc<0 ){ |
2306 | /* failed to open/create the lock directory */ |
2307 | int tErrno = errno; |
2308 | if( EEXIST == tErrno ){ |
2309 | rc = SQLITE_BUSY; |
2310 | } else { |
2311 | rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); |
2312 | if( rc!=SQLITE_BUSY ){ |
2313 | storeLastErrno(pFile, tErrno); |
2314 | } |
2315 | } |
2316 | return rc; |
2317 | } |
2318 | |
2319 | /* got it, set the type and return ok */ |
2320 | pFile->eFileLock = eFileLock; |
2321 | return rc; |
2322 | } |
2323 | |
2324 | /* |
2325 | ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock |
2326 | ** must be either NO_LOCK or SHARED_LOCK. |
2327 | ** |
2328 | ** If the locking level of the file descriptor is already at or below |
2329 | ** the requested locking level, this routine is a no-op. |
2330 | ** |
2331 | ** When the locking level reaches NO_LOCK, delete the lock file. |
2332 | */ |
2333 | static int dotlockUnlock(sqlite3_file *id, int eFileLock) { |
2334 | unixFile *pFile = (unixFile*)id; |
2335 | char *zLockFile = (char *)pFile->lockingContext; |
2336 | int rc; |
2337 | |
2338 | assert( pFile ); |
2339 | OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n" , pFile->h, eFileLock, |
2340 | pFile->eFileLock, osGetpid(0))); |
2341 | assert( eFileLock<=SHARED_LOCK ); |
2342 | |
2343 | /* no-op if possible */ |
2344 | if( pFile->eFileLock==eFileLock ){ |
2345 | return SQLITE_OK; |
2346 | } |
2347 | |
2348 | /* To downgrade to shared, simply update our internal notion of the |
2349 | ** lock state. No need to mess with the file on disk. |
2350 | */ |
2351 | if( eFileLock==SHARED_LOCK ){ |
2352 | pFile->eFileLock = SHARED_LOCK; |
2353 | return SQLITE_OK; |
2354 | } |
2355 | |
2356 | /* To fully unlock the database, delete the lock file */ |
2357 | assert( eFileLock==NO_LOCK ); |
2358 | rc = osRmdir(zLockFile); |
2359 | if( rc<0 ){ |
2360 | int tErrno = errno; |
2361 | if( tErrno==ENOENT ){ |
2362 | rc = SQLITE_OK; |
2363 | }else{ |
2364 | rc = SQLITE_IOERR_UNLOCK; |
2365 | storeLastErrno(pFile, tErrno); |
2366 | } |
2367 | return rc; |
2368 | } |
2369 | pFile->eFileLock = NO_LOCK; |
2370 | return SQLITE_OK; |
2371 | } |
2372 | |
2373 | /* |
2374 | ** Close a file. Make sure the lock has been released before closing. |
2375 | */ |
2376 | static int dotlockClose(sqlite3_file *id) { |
2377 | unixFile *pFile = (unixFile*)id; |
2378 | assert( id!=0 ); |
2379 | dotlockUnlock(id, NO_LOCK); |
2380 | sqlite3_free(pFile->lockingContext); |
2381 | return closeUnixFile(id); |
2382 | } |
2383 | /****************** End of the dot-file lock implementation ******************* |
2384 | ******************************************************************************/ |
2385 | |
2386 | /****************************************************************************** |
2387 | ************************** Begin flock Locking ******************************** |
2388 | ** |
2389 | ** Use the flock() system call to do file locking. |
2390 | ** |
2391 | ** flock() locking is like dot-file locking in that the various |
2392 | ** fine-grain locking levels supported by SQLite are collapsed into |
2393 | ** a single exclusive lock. In other words, SHARED, RESERVED, and |
2394 | ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite |
2395 | ** still works when you do this, but concurrency is reduced since |
2396 | ** only a single process can be reading the database at a time. |
2397 | ** |
2398 | ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off |
2399 | */ |
2400 | #if SQLITE_ENABLE_LOCKING_STYLE |
2401 | |
/*
** Wrapper around flock() that transparently restarts the call whenever
** it fails with EINTR.  Any other outcome (success or a different
** error) is returned to the caller unchanged, with errno left as set
** by flock().
**
** On platforms that do not define EINTR the wrapper collapses to a
** plain flock() call.
*/
#ifdef EINTR
static int robust_flock(int fd, int op){
  for(;;){
    int rc = flock(fd, op);
    if( rc>=0 || errno!=EINTR ){
      return rc;
    }
    /* Interrupted by a signal: try again. */
  }
}
#else
# define robust_flock(a,b) flock(a,b)
#endif
2414 | |
2415 | |
2416 | /* |
2417 | ** This routine checks if there is a RESERVED lock held on the specified |
2418 | ** file by this or any other process. If such a lock is held, set *pResOut |
2419 | ** to a non-zero value otherwise *pResOut is set to zero. The return value |
2420 | ** is set to SQLITE_OK unless an I/O error occurs during lock checking. |
2421 | */ |
2422 | static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ |
2423 | int rc = SQLITE_OK; |
2424 | int reserved = 0; |
2425 | unixFile *pFile = (unixFile*)id; |
2426 | |
2427 | SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); |
2428 | |
2429 | assert( pFile ); |
2430 | |
2431 | /* Check if a thread in this process holds such a lock */ |
2432 | if( pFile->eFileLock>SHARED_LOCK ){ |
2433 | reserved = 1; |
2434 | } |
2435 | |
2436 | /* Otherwise see if some other process holds it. */ |
2437 | if( !reserved ){ |
2438 | /* attempt to get the lock */ |
2439 | int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); |
2440 | if( !lrc ){ |
2441 | /* got the lock, unlock it */ |
2442 | lrc = robust_flock(pFile->h, LOCK_UN); |
2443 | if ( lrc ) { |
2444 | int tErrno = errno; |
2445 | /* unlock failed with an error */ |
2446 | lrc = SQLITE_IOERR_UNLOCK; |
2447 | storeLastErrno(pFile, tErrno); |
2448 | rc = lrc; |
2449 | } |
2450 | } else { |
2451 | int tErrno = errno; |
2452 | reserved = 1; |
2453 | /* someone else might have it reserved */ |
2454 | lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); |
2455 | if( IS_LOCK_ERROR(lrc) ){ |
2456 | storeLastErrno(pFile, tErrno); |
2457 | rc = lrc; |
2458 | } |
2459 | } |
2460 | } |
2461 | OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n" , pFile->h, rc, reserved)); |
2462 | |
2463 | #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS |
2464 | if( (rc & 0xff) == SQLITE_IOERR ){ |
2465 | rc = SQLITE_OK; |
2466 | reserved=1; |
2467 | } |
2468 | #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ |
2469 | *pResOut = reserved; |
2470 | return rc; |
2471 | } |
2472 | |
2473 | /* |
2474 | ** Lock the file with the lock specified by parameter eFileLock - one |
2475 | ** of the following: |
2476 | ** |
2477 | ** (1) SHARED_LOCK |
2478 | ** (2) RESERVED_LOCK |
2479 | ** (3) PENDING_LOCK |
2480 | ** (4) EXCLUSIVE_LOCK |
2481 | ** |
2482 | ** Sometimes when requesting one lock state, additional lock states |
2483 | ** are inserted in between. The locking might fail on one of the later |
2484 | ** transitions leaving the lock state different from what it started but |
2485 | ** still short of its goal. The following chart shows the allowed |
2486 | ** transitions and the inserted intermediate states: |
2487 | ** |
2488 | ** UNLOCKED -> SHARED |
2489 | ** SHARED -> RESERVED |
2490 | ** SHARED -> (PENDING) -> EXCLUSIVE |
2491 | ** RESERVED -> (PENDING) -> EXCLUSIVE |
2492 | ** PENDING -> EXCLUSIVE |
2493 | ** |
2494 | ** flock() only really support EXCLUSIVE locks. We track intermediate |
2495 | ** lock states in the sqlite3_file structure, but all locks SHARED or |
2496 | ** above are really EXCLUSIVE locks and exclude all other processes from |
2497 | ** access the file. |
2498 | ** |
2499 | ** This routine will only increase a lock. Use the sqlite3OsUnlock() |
2500 | ** routine to lower a locking level. |
2501 | */ |
2502 | static int flockLock(sqlite3_file *id, int eFileLock) { |
2503 | int rc = SQLITE_OK; |
2504 | unixFile *pFile = (unixFile*)id; |
2505 | |
2506 | assert( pFile ); |
2507 | |
2508 | /* if we already have a lock, it is exclusive. |
2509 | ** Just adjust level and punt on outta here. */ |
2510 | if (pFile->eFileLock > NO_LOCK) { |
2511 | pFile->eFileLock = eFileLock; |
2512 | return SQLITE_OK; |
2513 | } |
2514 | |
2515 | /* grab an exclusive lock */ |
2516 | |
2517 | if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { |
2518 | int tErrno = errno; |
2519 | /* didn't get, must be busy */ |
2520 | rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); |
2521 | if( IS_LOCK_ERROR(rc) ){ |
2522 | storeLastErrno(pFile, tErrno); |
2523 | } |
2524 | } else { |
2525 | /* got it, set the type and return ok */ |
2526 | pFile->eFileLock = eFileLock; |
2527 | } |
2528 | OSTRACE(("LOCK %d %s %s (flock)\n" , pFile->h, azFileLock(eFileLock), |
2529 | rc==SQLITE_OK ? "ok" : "failed" )); |
2530 | #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS |
2531 | if( (rc & 0xff) == SQLITE_IOERR ){ |
2532 | rc = SQLITE_BUSY; |
2533 | } |
2534 | #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ |
2535 | return rc; |
2536 | } |
2537 | |
2538 | |
2539 | /* |
2540 | ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock |
2541 | ** must be either NO_LOCK or SHARED_LOCK. |
2542 | ** |
2543 | ** If the locking level of the file descriptor is already at or below |
2544 | ** the requested locking level, this routine is a no-op. |
2545 | */ |
2546 | static int flockUnlock(sqlite3_file *id, int eFileLock) { |
2547 | unixFile *pFile = (unixFile*)id; |
2548 | |
2549 | assert( pFile ); |
2550 | OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n" , pFile->h, eFileLock, |
2551 | pFile->eFileLock, osGetpid(0))); |
2552 | assert( eFileLock<=SHARED_LOCK ); |
2553 | |
2554 | /* no-op if possible */ |
2555 | if( pFile->eFileLock==eFileLock ){ |
2556 | return SQLITE_OK; |
2557 | } |
2558 | |
2559 | /* shared can just be set because we always have an exclusive */ |
2560 | if (eFileLock==SHARED_LOCK) { |
2561 | pFile->eFileLock = eFileLock; |
2562 | return SQLITE_OK; |
2563 | } |
2564 | |
2565 | /* no, really, unlock. */ |
2566 | if( robust_flock(pFile->h, LOCK_UN) ){ |
2567 | #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS |
2568 | return SQLITE_OK; |
2569 | #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ |
2570 | return SQLITE_IOERR_UNLOCK; |
2571 | }else{ |
2572 | pFile->eFileLock = NO_LOCK; |
2573 | return SQLITE_OK; |
2574 | } |
2575 | } |
2576 | |
2577 | /* |
2578 | ** Close a file. |
2579 | */ |
2580 | static int flockClose(sqlite3_file *id) { |
2581 | assert( id!=0 ); |
2582 | flockUnlock(id, NO_LOCK); |
2583 | return closeUnixFile(id); |
2584 | } |
2585 | |
#endif /* SQLITE_ENABLE_LOCKING_STYLE */
2587 | |
2588 | /******************* End of the flock lock implementation ********************* |
2589 | ******************************************************************************/ |
2590 | |
2591 | /****************************************************************************** |
2592 | ************************ Begin Named Semaphore Locking ************************ |
2593 | ** |
2594 | ** Named semaphore locking is only supported on VxWorks. |
2595 | ** |
2596 | ** Semaphore locking is like dot-lock and flock in that it really only |
2597 | ** supports EXCLUSIVE locking. Only a single process can read or write |
2598 | ** the database file at a time. This reduces potential concurrency, but |
2599 | ** makes the lock implementation much easier. |
2600 | */ |
2601 | #if OS_VXWORKS |
2602 | |
2603 | /* |
2604 | ** This routine checks if there is a RESERVED lock held on the specified |
2605 | ** file by this or any other process. If such a lock is held, set *pResOut |
2606 | ** to a non-zero value otherwise *pResOut is set to zero. The return value |
2607 | ** is set to SQLITE_OK unless an I/O error occurs during lock checking. |
2608 | */ |
2609 | static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) { |
2610 | int rc = SQLITE_OK; |
2611 | int reserved = 0; |
2612 | unixFile *pFile = (unixFile*)id; |
2613 | |
2614 | SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); |
2615 | |
2616 | assert( pFile ); |
2617 | |
2618 | /* Check if a thread in this process holds such a lock */ |
2619 | if( pFile->eFileLock>SHARED_LOCK ){ |
2620 | reserved = 1; |
2621 | } |
2622 | |
2623 | /* Otherwise see if some other process holds it. */ |
2624 | if( !reserved ){ |
2625 | sem_t *pSem = pFile->pInode->pSem; |
2626 | |
2627 | if( sem_trywait(pSem)==-1 ){ |
2628 | int tErrno = errno; |
2629 | if( EAGAIN != tErrno ){ |
2630 | rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); |
2631 | storeLastErrno(pFile, tErrno); |
2632 | } else { |
2633 | /* someone else has the lock when we are in NO_LOCK */ |
2634 | reserved = (pFile->eFileLock < SHARED_LOCK); |
2635 | } |
2636 | }else{ |
2637 | /* we could have it if we want it */ |
2638 | sem_post(pSem); |
2639 | } |
2640 | } |
2641 | OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n" , pFile->h, rc, reserved)); |
2642 | |
2643 | *pResOut = reserved; |
2644 | return rc; |
2645 | } |
2646 | |
2647 | /* |
2648 | ** Lock the file with the lock specified by parameter eFileLock - one |
2649 | ** of the following: |
2650 | ** |
2651 | ** (1) SHARED_LOCK |
2652 | ** (2) RESERVED_LOCK |
2653 | ** (3) PENDING_LOCK |
2654 | ** (4) EXCLUSIVE_LOCK |
2655 | ** |
2656 | ** Sometimes when requesting one lock state, additional lock states |
2657 | ** are inserted in between. The locking might fail on one of the later |
2658 | ** transitions leaving the lock state different from what it started but |
2659 | ** still short of its goal. The following chart shows the allowed |
2660 | ** transitions and the inserted intermediate states: |
2661 | ** |
2662 | ** UNLOCKED -> SHARED |
2663 | ** SHARED -> RESERVED |
2664 | ** SHARED -> (PENDING) -> EXCLUSIVE |
2665 | ** RESERVED -> (PENDING) -> EXCLUSIVE |
2666 | ** PENDING -> EXCLUSIVE |
2667 | ** |
2668 | ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate |
2669 | ** lock states in the sqlite3_file structure, but all locks SHARED or |
2670 | ** above are really EXCLUSIVE locks and exclude all other processes from |
2671 | ** access the file. |
2672 | ** |
2673 | ** This routine will only increase a lock. Use the sqlite3OsUnlock() |
2674 | ** routine to lower a locking level. |
2675 | */ |
2676 | static int semXLock(sqlite3_file *id, int eFileLock) { |
2677 | unixFile *pFile = (unixFile*)id; |
2678 | sem_t *pSem = pFile->pInode->pSem; |
2679 | int rc = SQLITE_OK; |
2680 | |
2681 | /* if we already have a lock, it is exclusive. |
2682 | ** Just adjust level and punt on outta here. */ |
2683 | if (pFile->eFileLock > NO_LOCK) { |
2684 | pFile->eFileLock = eFileLock; |
2685 | rc = SQLITE_OK; |
2686 | goto sem_end_lock; |
2687 | } |
2688 | |
2689 | /* lock semaphore now but bail out when already locked. */ |
2690 | if( sem_trywait(pSem)==-1 ){ |
2691 | rc = SQLITE_BUSY; |
2692 | goto sem_end_lock; |
2693 | } |
2694 | |
2695 | /* got it, set the type and return ok */ |
2696 | pFile->eFileLock = eFileLock; |
2697 | |
2698 | sem_end_lock: |
2699 | return rc; |
2700 | } |
2701 | |
2702 | /* |
2703 | ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock |
2704 | ** must be either NO_LOCK or SHARED_LOCK. |
2705 | ** |
2706 | ** If the locking level of the file descriptor is already at or below |
2707 | ** the requested locking level, this routine is a no-op. |
2708 | */ |
2709 | static int semXUnlock(sqlite3_file *id, int eFileLock) { |
2710 | unixFile *pFile = (unixFile*)id; |
2711 | sem_t *pSem = pFile->pInode->pSem; |
2712 | |
2713 | assert( pFile ); |
2714 | assert( pSem ); |
2715 | OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n" , pFile->h, eFileLock, |
2716 | pFile->eFileLock, osGetpid(0))); |
2717 | assert( eFileLock<=SHARED_LOCK ); |
2718 | |
2719 | /* no-op if possible */ |
2720 | if( pFile->eFileLock==eFileLock ){ |
2721 | return SQLITE_OK; |
2722 | } |
2723 | |
2724 | /* shared can just be set because we always have an exclusive */ |
2725 | if (eFileLock==SHARED_LOCK) { |
2726 | pFile->eFileLock = eFileLock; |
2727 | return SQLITE_OK; |
2728 | } |
2729 | |
2730 | /* no, really unlock. */ |
2731 | if ( sem_post(pSem)==-1 ) { |
2732 | int rc, tErrno = errno; |
2733 | rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); |
2734 | if( IS_LOCK_ERROR(rc) ){ |
2735 | storeLastErrno(pFile, tErrno); |
2736 | } |
2737 | return rc; |
2738 | } |
2739 | pFile->eFileLock = NO_LOCK; |
2740 | return SQLITE_OK; |
2741 | } |
2742 | |
2743 | /* |
2744 | ** Close a file. |
2745 | */ |
2746 | static int semXClose(sqlite3_file *id) { |
2747 | if( id ){ |
2748 | unixFile *pFile = (unixFile*)id; |
2749 | semXUnlock(id, NO_LOCK); |
2750 | assert( pFile ); |
2751 | assert( unixFileMutexNotheld(pFile) ); |
2752 | unixEnterMutex(); |
2753 | releaseInodeInfo(pFile); |
2754 | unixLeaveMutex(); |
2755 | closeUnixFile(id); |
2756 | } |
2757 | return SQLITE_OK; |
2758 | } |
2759 | |
2760 | #endif /* OS_VXWORKS */ |
2761 | /* |
2762 | ** Named semaphore locking is only available on VxWorks. |
2763 | ** |
2764 | *************** End of the named semaphore lock implementation **************** |
2765 | ******************************************************************************/ |
2766 | |
2767 | |
2768 | /****************************************************************************** |
2769 | *************************** Begin AFP Locking ********************************* |
2770 | ** |
2771 | ** AFP is the Apple Filing Protocol. AFP is a network filesystem found |
2772 | ** on Apple Macintosh computers - both OS9 and OSX. |
2773 | ** |
2774 | ** Third-party implementations of AFP are available. But this code here |
2775 | ** only works on OSX. |
2776 | */ |
2777 | |
2778 | #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE |
/*
** The afpLockingContext structure contains all afp lock specific state.
** One instance hangs off unixFile.lockingContext for each file handle
** that uses AFP locking.
*/
typedef struct afpLockingContext afpLockingContext;
struct afpLockingContext {
  int reserved;        /* Set to 1 by afpLock() once the RESERVED byte is
                       ** locked through this handle; cleared by afpUnlock()
                       ** when that byte is released */
  const char *dbPath; /* Name of the open file */
};

/*
** Parameter block handed to fsctl() by afpSetLock() to set or clear one
** byte-range lock.
*/
struct ByteRangeLockPB2
{
  unsigned long long offset; /* offset to first byte to lock */
  unsigned long long length; /* number of bytes to lock */
  unsigned long long retRangeStart; /* number of 1st byte locked if successful */
  unsigned char unLockFlag; /* 1 = unlock, 0 = lock */
  unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */
  int fd; /* file desc to assoc this lock with */
};

/* fsctl() request code used by afpSetLock() together with a
** ByteRangeLockPB2 parameter block. */
#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2)
2799 | |
2800 | /* |
2801 | ** This is a utility for setting or clearing a bit-range lock on an |
2802 | ** AFP filesystem. |
2803 | ** |
2804 | ** Return SQLITE_OK on success, SQLITE_BUSY on failure. |
2805 | */ |
2806 | static int afpSetLock( |
2807 | const char *path, /* Name of the file to be locked or unlocked */ |
2808 | unixFile *pFile, /* Open file descriptor on path */ |
2809 | unsigned long long offset, /* First byte to be locked */ |
2810 | unsigned long long length, /* Number of bytes to lock */ |
2811 | int setLockFlag /* True to set lock. False to clear lock */ |
2812 | ){ |
2813 | struct ByteRangeLockPB2 pb; |
2814 | int err; |
2815 | |
2816 | pb.unLockFlag = setLockFlag ? 0 : 1; |
2817 | pb.startEndFlag = 0; |
2818 | pb.offset = offset; |
2819 | pb.length = length; |
2820 | pb.fd = pFile->h; |
2821 | |
2822 | OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n" , |
2823 | (setLockFlag?"ON" :"OFF" ), pFile->h, (pb.fd==-1?"[testval-1]" :"" ), |
2824 | offset, length)); |
2825 | err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); |
2826 | if ( err==-1 ) { |
2827 | int rc; |
2828 | int tErrno = errno; |
2829 | OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n" , |
2830 | path, tErrno, strerror(tErrno))); |
2831 | #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS |
2832 | rc = SQLITE_BUSY; |
2833 | #else |
2834 | rc = sqliteErrorFromPosixError(tErrno, |
2835 | setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); |
2836 | #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ |
2837 | if( IS_LOCK_ERROR(rc) ){ |
2838 | storeLastErrno(pFile, tErrno); |
2839 | } |
2840 | return rc; |
2841 | } else { |
2842 | return SQLITE_OK; |
2843 | } |
2844 | } |
2845 | |
2846 | /* |
2847 | ** This routine checks if there is a RESERVED lock held on the specified |
2848 | ** file by this or any other process. If such a lock is held, set *pResOut |
2849 | ** to a non-zero value otherwise *pResOut is set to zero. The return value |
2850 | ** is set to SQLITE_OK unless an I/O error occurs during lock checking. |
2851 | */ |
2852 | static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ |
2853 | int rc = SQLITE_OK; |
2854 | int reserved = 0; |
2855 | unixFile *pFile = (unixFile*)id; |
2856 | afpLockingContext *context; |
2857 | |
2858 | SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); |
2859 | |
2860 | assert( pFile ); |
2861 | context = (afpLockingContext *) pFile->lockingContext; |
2862 | if( context->reserved ){ |
2863 | *pResOut = 1; |
2864 | return SQLITE_OK; |
2865 | } |
2866 | sqlite3_mutex_enter(pFile->pInode->pLockMutex); |
2867 | /* Check if a thread in this process holds such a lock */ |
2868 | if( pFile->pInode->eFileLock>SHARED_LOCK ){ |
2869 | reserved = 1; |
2870 | } |
2871 | |
2872 | /* Otherwise see if some other process holds it. |
2873 | */ |
2874 | if( !reserved ){ |
2875 | /* lock the RESERVED byte */ |
2876 | int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); |
2877 | if( SQLITE_OK==lrc ){ |
2878 | /* if we succeeded in taking the reserved lock, unlock it to restore |
2879 | ** the original state */ |
2880 | lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); |
2881 | } else { |
2882 | /* if we failed to get the lock then someone else must have it */ |
2883 | reserved = 1; |
2884 | } |
2885 | if( IS_LOCK_ERROR(lrc) ){ |
2886 | rc=lrc; |
2887 | } |
2888 | } |
2889 | |
2890 | sqlite3_mutex_leave(pFile->pInode->pLockMutex); |
2891 | OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n" , pFile->h, rc, reserved)); |
2892 | |
2893 | *pResOut = reserved; |
2894 | return rc; |
2895 | } |
2896 | |
/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
**     (1) SHARED_LOCK
**     (2) RESERVED_LOCK
**     (3) PENDING_LOCK
**     (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between.  The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal.  The following chart shows the allowed
** transitions and the inserted intermediate states:
**
**    UNLOCKED -> SHARED
**    SHARED -> RESERVED
**    SHARED -> (PENDING) -> EXCLUSIVE
**    RESERVED -> (PENDING) -> EXCLUSIVE
**    PENDING -> EXCLUSIVE
**
** Returns SQLITE_OK on success, SQLITE_BUSY when a conflicting lock is
** recorded on the shared inode, or whatever afpSetLock() reports when a
** byte-range lock operation fails.  When an EXCLUSIVE request fails on a
** path that reaches the final state update, both the handle and the
** inode are recorded as PENDING_LOCK.
**
** This routine will only increase a lock.  Use the sqlite3OsUnlock()
** routine to lower a locking level.
*/
static int afpLock(sqlite3_file *id, int eFileLock){
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode = pFile->pInode;
  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;

  assert( pFile );
  OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n" , pFile->h,
  azFileLock(eFileLock), azFileLock(pFile->eFileLock),
  azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0)));

  /* If there is already a lock of this type or more restrictive on the
  ** unixFile, do nothing.  Don't use the afp_end_lock: exit path, as
  ** pInode->pLockMutex has not been entered yet.
  */
  if( pFile->eFileLock>=eFileLock ){
    OSTRACE(("LOCK %d %s ok (already held) (afp)\n" , pFile->h,
    azFileLock(eFileLock)));
    return SQLITE_OK;
  }

  /* Make sure the locking sequence is correct
  ** (1) We never move from unlocked to anything higher than shared lock.
  ** (2) SQLite never explicitly requests a pending lock.
  ** (3) A shared lock is always held when a reserved lock is requested.
  */
  assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  assert( eFileLock!=PENDING_LOCK );
  assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );

  /* This mutex is needed because pFile->pInode is shared across threads
  */
  pInode = pFile->pInode;
  sqlite3_mutex_enter(pInode->pLockMutex);

  /* If some thread using this PID has a lock via a different unixFile*
  ** handle that precludes the requested lock, return BUSY.
  */
  if( (pFile->eFileLock!=pInode->eFileLock &&
  (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
  ){
    rc = SQLITE_BUSY;
    goto afp_end_lock;
  }

  /* If a SHARED lock is requested, and some thread using this PID already
  ** has a SHARED or RESERVED lock, then increment reference counts and
  ** return SQLITE_OK.  No operating system call is needed in this case.
  */
  if( eFileLock==SHARED_LOCK &&
  (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
    assert( eFileLock==SHARED_LOCK );
    assert( pFile->eFileLock==0 );
    assert( pInode->nShared>0 );
    pFile->eFileLock = SHARED_LOCK;
    pInode->nShared++;
    pInode->nLock++;
    goto afp_end_lock;
  }

  /* A PENDING lock is needed before acquiring a SHARED lock and before
  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
  ** be released.
  */
  if( eFileLock==SHARED_LOCK
  || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
  ){
    int failed;
    failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);
    if (failed) {
      rc = failed;
      goto afp_end_lock;
    }
  }

  /* If control gets to this point, then actually go ahead and make
  ** operating system calls for the specified lock.
  */
  if( eFileLock==SHARED_LOCK ){
    int lrc1, lrc2, lrc1Errno = 0;
    long lk, mask;

    assert( pInode->nShared==0 );
    assert( pInode->eFileLock==0 );

    mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff;
    /* Now get the read-lock SHARED_LOCK */
    /* A single byte inside the shared range is chosen at random and
    ** remembered in pInode->sharedByte.  Note that the quality of the
    ** randomness doesn't matter that much. */
    lk = random();
    pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1);
    lrc1 = afpSetLock(context->dbPath, pFile,
    SHARED_FIRST+pInode->sharedByte, 1, 1);
    if( IS_LOCK_ERROR(lrc1) ){
      /* Save the errno now: the PENDING unlock below may overwrite
      ** pFile->lastErrno. */
      lrc1Errno = pFile->lastErrno;
    }
    /* Drop the temporary PENDING lock */
    lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);

    if( IS_LOCK_ERROR(lrc1) ) {
      storeLastErrno(pFile, lrc1Errno);
      rc = lrc1;
      goto afp_end_lock;
    } else if( IS_LOCK_ERROR(lrc2) ){
      rc = lrc2;
      goto afp_end_lock;
    } else if( lrc1 != SQLITE_OK ) {
      rc = lrc1;
    } else {
      pFile->eFileLock = SHARED_LOCK;
      pInode->nLock++;
      pInode->nShared = 1;
    }
  }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
    /* We are trying for an exclusive lock but another thread in this
    ** same process is still holding a shared lock. */
    rc = SQLITE_BUSY;
  }else{
    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
    ** assumed that there is a SHARED or greater lock on the file
    ** already.
    */
    int failed = 0;
    assert( 0!=pFile->eFileLock );
    if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) {
      /* Acquire a RESERVED lock */
      failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
      if( !failed ){
        context->reserved = 1;
      }
    }
    if (!failed && eFileLock == EXCLUSIVE_LOCK) {
      /* Acquire an EXCLUSIVE lock */

      /* Remove the shared lock before trying the range.  We'll need to
      ** reestablish the shared lock if we can't get the exclusive range.
      */
      if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST +
      pInode->sharedByte, 1, 0)) ){
        int failed2 = SQLITE_OK;
        /* now attempt to get the exclusive lock range */
        failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST,
        SHARED_SIZE, 1);
        if( failed && (failed2 = afpSetLock(context->dbPath, pFile,
        SHARED_FIRST + pInode->sharedByte, 1, 1)) ){
          /* Can't reestablish the shared lock.  SQLite can't deal, this is
          ** a critical I/O error
          */
          rc = ((failed & 0xff) == SQLITE_IOERR) ? failed2 :
          SQLITE_IOERR_LOCK;
          goto afp_end_lock;
        }
      }else{
        rc = failed;
      }
    }
    if( failed ){
      rc = failed;
    }
  }

  /* Record the outcome.  On success both the handle and the inode move to
  ** the requested level.  A failed EXCLUSIVE request that reaches this
  ** point is recorded as PENDING on both. */
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
    pInode->eFileLock = eFileLock;
  }else if( eFileLock==EXCLUSIVE_LOCK ){
    pFile->eFileLock = PENDING_LOCK;
    pInode->eFileLock = PENDING_LOCK;
  }

afp_end_lock:
  sqlite3_mutex_leave(pInode->pLockMutex);
  OSTRACE(("LOCK %d %s %s (afp)\n" , pFile->h, azFileLock(eFileLock),
  rc==SQLITE_OK ? "ok" : "failed" ));
  return rc;
}
3095 | |
/*
** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
**
** Returns SQLITE_OK on success, otherwise the first error reported by
** afpSetLock().  The handle's recorded level is only changed to eFileLock
** when the result is SQLITE_OK.
*/
static int afpUnlock(sqlite3_file *id, int eFileLock) {
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
  int skipShared = 0;  /* Set when the shared byte was not re-locked after
                       ** dropping the exclusive range, so that the later
                       ** NO_LOCK step does not try to unlock it */
#ifdef SQLITE_TEST
  int h = pFile->h;    /* Only written by the SimulateIOError() hooks below */
#endif

  assert( pFile );
  OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n" , pFile->h, eFileLock,
  pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
  osGetpid(0)));

  assert( eFileLock<=SHARED_LOCK );
  if( pFile->eFileLock<=eFileLock ){
    /* Already at or below the requested level; the mutex is not needed. */
    return SQLITE_OK;
  }
  pInode = pFile->pInode;
  sqlite3_mutex_enter(pInode->pLockMutex);
  assert( pInode->nShared!=0 );
  if( pFile->eFileLock>SHARED_LOCK ){
    /* Step 1: give up everything held above SHARED. */
    assert( pInode->eFileLock==pFile->eFileLock );
    SimulateIOErrorBenign(1);
    SimulateIOError( h=(-1) )
    SimulateIOErrorBenign(0);

#ifdef SQLITE_DEBUG
    /* When reducing a lock such that other processes can start
    ** reading the database file again, make sure that the
    ** transaction counter was updated if any part of the database
    ** file changed.  If the transaction counter is not updated,
    ** other connections to the same file might not realize that
    ** the file has changed and hence might not know to flush their
    ** cache.  The use of a stale cache can lead to database corruption.
    */
    assert( pFile->inNormalWrite==0
    || pFile->dbUpdate==0
    || pFile->transCntrChng==1 );
    pFile->inNormalWrite = 0;
#endif

    if( pFile->eFileLock==EXCLUSIVE_LOCK ){
      /* Release the whole shared range that was locked for EXCLUSIVE. */
      rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
      if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){
        /* only re-establish the shared lock if necessary */
        int sharedLockByte = SHARED_FIRST+pInode->sharedByte;
        rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1);
      } else {
        /* Either the range unlock failed or no shared byte is wanted;
        ** in both cases the shared byte is not locked by us now. */
        skipShared = 1;
      }
    }
    if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){
      rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
    }
    if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){
      rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
      if( !rc ){
        context->reserved = 0;
      }
    }
    if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){
      pInode->eFileLock = SHARED_LOCK;
    }
  }
  if( rc==SQLITE_OK && eFileLock==NO_LOCK ){
    /* Step 2: give up the SHARED lock as well. */

    /* Decrement the shared lock counter.  Release the lock using an
    ** OS call only when all threads in this same process have released
    ** the lock.
    */
    unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte;
    pInode->nShared--;
    if( pInode->nShared==0 ){
      SimulateIOErrorBenign(1);
      SimulateIOError( h=(-1) )
      SimulateIOErrorBenign(0);
      if( !skipShared ){
        rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0);
      }
      if( !rc ){
        pInode->eFileLock = NO_LOCK;
        pFile->eFileLock = NO_LOCK;
      }
    }
    if( rc==SQLITE_OK ){
      /* One fewer lock on the inode; once none remain, descriptors whose
      ** close was deferred can be closed. */
      pInode->nLock--;
      assert( pInode->nLock>=0 );
      if( pInode->nLock==0 ) closePendingFds(pFile);
    }
  }

  sqlite3_mutex_leave(pInode->pLockMutex);
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
  }
  return rc;
}
3202 | |
3203 | /* |
3204 | ** Close a file & cleanup AFP specific locking context |
3205 | */ |
3206 | static int afpClose(sqlite3_file *id) { |
3207 | int rc = SQLITE_OK; |
3208 | unixFile *pFile = (unixFile*)id; |
3209 | assert( id!=0 ); |
3210 | afpUnlock(id, NO_LOCK); |
3211 | assert( unixFileMutexNotheld(pFile) ); |
3212 | unixEnterMutex(); |
3213 | if( pFile->pInode ){ |
3214 | unixInodeInfo *pInode = pFile->pInode; |
3215 | sqlite3_mutex_enter(pInode->pLockMutex); |
3216 | if( pInode->nLock ){ |
3217 | /* If there are outstanding locks, do not actually close the file just |
3218 | ** yet because that would clear those locks. Instead, add the file |
3219 | ** descriptor to pInode->aPending. It will be automatically closed when |
3220 | ** the last lock is cleared. |
3221 | */ |
3222 | setPendingFd(pFile); |
3223 | } |
3224 | sqlite3_mutex_leave(pInode->pLockMutex); |
3225 | } |
3226 | releaseInodeInfo(pFile); |
3227 | sqlite3_free(pFile->lockingContext); |
3228 | rc = closeUnixFile(id); |
3229 | unixLeaveMutex(); |
3230 | return rc; |
3231 | } |
3232 | |
3233 | #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ |
3234 | /* |
3235 | ** The code above is the AFP lock implementation. The code is specific |
3236 | ** to MacOSX and does not work on other unix platforms. No alternative |
3237 | ** is available. If you don't compile for a mac, then the "unix-afp" |
3238 | ** VFS is not available. |
3239 | ** |
3240 | ********************* End of the AFP lock implementation ********************** |
3241 | ******************************************************************************/ |
3242 | |
3243 | /****************************************************************************** |
3244 | *************************** Begin NFS Locking ********************************/ |
3245 | |
3246 | #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE |
3247 | /* |
3248 | ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock |
3249 | ** must be either NO_LOCK or SHARED_LOCK. |
3250 | ** |
3251 | ** If the locking level of the file descriptor is already at or below |
3252 | ** the requested locking level, this routine is a no-op. |
3253 | */ |
3254 | static int nfsUnlock(sqlite3_file *id, int eFileLock){ |
3255 | return posixUnlock(id, eFileLock, 1); |
3256 | } |
3257 | |
3258 | #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ |
3259 | /* |
3260 | ** The code above is the NFS lock implementation. The code is specific |
3261 | ** to MacOSX and does not work on other unix platforms. No alternative |
3262 | ** is available. |
3263 | ** |
3264 | ********************* End of the NFS lock implementation ********************** |
3265 | ******************************************************************************/ |
3266 | |
3267 | /****************************************************************************** |
3268 | **************** Non-locking sqlite3_file methods ***************************** |
3269 | ** |
3270 | ** The next division contains implementations for all methods of the |
3271 | ** sqlite3_file object other than the locking methods. The locking |
3272 | ** methods were defined in divisions above (one locking method per |
** division).  Those methods that are common to all locking modes
** are gathered together into this division.
3275 | */ |
3276 | |
3277 | /* |
3278 | ** Seek to the offset passed as the second argument, then read cnt |
3279 | ** bytes into pBuf. Return the number of bytes actually read. |
3280 | ** |
3281 | ** NB: If you define USE_PREAD or USE_PREAD64, then it might also |
3282 | ** be necessary to define _XOPEN_SOURCE to be 500. This varies from |
3283 | ** one system to another. Since SQLite does not define USE_PREAD |
3284 | ** in any form by default, we will not attempt to define _XOPEN_SOURCE. |
3285 | ** See tickets #2741 and #2681. |
3286 | ** |
3287 | ** To avoid stomping the errno value on a failed read the lastErrno value |
3288 | ** is set before returning. |
3289 | */ |
3290 | static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ |
3291 | int got; |
3292 | int prior = 0; |
3293 | #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) |
3294 | i64 newOffset; |
3295 | #endif |
3296 | TIMER_START; |
3297 | assert( cnt==(cnt&0x1ffff) ); |
3298 | assert( id->h>2 ); |
3299 | do{ |
3300 | #if defined(USE_PREAD) |
3301 | got = osPread(id->h, pBuf, cnt, offset); |
3302 | SimulateIOError( got = -1 ); |
3303 | #elif defined(USE_PREAD64) |
3304 | got = osPread64(id->h, pBuf, cnt, offset); |
3305 | SimulateIOError( got = -1 ); |
3306 | #else |
3307 | newOffset = lseek(id->h, offset, SEEK_SET); |
3308 | SimulateIOError( newOffset = -1 ); |
3309 | if( newOffset<0 ){ |
3310 | storeLastErrno((unixFile*)id, errno); |
3311 | return -1; |
3312 | } |
3313 | got = osRead(id->h, pBuf, cnt); |
3314 | #endif |
3315 | if( got==cnt ) break; |
3316 | if( got<0 ){ |
3317 | if( errno==EINTR ){ got = 1; continue; } |
3318 | prior = 0; |
3319 | storeLastErrno((unixFile*)id, errno); |
3320 | break; |
3321 | }else if( got>0 ){ |
3322 | cnt -= got; |
3323 | offset += got; |
3324 | prior += got; |
3325 | pBuf = (void*)(got + (char*)pBuf); |
3326 | } |
3327 | }while( got>0 ); |
3328 | TIMER_END; |
3329 | OSTRACE(("READ %-3d %5d %7lld %llu\n" , |
3330 | id->h, got+prior, offset-prior, TIMER_ELAPSED)); |
3331 | return got+prior; |
3332 | } |
3333 | |
3334 | /* |
3335 | ** Read data from a file into a buffer. Return SQLITE_OK if all |
3336 | ** bytes were read successfully and SQLITE_IOERR if anything goes |
3337 | ** wrong. |
3338 | */ |
3339 | static int unixRead( |
3340 | sqlite3_file *id, |
3341 | void *pBuf, |
3342 | int amt, |
3343 | sqlite3_int64 offset |
3344 | ){ |
3345 | unixFile *pFile = (unixFile *)id; |
3346 | int got; |
3347 | assert( id ); |
3348 | assert( offset>=0 ); |
3349 | assert( amt>0 ); |
3350 | |
3351 | /* If this is a database file (not a journal, super-journal or temp |
3352 | ** file), the bytes in the locking range should never be read or written. */ |
3353 | #if 0 |
3354 | assert( pFile->pPreallocatedUnused==0 |
3355 | || offset>=PENDING_BYTE+512 |
3356 | || offset+amt<=PENDING_BYTE |
3357 | ); |
3358 | #endif |
3359 | |
3360 | #if SQLITE_MAX_MMAP_SIZE>0 |
3361 | /* Deal with as much of this read request as possible by transfering |
3362 | ** data from the memory mapping using memcpy(). */ |
3363 | if( offset<pFile->mmapSize ){ |
3364 | if( offset+amt <= pFile->mmapSize ){ |
3365 | memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); |
3366 | return SQLITE_OK; |
3367 | }else{ |
3368 | int nCopy = pFile->mmapSize - offset; |
3369 | memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); |
3370 | pBuf = &((u8 *)pBuf)[nCopy]; |
3371 | amt -= nCopy; |
3372 | offset += nCopy; |
3373 | } |
3374 | } |
3375 | #endif |
3376 | |
3377 | got = seekAndRead(pFile, offset, pBuf, amt); |
3378 | if( got==amt ){ |
3379 | return SQLITE_OK; |
3380 | }else if( got<0 ){ |
3381 | /* pFile->lastErrno has been set by seekAndRead(). |
3382 | ** Usually we return SQLITE_IOERR_READ here, though for some |
3383 | ** kinds of errors we return SQLITE_IOERR_CORRUPTFS. The |
3384 | ** SQLITE_IOERR_CORRUPTFS will be converted into SQLITE_CORRUPT |
3385 | ** prior to returning to the application by the sqlite3ApiExit() |
3386 | ** routine. |
3387 | */ |
3388 | switch( pFile->lastErrno ){ |
3389 | case ERANGE: |
3390 | case EIO: |
3391 | #ifdef ENXIO |
3392 | case ENXIO: |
3393 | #endif |
3394 | #ifdef EDEVERR |
3395 | case EDEVERR: |
3396 | #endif |
3397 | return SQLITE_IOERR_CORRUPTFS; |
3398 | } |
3399 | return SQLITE_IOERR_READ; |
3400 | }else{ |
3401 | storeLastErrno(pFile, 0); /* not a system error */ |
3402 | /* Unread parts of the buffer must be zero-filled */ |
3403 | memset(&((char*)pBuf)[got], 0, amt-got); |
3404 | return SQLITE_IOERR_SHORT_READ; |
3405 | } |
3406 | } |
3407 | |
3408 | /* |
3409 | ** Attempt to seek the file-descriptor passed as the first argument to |
3410 | ** absolute offset iOff, then attempt to write nBuf bytes of data from |
3411 | ** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, |
3412 | ** return the actual number of bytes written (which may be less than |
3413 | ** nBuf). |
3414 | */ |
3415 | static int seekAndWriteFd( |
3416 | int fd, /* File descriptor to write to */ |
3417 | i64 iOff, /* File offset to begin writing at */ |
3418 | const void *pBuf, /* Copy data from this buffer to the file */ |
3419 | int nBuf, /* Size of buffer pBuf in bytes */ |
3420 | int *piErrno /* OUT: Error number if error occurs */ |
3421 | ){ |
3422 | int rc = 0; /* Value returned by system call */ |
3423 | |
3424 | assert( nBuf==(nBuf&0x1ffff) ); |
3425 | assert( fd>2 ); |
3426 | assert( piErrno!=0 ); |
3427 | nBuf &= 0x1ffff; |
3428 | TIMER_START; |
3429 | |
3430 | #if defined(USE_PREAD) |
3431 | do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); |
3432 | #elif defined(USE_PREAD64) |
3433 | do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); |
3434 | #else |
3435 | do{ |
3436 | i64 iSeek = lseek(fd, iOff, SEEK_SET); |
3437 | SimulateIOError( iSeek = -1 ); |
3438 | if( iSeek<0 ){ |
3439 | rc = -1; |
3440 | break; |
3441 | } |
3442 | rc = osWrite(fd, pBuf, nBuf); |
3443 | }while( rc<0 && errno==EINTR ); |
3444 | #endif |
3445 | |
3446 | TIMER_END; |
3447 | OSTRACE(("WRITE %-3d %5d %7lld %llu\n" , fd, rc, iOff, TIMER_ELAPSED)); |
3448 | |
3449 | if( rc<0 ) *piErrno = errno; |
3450 | return rc; |
3451 | } |
3452 | |
3453 | |
3454 | /* |
3455 | ** Seek to the offset in id->offset then read cnt bytes into pBuf. |
3456 | ** Return the number of bytes actually read. Update the offset. |
3457 | ** |
3458 | ** To avoid stomping the errno value on a failed write the lastErrno value |
3459 | ** is set before returning. |
3460 | */ |
3461 | static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ |
3462 | return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); |
3463 | } |
3464 | |
3465 | |
3466 | /* |
3467 | ** Write data from a buffer into a file. Return SQLITE_OK on success |
3468 | ** or some other error code on failure. |
3469 | */ |
3470 | static int unixWrite( |
3471 | sqlite3_file *id, |
3472 | const void *pBuf, |
3473 | int amt, |
3474 | sqlite3_int64 offset |
3475 | ){ |
3476 | unixFile *pFile = (unixFile*)id; |
3477 | int wrote = 0; |
3478 | assert( id ); |
3479 | assert( amt>0 ); |
3480 | |
3481 | /* If this is a database file (not a journal, super-journal or temp |
3482 | ** file), the bytes in the locking range should never be read or written. */ |
3483 | #if 0 |
3484 | assert( pFile->pPreallocatedUnused==0 |
3485 | || offset>=PENDING_BYTE+512 |
3486 | || offset+amt<=PENDING_BYTE |
3487 | ); |
3488 | #endif |
3489 | |
3490 | #ifdef SQLITE_DEBUG |
3491 | /* If we are doing a normal write to a database file (as opposed to |
3492 | ** doing a hot-journal rollback or a write to some file other than a |
3493 | ** normal database file) then record the fact that the database |
3494 | ** has changed. If the transaction counter is modified, record that |
3495 | ** fact too. |
3496 | */ |
3497 | if( pFile->inNormalWrite ){ |
3498 | pFile->dbUpdate = 1; /* The database has been modified */ |
3499 | if( offset<=24 && offset+amt>=27 ){ |
3500 | int rc; |
3501 | char oldCntr[4]; |
3502 | SimulateIOErrorBenign(1); |
3503 | rc = seekAndRead(pFile, 24, oldCntr, 4); |
3504 | SimulateIOErrorBenign(0); |
3505 | if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ |
3506 | pFile->transCntrChng = 1; /* The transaction counter has changed */ |
3507 | } |
3508 | } |
3509 | } |
3510 | #endif |
3511 | |
3512 | #if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0 |
3513 | /* Deal with as much of this write request as possible by transfering |
3514 | ** data from the memory mapping using memcpy(). */ |
3515 | if( offset<pFile->mmapSize ){ |
3516 | if( offset+amt <= pFile->mmapSize ){ |
3517 | memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); |
3518 | return SQLITE_OK; |
3519 | }else{ |
3520 | int nCopy = pFile->mmapSize - offset; |
3521 | memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); |
3522 | pBuf = &((u8 *)pBuf)[nCopy]; |
3523 | amt -= nCopy; |
3524 | offset += nCopy; |
3525 | } |
3526 | } |
3527 | #endif |
3528 | |
3529 | while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))<amt && wrote>0 ){ |
3530 | amt -= wrote; |
3531 | offset += wrote; |
3532 | pBuf = &((char*)pBuf)[wrote]; |
3533 | } |
3534 | SimulateIOError(( wrote=(-1), amt=1 )); |
3535 | SimulateDiskfullError(( wrote=0, amt=1 )); |
3536 | |
3537 | if( amt>wrote ){ |
3538 | if( wrote<0 && pFile->lastErrno!=ENOSPC ){ |
3539 | /* lastErrno set by seekAndWrite */ |
3540 | return SQLITE_IOERR_WRITE; |
3541 | }else{ |
3542 | storeLastErrno(pFile, 0); /* not a system error */ |
3543 | return SQLITE_FULL; |
3544 | } |
3545 | } |
3546 | |
3547 | return SQLITE_OK; |
3548 | } |
3549 | |
#ifdef SQLITE_TEST
/*
** Count the number of fullsyncs and normal syncs.  This is used to test
** that syncs and fullsyncs are occurring at the right times.
**
** These counters only exist in SQLITE_TEST builds.  full_fsync() bumps
** sqlite3_sync_count on every call and additionally bumps
** sqlite3_fullsync_count when its fullSync argument is non-zero.
*/
int sqlite3_sync_count = 0;
int sqlite3_fullsync_count = 0;
#endif
3558 | |
/*
** We do not trust systems to provide a working fdatasync().  Some do.
** Others do not.  To be safe, we will stick with the (slightly slower)
** fsync().  If you know that your system does support fdatasync() correctly,
** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC
*/
#if !defined(fdatasync) && !HAVE_FDATASYNC
# define fdatasync fsync
#endif

/*
** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
** the F_FULLFSYNC macro is defined.  F_FULLFSYNC is currently
** only available on Mac OS X.  But that could change.
*/
#ifdef F_FULLFSYNC
# define HAVE_FULLFSYNC 1
#else
# define HAVE_FULLFSYNC 0
#endif
3579 | |
3580 | |
3581 | /* |
3582 | ** The fsync() system call does not work as advertised on many |
3583 | ** unix systems. The following procedure is an attempt to make |
3584 | ** it work better. |
3585 | ** |
3586 | ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful |
3587 | ** for testing when we want to run through the test suite quickly. |
3588 | ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC |
3589 | ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash |
3590 | ** or power failure will likely corrupt the database file. |
3591 | ** |
3592 | ** SQLite sets the dataOnly flag if the size of the file is unchanged. |
3593 | ** The idea behind dataOnly is that it should only write the file content |
3594 | ** to disk, not the inode. We only set dataOnly if the file size is |
3595 | ** unchanged since the file size is part of the inode. However, |
3596 | ** Ted Ts'o tells us that fdatasync() will also write the inode if the |
3597 | ** file size has changed. The only real difference between fdatasync() |
3598 | ** and fsync(), Ted tells us, is that fdatasync() will not flush the |
3599 | ** inode if the mtime or owner or other inode attributes have changed. |
3600 | ** We only care about the file size, not the other file attributes, so |
3601 | ** as far as SQLite is concerned, an fdatasync() is always adequate. |
3602 | ** So, we always use fdatasync() if it is available, regardless of |
3603 | ** the value of the dataOnly flag. |
3604 | */ |
3605 | static int full_fsync(int fd, int fullSync, int dataOnly){ |
3606 | int rc; |
3607 | |
3608 | /* The following "ifdef/elif/else/" block has the same structure as |
3609 | ** the one below. It is replicated here solely to avoid cluttering |
3610 | ** up the real code with the UNUSED_PARAMETER() macros. |
3611 | */ |
3612 | #ifdef SQLITE_NO_SYNC |
3613 | UNUSED_PARAMETER(fd); |
3614 | UNUSED_PARAMETER(fullSync); |
3615 | UNUSED_PARAMETER(dataOnly); |
3616 | #elif HAVE_FULLFSYNC |
3617 | UNUSED_PARAMETER(dataOnly); |
3618 | #else |
3619 | UNUSED_PARAMETER(fullSync); |
3620 | UNUSED_PARAMETER(dataOnly); |
3621 | #endif |
3622 | |
3623 | /* Record the number of times that we do a normal fsync() and |
3624 | ** FULLSYNC. This is used during testing to verify that this procedure |
3625 | ** gets called with the correct arguments. |
3626 | */ |
3627 | #ifdef SQLITE_TEST |
3628 | if( fullSync ) sqlite3_fullsync_count++; |
3629 | sqlite3_sync_count++; |
3630 | #endif |
3631 | |
3632 | /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a |
3633 | ** no-op. But go ahead and call fstat() to validate the file |
3634 | ** descriptor as we need a method to provoke a failure during |
3635 | ** coverate testing. |
3636 | */ |
3637 | #ifdef SQLITE_NO_SYNC |
3638 | { |
3639 | struct stat buf; |
3640 | rc = osFstat(fd, &buf); |
3641 | } |
3642 | #elif HAVE_FULLFSYNC |
3643 | if( fullSync ){ |
3644 | rc = osFcntl(fd, F_FULLFSYNC, 0); |
3645 | }else{ |
3646 | rc = 1; |
3647 | } |
3648 | /* If the FULLFSYNC failed, fall back to attempting an fsync(). |
3649 | ** It shouldn't be possible for fullfsync to fail on the local |
3650 | ** file system (on OSX), so failure indicates that FULLFSYNC |
3651 | ** isn't supported for this file system. So, attempt an fsync |
3652 | ** and (for now) ignore the overhead of a superfluous fcntl call. |
3653 | ** It'd be better to detect fullfsync support once and avoid |
3654 | ** the fcntl call every time sync is called. |
3655 | */ |
3656 | if( rc ) rc = fsync(fd); |
3657 | |
3658 | #elif defined(__APPLE__) |
3659 | /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly |
3660 | ** so currently we default to the macro that redefines fdatasync to fsync |
3661 | */ |
3662 | rc = fsync(fd); |
3663 | #else |
3664 | rc = fdatasync(fd); |
3665 | #if OS_VXWORKS |
3666 | if( rc==-1 && errno==ENOTSUP ){ |
3667 | rc = fsync(fd); |
3668 | } |
3669 | #endif /* OS_VXWORKS */ |
3670 | #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ |
3671 | |
3672 | if( OS_VXWORKS && rc!= -1 ){ |
3673 | rc = 0; |
3674 | } |
3675 | return rc; |
3676 | } |
3677 | |
3678 | /* |
3679 | ** Open a file descriptor to the directory containing file zFilename. |
3680 | ** If successful, *pFd is set to the opened file descriptor and |
3681 | ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM |
3682 | ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined |
3683 | ** value. |
3684 | ** |
3685 | ** The directory file descriptor is used for only one thing - to |
3686 | ** fsync() a directory to make sure file creation and deletion events |
3687 | ** are flushed to disk. Such fsyncs are not needed on newer |
3688 | ** journaling filesystems, but are required on older filesystems. |
3689 | ** |
3690 | ** This routine can be overridden using the xSetSysCall interface. |
3691 | ** The ability to override this routine was added in support of the |
3692 | ** chromium sandbox. Opening a directory is a security risk (we are |
3693 | ** told) so making it overrideable allows the chromium sandbox to |
3694 | ** replace this routine with a harmless no-op. To make this routine |
3695 | ** a no-op, replace it with a stub that returns SQLITE_OK but leaves |
3696 | ** *pFd set to a negative number. |
3697 | ** |
3698 | ** If SQLITE_OK is returned, the caller is responsible for closing |
3699 | ** the file descriptor *pFd using close(). |
3700 | */ |
3701 | static int openDirectory(const char *zFilename, int *pFd){ |
3702 | int ii; |
3703 | int fd = -1; |
3704 | char zDirname[MAX_PATHNAME+1]; |
3705 | |
3706 | sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s" , zFilename); |
3707 | for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--); |
3708 | if( ii>0 ){ |
3709 | zDirname[ii] = '\0'; |
3710 | }else{ |
3711 | if( zDirname[0]!='/' ) zDirname[0] = '.'; |
3712 | zDirname[1] = 0; |
3713 | } |
3714 | fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); |
3715 | if( fd>=0 ){ |
3716 | OSTRACE(("OPENDIR %-3d %s\n" , fd, zDirname)); |
3717 | } |
3718 | *pFd = fd; |
3719 | if( fd>=0 ) return SQLITE_OK; |
3720 | return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory" , zDirname); |
3721 | } |
3722 | |
3723 | /* |
3724 | ** Make sure all writes to a particular file are committed to disk. |
3725 | ** |
3726 | ** If dataOnly==0 then both the file itself and its metadata (file |
3727 | ** size, access time, etc) are synced. If dataOnly!=0 then only the |
3728 | ** file data is synced. |
3729 | ** |
3730 | ** Under Unix, also make sure that the directory entry for the file |
3731 | ** has been created by fsync-ing the directory that contains the file. |
3732 | ** If we do not do this and we encounter a power failure, the directory |
3733 | ** entry for the journal might not exist after we reboot. The next |
3734 | ** SQLite to access the file will not know that the journal exists (because |
3735 | ** the directory entry for the journal was never created) and the transaction |
3736 | ** will not roll back - possibly leading to database corruption. |
3737 | */ |
3738 | static int unixSync(sqlite3_file *id, int flags){ |
3739 | int rc; |
3740 | unixFile *pFile = (unixFile*)id; |
3741 | |
3742 | int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); |
3743 | int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; |
3744 | |
3745 | /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ |
3746 | assert((flags&0x0F)==SQLITE_SYNC_NORMAL |
3747 | || (flags&0x0F)==SQLITE_SYNC_FULL |
3748 | ); |
3749 | |
3750 | /* Unix cannot, but some systems may return SQLITE_FULL from here. This |
3751 | ** line is to test that doing so does not cause any problems. |
3752 | */ |
3753 | SimulateDiskfullError( return SQLITE_FULL ); |
3754 | |
3755 | assert( pFile ); |
3756 | OSTRACE(("SYNC %-3d\n" , pFile->h)); |
3757 | rc = full_fsync(pFile->h, isFullsync, isDataOnly); |
3758 | SimulateIOError( rc=1 ); |
3759 | if( rc ){ |
3760 | storeLastErrno(pFile, errno); |
3761 | return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync" , pFile->zPath); |
3762 | } |
3763 | |
3764 | /* Also fsync the directory containing the file if the DIRSYNC flag |
3765 | ** is set. This is a one-time occurrence. Many systems (examples: AIX) |
3766 | ** are unable to fsync a directory, so ignore errors on the fsync. |
3767 | */ |
3768 | if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ |
3769 | int dirfd; |
3770 | OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n" , pFile->zPath, |
3771 | HAVE_FULLFSYNC, isFullsync)); |
3772 | rc = osOpenDirectory(pFile->zPath, &dirfd); |
3773 | if( rc==SQLITE_OK ){ |
3774 | full_fsync(dirfd, 0, 0); |
3775 | robust_close(pFile, dirfd, __LINE__); |
3776 | }else{ |
3777 | assert( rc==SQLITE_CANTOPEN ); |
3778 | rc = SQLITE_OK; |
3779 | } |
3780 | pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; |
3781 | } |
3782 | return rc; |
3783 | } |
3784 | |
3785 | /* |
3786 | ** Truncate an open file to a specified size |
3787 | */ |
3788 | static int unixTruncate(sqlite3_file *id, i64 nByte){ |
3789 | unixFile *pFile = (unixFile *)id; |
3790 | int rc; |
3791 | assert( pFile ); |
3792 | SimulateIOError( return SQLITE_IOERR_TRUNCATE ); |
3793 | |
3794 | /* If the user has configured a chunk-size for this file, truncate the |
3795 | ** file so that it consists of an integer number of chunks (i.e. the |
3796 | ** actual file size after the operation may be larger than the requested |
3797 | ** size). |
3798 | */ |
3799 | if( pFile->szChunk>0 ){ |
3800 | nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; |
3801 | } |
3802 | |
3803 | rc = robust_ftruncate(pFile->h, nByte); |
3804 | if( rc ){ |
3805 | storeLastErrno(pFile, errno); |
3806 | return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate" , pFile->zPath); |
3807 | }else{ |
3808 | #ifdef SQLITE_DEBUG |
3809 | /* If we are doing a normal write to a database file (as opposed to |
3810 | ** doing a hot-journal rollback or a write to some file other than a |
3811 | ** normal database file) and we truncate the file to zero length, |
3812 | ** that effectively updates the change counter. This might happen |
3813 | ** when restoring a database using the backup API from a zero-length |
3814 | ** source. |
3815 | */ |
3816 | if( pFile->inNormalWrite && nByte==0 ){ |
3817 | pFile->transCntrChng = 1; |
3818 | } |
3819 | #endif |
3820 | |
3821 | #if SQLITE_MAX_MMAP_SIZE>0 |
3822 | /* If the file was just truncated to a size smaller than the currently |
3823 | ** mapped region, reduce the effective mapping size as well. SQLite will |
3824 | ** use read() and write() to access data beyond this point from now on. |
3825 | */ |
3826 | if( nByte<pFile->mmapSize ){ |
3827 | pFile->mmapSize = nByte; |
3828 | } |
3829 | #endif |
3830 | |
3831 | return SQLITE_OK; |
3832 | } |
3833 | } |
3834 | |
3835 | /* |
3836 | ** Determine the current size of a file in bytes |
3837 | */ |
3838 | static int unixFileSize(sqlite3_file *id, i64 *pSize){ |
3839 | int rc; |
3840 | struct stat buf; |
3841 | assert( id ); |
3842 | rc = osFstat(((unixFile*)id)->h, &buf); |
3843 | SimulateIOError( rc=1 ); |
3844 | if( rc!=0 ){ |
3845 | storeLastErrno((unixFile*)id, errno); |
3846 | return SQLITE_IOERR_FSTAT; |
3847 | } |
3848 | *pSize = buf.st_size; |
3849 | |
3850 | /* When opening a zero-size database, the findInodeInfo() procedure |
3851 | ** writes a single byte into that file in order to work around a bug |
3852 | ** in the OS-X msdos filesystem. In order to avoid problems with upper |
3853 | ** layers, we need to report this file size as zero even though it is |
3854 | ** really 1. Ticket #3260. |
3855 | */ |
3856 | if( *pSize==1 ) *pSize = 0; |
3857 | |
3858 | |
3859 | return SQLITE_OK; |
3860 | } |
3861 | |
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
/*
** Handler for proxy-locking file-control verbs.  Defined below in the
** proxy locking division.  It is declared here because unixFileControl()
** forwards SQLITE_FCNTL_SET_LOCKPROXYFILE and
** SQLITE_FCNTL_GET_LOCKPROXYFILE to it.
*/
static int proxyFileControl(sqlite3_file*,int,void*);
#endif
3869 | |
3870 | /* |
3871 | ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT |
3872 | ** file-control operation. Enlarge the database to nBytes in size |
3873 | ** (rounded up to the next chunk-size). If the database is already |
3874 | ** nBytes or larger, this routine is a no-op. |
3875 | */ |
3876 | static int fcntlSizeHint(unixFile *pFile, i64 nByte){ |
3877 | if( pFile->szChunk>0 ){ |
3878 | i64 nSize; /* Required file size */ |
3879 | struct stat buf; /* Used to hold return values of fstat() */ |
3880 | |
3881 | if( osFstat(pFile->h, &buf) ){ |
3882 | return SQLITE_IOERR_FSTAT; |
3883 | } |
3884 | |
3885 | nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; |
3886 | if( nSize>(i64)buf.st_size ){ |
3887 | |
3888 | #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE |
3889 | /* The code below is handling the return value of osFallocate() |
3890 | ** correctly. posix_fallocate() is defined to "returns zero on success, |
3891 | ** or an error number on failure". See the manpage for details. */ |
3892 | int err; |
3893 | do{ |
3894 | err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); |
3895 | }while( err==EINTR ); |
3896 | if( err && err!=EINVAL ) return SQLITE_IOERR_WRITE; |
3897 | #else |
3898 | /* If the OS does not have posix_fallocate(), fake it. Write a |
3899 | ** single byte to the last byte in each block that falls entirely |
3900 | ** within the extended region. Then, if required, a single byte |
3901 | ** at offset (nSize-1), to set the size of the file correctly. |
3902 | ** This is a similar technique to that used by glibc on systems |
3903 | ** that do not have a real fallocate() call. |
3904 | */ |
3905 | int nBlk = buf.st_blksize; /* File-system block size */ |
3906 | int nWrite = 0; /* Number of bytes written by seekAndWrite */ |
3907 | i64 iWrite; /* Next offset to write to */ |
3908 | |
3909 | iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1; |
3910 | assert( iWrite>=buf.st_size ); |
3911 | assert( ((iWrite+1)%nBlk)==0 ); |
3912 | for(/*no-op*/; iWrite<nSize+nBlk-1; iWrite+=nBlk ){ |
3913 | if( iWrite>=nSize ) iWrite = nSize - 1; |
3914 | nWrite = seekAndWrite(pFile, iWrite, "" , 1); |
3915 | if( nWrite!=1 ) return SQLITE_IOERR_WRITE; |
3916 | } |
3917 | #endif |
3918 | } |
3919 | } |
3920 | |
3921 | #if SQLITE_MAX_MMAP_SIZE>0 |
3922 | if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ |
3923 | int rc; |
3924 | if( pFile->szChunk<=0 ){ |
3925 | if( robust_ftruncate(pFile->h, nByte) ){ |
3926 | storeLastErrno(pFile, errno); |
3927 | return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate" , pFile->zPath); |
3928 | } |
3929 | } |
3930 | |
3931 | rc = unixMapfile(pFile, nByte); |
3932 | return rc; |
3933 | } |
3934 | #endif |
3935 | |
3936 | return SQLITE_OK; |
3937 | } |
3938 | |
3939 | /* |
3940 | ** If *pArg is initially negative then this is a query. Set *pArg to |
3941 | ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. |
3942 | ** |
3943 | ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. |
3944 | */ |
3945 | static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ |
3946 | if( *pArg<0 ){ |
3947 | *pArg = (pFile->ctrlFlags & mask)!=0; |
3948 | }else if( (*pArg)==0 ){ |
3949 | pFile->ctrlFlags &= ~mask; |
3950 | }else{ |
3951 | pFile->ctrlFlags |= mask; |
3952 | } |
3953 | } |
3954 | |
/* Forward declarations of routines that unixFileControl() calls:
** unixGetTempname() for SQLITE_FCNTL_TEMPFILENAME and, unless WAL
** support is omitted, unixFcntlExternalReader() for
** SQLITE_FCNTL_EXTERNAL_READER. */
static int unixGetTempname(int nBuf, char *zBuf);
#ifndef SQLITE_OMIT_WAL
  static int unixFcntlExternalReader(unixFile*, int*);
#endif
3960 | |
/*
** Information and control of an open file handle.
**
** Dispatches on the file-control opcode "op".  pArg is an opcode-specific
** in/out argument whose pointed-to type is given by the cast used in each
** case below.  Returns SQLITE_NOTFOUND for any opcode that is not handled
** in this build, otherwise the result documented on the individual case.
*/
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
  unixFile *pFile = (unixFile*)id;
  switch( op ){
#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
    /* Batch-atomic write support: each verb maps onto one F2FS ioctl and
    ** a non-zero ioctl result becomes the matching SQLITE_IOERR_* code. */
    case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: {
      int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE);
      return rc ? SQLITE_IOERR_BEGIN_ATOMIC : SQLITE_OK;
    }
    case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: {
      int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE);
      return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK;
    }
    case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: {
      int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE);
      return rc ? SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK;
    }
#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */

    /* Report the current lock level through *(int*)pArg */
    case SQLITE_FCNTL_LOCKSTATE: {
      *(int*)pArg = pFile->eFileLock;
      return SQLITE_OK;
    }
    /* Report the most recently stored errno through *(int*)pArg */
    case SQLITE_FCNTL_LAST_ERRNO: {
      *(int*)pArg = pFile->lastErrno;
      return SQLITE_OK;
    }
    /* Set the chunk size used by unixTruncate() and fcntlSizeHint() */
    case SQLITE_FCNTL_CHUNK_SIZE: {
      pFile->szChunk = *(int *)pArg;
      return SQLITE_OK;
    }
    /* Grow the file to the hinted size.  Simulated I/O errors raised
    ** while doing so are flagged as benign. */
    case SQLITE_FCNTL_SIZE_HINT: {
      int rc;
      SimulateIOErrorBenign(1);
      rc = fcntlSizeHint(pFile, *(i64 *)pArg);
      SimulateIOErrorBenign(0);
      return rc;
    }
    /* Query (*pArg<0), clear (*pArg==0) or set (*pArg>0) a mode bit */
    case SQLITE_FCNTL_PERSIST_WAL: {
      unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg);
      return SQLITE_OK;
    }
    case SQLITE_FCNTL_POWERSAFE_OVERWRITE: {
      unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg);
      return SQLITE_OK;
    }
    /* Store a string copy of the VFS name, built by sqlite3_mprintf(),
    ** in *(char**)pArg.  NOTE(review): presumably the caller is expected
    ** to release it with sqlite3_free() -- confirm. */
    case SQLITE_FCNTL_VFSNAME: {
      *(char**)pArg = sqlite3_mprintf("%s" , pFile->pVfs->zName);
      return SQLITE_OK;
    }
    /* Store a newly allocated temporary filename in *(char**)pArg.  If
    ** the allocation fails, *pArg is left untouched and SQLITE_OK is
    ** still returned.  The result of unixGetTempname() is not checked. */
    case SQLITE_FCNTL_TEMPFILENAME: {
      char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname );
      if( zTFile ){
        unixGetTempname(pFile->pVfs->mxPathname, zTFile);
        *(char**)pArg = zTFile;
      }
      return SQLITE_OK;
    }
    /* Store the result of fileHasMoved() in *(int*)pArg */
    case SQLITE_FCNTL_HAS_MOVED: {
      *(int*)pArg = fileHasMoved(pFile);
      return SQLITE_OK;
    }
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
    /* Install a new busy timeout from *(int*)pArg and hand the previous
    ** value back through the same pointer. */
    case SQLITE_FCNTL_LOCK_TIMEOUT: {
      int iOld = pFile->iBusyTimeout;
      pFile->iBusyTimeout = *(int*)pArg;
      *(int*)pArg = iOld;
      return SQLITE_OK;
    }
#endif
#if SQLITE_MAX_MMAP_SIZE>0
    /* Change the memory-map size limit.  The previous limit is always
    ** written back through *(i64*)pArg.  A negative request is a pure
    ** query, and the limit is left alone while nFetchOut is non-zero. */
    case SQLITE_FCNTL_MMAP_SIZE: {
      i64 newLimit = *(i64*)pArg;
      int rc = SQLITE_OK;
      /* Never exceed the global configured maximum */
      if( newLimit>sqlite3GlobalConfig.mxMmap ){
        newLimit = sqlite3GlobalConfig.mxMmap;
      }

      /* The value of newLimit may be eventually cast to (size_t) and passed
      ** to mmap(). Restrict its value to 2GB if (size_t) is not at least a
      ** 64-bit type. */
      if( newLimit>0 && sizeof(size_t)<8 ){
        newLimit = (newLimit & 0x7FFFFFFF);
      }

      *(i64*)pArg = pFile->mmapSizeMax;
      if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){
        pFile->mmapSizeMax = newLimit;
        /* An existing mapping is dropped and rebuilt under the new limit */
        if( pFile->mmapSize>0 ){
          unixUnmapfile(pFile);
          rc = unixMapfile(pFile, -1);
        }
      }
      return rc;
    }
#endif
#ifdef SQLITE_DEBUG
    /* The pager calls this method to signal that it has done
    ** a rollback and that the database is therefore unchanged and
    ** hence it is OK for the transaction change counter to be
    ** unchanged.
    */
    case SQLITE_FCNTL_DB_UNCHANGED: {
      ((unixFile*)id)->dbUpdate = 0;
      return SQLITE_OK;
    }
#endif
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
    /* Proxy-locking verbs are handled entirely by proxyFileControl() */
    case SQLITE_FCNTL_SET_LOCKPROXYFILE:
    case SQLITE_FCNTL_GET_LOCKPROXYFILE: {
      return proxyFileControl(id,op,pArg);
    }
#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */

    /* Delegated to unixFcntlExternalReader(); when WAL support is
    ** omitted the answer written to *(int*)pArg is always 0. */
    case SQLITE_FCNTL_EXTERNAL_READER: {
#ifndef SQLITE_OMIT_WAL
      return unixFcntlExternalReader((unixFile*)id, (int*)pArg);
#else
      *(int*)pArg = 0;
      return SQLITE_OK;
#endif
    }
  }
  /* Opcode not recognized by this VFS */
  return SQLITE_NOTFOUND;
}
4088 | |
4089 | /* |
4090 | ** If pFd->sectorSize is non-zero when this function is called, it is a |
4091 | ** no-op. Otherwise, the values of pFd->sectorSize and |
4092 | ** pFd->deviceCharacteristics are set according to the file-system |
4093 | ** characteristics. |
4094 | ** |
4095 | ** There are two versions of this function. One for QNX and one for all |
4096 | ** other systems. |
4097 | */ |
4098 | #ifndef __QNXNTO__ |
4099 | static void setDeviceCharacteristics(unixFile *pFd){ |
4100 | assert( pFd->deviceCharacteristics==0 || pFd->sectorSize!=0 ); |
4101 | if( pFd->sectorSize==0 ){ |
4102 | #if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) |
4103 | int res; |
4104 | u32 f = 0; |
4105 | |
4106 | /* Check for support for F2FS atomic batch writes. */ |
4107 | res = osIoctl(pFd->h, F2FS_IOC_GET_FEATURES, &f); |
4108 | if( res==0 && (f & F2FS_FEATURE_ATOMIC_WRITE) ){ |
4109 | pFd->deviceCharacteristics = SQLITE_IOCAP_BATCH_ATOMIC; |
4110 | } |
4111 | #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ |
4112 | |
4113 | /* Set the POWERSAFE_OVERWRITE flag if requested. */ |
4114 | if( pFd->ctrlFlags & UNIXFILE_PSOW ){ |
4115 | pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; |
4116 | } |
4117 | |
4118 | pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; |
4119 | } |
4120 | } |
4121 | #else |
4122 | #include <sys/dcmd_blk.h> |
4123 | #include <sys/statvfs.h> |
4124 | static void setDeviceCharacteristics(unixFile *pFile){ |
4125 | if( pFile->sectorSize == 0 ){ |
4126 | struct statvfs fsInfo; |
4127 | |
4128 | /* Set defaults for non-supported filesystems */ |
4129 | pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; |
4130 | pFile->deviceCharacteristics = 0; |
4131 | if( fstatvfs(pFile->h, &fsInfo) == -1 ) { |
4132 | return; |
4133 | } |
4134 | |
4135 | if( !strcmp(fsInfo.f_basetype, "tmp" ) ) { |
4136 | pFile->sectorSize = fsInfo.f_bsize; |
4137 | pFile->deviceCharacteristics = |
4138 | SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ |
4139 | SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until |
4140 | ** the write succeeds */ |
4141 | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind |
4142 | ** so it is ordered */ |
4143 | 0; |
4144 | }else if( strstr(fsInfo.f_basetype, "etfs" ) ){ |
4145 | pFile->sectorSize = fsInfo.f_bsize; |
4146 | pFile->deviceCharacteristics = |
4147 | /* etfs cluster size writes are atomic */ |
4148 | (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | |
4149 | SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until |
4150 | ** the write succeeds */ |
4151 | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind |
4152 | ** so it is ordered */ |
4153 | 0; |
4154 | }else if( !strcmp(fsInfo.f_basetype, "qnx6" ) ){ |
4155 | pFile->sectorSize = fsInfo.f_bsize; |
4156 | pFile->deviceCharacteristics = |
4157 | SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ |
4158 | SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until |
4159 | ** the write succeeds */ |
4160 | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind |
4161 | ** so it is ordered */ |
4162 | 0; |
4163 | }else if( !strcmp(fsInfo.f_basetype, "qnx4" ) ){ |
4164 | pFile->sectorSize = fsInfo.f_bsize; |
4165 | pFile->deviceCharacteristics = |
4166 | /* full bitset of atomics from max sector size and smaller */ |
4167 | ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | |
4168 | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind |
4169 | ** so it is ordered */ |
4170 | 0; |
4171 | }else if( strstr(fsInfo.f_basetype, "dos" ) ){ |
4172 | pFile->sectorSize = fsInfo.f_bsize; |
4173 | pFile->deviceCharacteristics = |
4174 | /* full bitset of atomics from max sector size and smaller */ |
4175 | ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | |
4176 | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind |
4177 | ** so it is ordered */ |
4178 | 0; |
4179 | }else{ |
4180 | pFile->deviceCharacteristics = |
4181 | SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ |
4182 | SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until |
4183 | ** the write succeeds */ |
4184 | 0; |
4185 | } |
4186 | } |
4187 | /* Last chance verification. If the sector size isn't a multiple of 512 |
4188 | ** then it isn't valid.*/ |
4189 | if( pFile->sectorSize % 512 != 0 ){ |
4190 | pFile->deviceCharacteristics = 0; |
4191 | pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; |
4192 | } |
4193 | } |
4194 | #endif |
4195 | |
4196 | /* |
4197 | ** Return the sector size in bytes of the underlying block device for |
4198 | ** the specified file. This is almost always 512 bytes, but may be |
4199 | ** larger for some devices. |
4200 | ** |
4201 | ** SQLite code assumes this function cannot fail. It also assumes that |
4202 | ** if two files are created in the same file-system directory (i.e. |
4203 | ** a database and its journal file) that the sector size will be the |
4204 | ** same for both. |
4205 | */ |
4206 | static int unixSectorSize(sqlite3_file *id){ |
4207 | unixFile *pFd = (unixFile*)id; |
4208 | setDeviceCharacteristics(pFd); |
4209 | return pFd->sectorSize; |
4210 | } |
4211 | |
4212 | /* |
4213 | ** Return the device characteristics for the file. |
4214 | ** |
4215 | ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. |
4216 | ** However, that choice is controversial since technically the underlying |
4217 | ** file system does not always provide powersafe overwrites. (In other |
4218 | ** words, after a power-loss event, parts of the file that were never |
4219 | ** written might end up being altered.) However, non-PSOW behavior is very, |
4220 | ** very rare. And asserting PSOW makes a large reduction in the amount |
4221 | ** of required I/O for journaling, since a lot of padding is eliminated. |
4222 | ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control |
** available to turn it off and a URI query parameter available to turn it off.
4224 | */ |
4225 | static int unixDeviceCharacteristics(sqlite3_file *id){ |
4226 | unixFile *pFd = (unixFile*)id; |
4227 | setDeviceCharacteristics(pFd); |
4228 | return pFd->deviceCharacteristics; |
4229 | } |
4230 | |
4231 | #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 |
4232 | |
4233 | /* |
4234 | ** Return the system page size. |
4235 | ** |
4236 | ** This function should not be called directly by other code in this file. |
4237 | ** Instead, it should be called via macro osGetpagesize(). |
4238 | */ |
static int unixGetpagesize(void){
  int szPage;                   /* Page size to report */
#if OS_VXWORKS
  szPage = 1024;                /* Fixed value used on VxWorks */
#elif defined(_BSD_SOURCE)
  szPage = getpagesize();
#else
  szPage = (int)sysconf(_SC_PAGESIZE);
#endif
  return szPage;
}
4248 | |
4249 | #endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ |
4250 | |
4251 | #ifndef SQLITE_OMIT_WAL |
4252 | |
4253 | /* |
** Object used to represent a shared memory buffer.
4255 | ** |
4256 | ** When multiple threads all reference the same wal-index, each thread |
4257 | ** has its own unixShm object, but they all point to a single instance |
4258 | ** of this unixShmNode object. In other words, each wal-index is opened |
4259 | ** only once per process. |
4260 | ** |
4261 | ** Each unixShmNode object is connected to a single unixInodeInfo object. |
4262 | ** We could coalesce this object into unixInodeInfo, but that would mean |
4263 | ** every open file that does not use shared memory (in other words, most |
4264 | ** open files) would have to carry around this extra information. So |
4265 | ** the unixInodeInfo object contains a pointer to this unixShmNode object |
4266 | ** and the unixShmNode object is created only when needed. |
4267 | ** |
4268 | ** unixMutexHeld() must be true when creating or destroying |
4269 | ** this object or while reading or writing the following fields: |
4270 | ** |
4271 | ** nRef |
4272 | ** |
4273 | ** The following fields are read-only after the object is created: |
4274 | ** |
4275 | ** hShm |
4276 | ** zFilename |
4277 | ** |
4278 | ** Either unixShmNode.pShmMutex must be held or unixShmNode.nRef==0 and |
4279 | ** unixMutexHeld() is true when reading or writing any other field |
4280 | ** in this structure. |
4281 | */ |
4282 | struct unixShmNode { |
4283 | unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ |
4284 | sqlite3_mutex *pShmMutex; /* Mutex to access this object */ |
4285 | char *zFilename; /* Name of the mmapped file */ |
4286 | int hShm; /* Open file descriptor */ |
4287 | int szRegion; /* Size of shared-memory regions */ |
4288 | u16 nRegion; /* Size of array apRegion */ |
4289 | u8 isReadonly; /* True if read-only */ |
4290 | u8 isUnlocked; /* True if no DMS lock held */ |
4291 | char **apRegion; /* Array of mapped shared-memory regions */ |
4292 | int nRef; /* Number of unixShm objects pointing to this */ |
4293 | unixShm *pFirst; /* All unixShm objects pointing to this */ |
4294 | int aLock[SQLITE_SHM_NLOCK]; /* # shared locks on slot, -1==excl lock */ |
4295 | #ifdef SQLITE_DEBUG |
4296 | u8 exclMask; /* Mask of exclusive locks held */ |
4297 | u8 sharedMask; /* Mask of shared locks held */ |
4298 | u8 nextShmId; /* Next available unixShm.id value */ |
4299 | #endif |
4300 | }; |
4301 | |
4302 | /* |
4303 | ** Structure used internally by this VFS to record the state of an |
4304 | ** open shared memory connection. |
4305 | ** |
4306 | ** The following fields are initialized when this object is created and |
4307 | ** are read-only thereafter: |
4308 | ** |
4309 | ** unixShm.pShmNode |
4310 | ** unixShm.id |
4311 | ** |
4312 | ** All other fields are read/write. The unixShm.pShmNode->pShmMutex must |
4313 | ** be held while accessing any read/write fields. |
4314 | */ |
4315 | struct unixShm { |
4316 | unixShmNode *pShmNode; /* The underlying unixShmNode object */ |
4317 | unixShm *pNext; /* Next unixShm with the same unixShmNode */ |
4318 | u8 hasMutex; /* True if holding the unixShmNode->pShmMutex */ |
4319 | u8 id; /* Id of this connection within its unixShmNode */ |
4320 | u16 sharedMask; /* Mask of shared locks held */ |
4321 | u16 exclMask; /* Mask of exclusive locks held */ |
4322 | }; |
4323 | |
4324 | /* |
4325 | ** Constants used for locking |
4326 | */ |
/* Byte offset of the first lock byte. */
#define UNIX_SHM_BASE   (4*(22+SQLITE_SHM_NLOCK))
/* Byte offset of the "deadman switch" (DMS) lock byte, which immediately
** follows the SQLITE_SHM_NLOCK lock bytes that begin at UNIX_SHM_BASE. */
#define UNIX_SHM_DMS    (UNIX_SHM_BASE+SQLITE_SHM_NLOCK)
4329 | |
4330 | /* |
4331 | ** Use F_GETLK to check whether or not there are any readers with open |
4332 | ** wal-mode transactions in other processes on database file pFile. If |
4333 | ** no error occurs, return SQLITE_OK and set (*piOut) to 1 if there are |
4334 | ** such transactions, or 0 otherwise. If an error occurs, return an |
4335 | ** SQLite error code. The final value of *piOut is undefined in this |
4336 | ** case. |
4337 | */ |
4338 | static int unixFcntlExternalReader(unixFile *pFile, int *piOut){ |
4339 | int rc = SQLITE_OK; |
4340 | *piOut = 0; |
4341 | if( pFile->pShm){ |
4342 | unixShmNode *pShmNode = pFile->pShm->pShmNode; |
4343 | struct flock f; |
4344 | |
4345 | memset(&f, 0, sizeof(f)); |
4346 | f.l_type = F_WRLCK; |
4347 | f.l_whence = SEEK_SET; |
4348 | f.l_start = UNIX_SHM_BASE + 3; |
4349 | f.l_len = SQLITE_SHM_NLOCK - 3; |
4350 | |
4351 | sqlite3_mutex_enter(pShmNode->pShmMutex); |
4352 | if( osFcntl(pShmNode->hShm, F_GETLK, &f)<0 ){ |
4353 | rc = SQLITE_IOERR_LOCK; |
4354 | }else{ |
4355 | *piOut = (f.l_type!=F_UNLCK); |
4356 | } |
4357 | sqlite3_mutex_leave(pShmNode->pShmMutex); |
4358 | } |
4359 | |
4360 | return rc; |
4361 | } |
4362 | |
4363 | |
4364 | /* |
4365 | ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. |
4366 | ** |
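** The lock is requested through osSetPosixAdvisoryLock().  SQLITE_OK is
** returned on success, or SQLITE_BUSY (SQLITE_BUSY_TIMEOUT when compiled
** with SQLITE_ENABLE_SETLK_TIMEOUT and a busy timeout is set) on failure.
** If the shared-memory segment has no backing file descriptor, no system
** call is made and SQLITE_OK is returned.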
4369 | */ |
4370 | static int unixShmSystemLock( |
4371 | unixFile *pFile, /* Open connection to the WAL file */ |
4372 | int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ |
4373 | int ofst, /* First byte of the locking range */ |
4374 | int n /* Number of bytes to lock */ |
4375 | ){ |
4376 | unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ |
4377 | struct flock f; /* The posix advisory locking structure */ |
4378 | int rc = SQLITE_OK; /* Result code form fcntl() */ |
4379 | |
4380 | /* Access to the unixShmNode object is serialized by the caller */ |
4381 | pShmNode = pFile->pInode->pShmNode; |
4382 | assert( pShmNode->nRef==0 || sqlite3_mutex_held(pShmNode->pShmMutex) ); |
4383 | assert( pShmNode->nRef>0 || unixMutexHeld() ); |
4384 | |
4385 | /* Shared locks never span more than one byte */ |
4386 | assert( n==1 || lockType!=F_RDLCK ); |
4387 | |
4388 | /* Locks are within range */ |
4389 | assert( n>=1 && n<=SQLITE_SHM_NLOCK ); |
4390 | |
4391 | if( pShmNode->hShm>=0 ){ |
4392 | int res; |
4393 | /* Initialize the locking parameters */ |
4394 | f.l_type = lockType; |
4395 | f.l_whence = SEEK_SET; |
4396 | f.l_start = ofst; |
4397 | f.l_len = n; |
4398 | res = osSetPosixAdvisoryLock(pShmNode->hShm, &f, pFile); |
4399 | if( res==-1 ){ |
4400 | #ifdef SQLITE_ENABLE_SETLK_TIMEOUT |
4401 | rc = (pFile->iBusyTimeout ? SQLITE_BUSY_TIMEOUT : SQLITE_BUSY); |
4402 | #else |
4403 | rc = SQLITE_BUSY; |
4404 | #endif |
4405 | } |
4406 | } |
4407 | |
4408 | /* Update the global lock state and do debug tracing */ |
4409 | #ifdef SQLITE_DEBUG |
4410 | { u16 mask; |
4411 | OSTRACE(("SHM-LOCK " )); |
4412 | mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst); |
4413 | if( rc==SQLITE_OK ){ |
4414 | if( lockType==F_UNLCK ){ |
4415 | OSTRACE(("unlock %d ok" , ofst)); |
4416 | pShmNode->exclMask &= ~mask; |
4417 | pShmNode->sharedMask &= ~mask; |
4418 | }else if( lockType==F_RDLCK ){ |
4419 | OSTRACE(("read-lock %d ok" , ofst)); |
4420 | pShmNode->exclMask &= ~mask; |
4421 | pShmNode->sharedMask |= mask; |
4422 | }else{ |
4423 | assert( lockType==F_WRLCK ); |
4424 | OSTRACE(("write-lock %d ok" , ofst)); |
4425 | pShmNode->exclMask |= mask; |
4426 | pShmNode->sharedMask &= ~mask; |
4427 | } |
4428 | }else{ |
4429 | if( lockType==F_UNLCK ){ |
4430 | OSTRACE(("unlock %d failed" , ofst)); |
4431 | }else if( lockType==F_RDLCK ){ |
4432 | OSTRACE(("read-lock failed" )); |
4433 | }else{ |
4434 | assert( lockType==F_WRLCK ); |
4435 | OSTRACE(("write-lock %d failed" , ofst)); |
4436 | } |
4437 | } |
4438 | OSTRACE((" - afterwards %03x,%03x\n" , |
4439 | pShmNode->sharedMask, pShmNode->exclMask)); |
4440 | } |
4441 | #endif |
4442 | |
4443 | return rc; |
4444 | } |
4445 | |
4446 | /* |
4447 | ** Return the minimum number of 32KB shm regions that should be mapped at |
4448 | ** a time, assuming that each mapping must be an integer multiple of the |
4449 | ** current system page-size. |
4450 | ** |
4451 | ** Usually, this is 1. The exception seems to be systems that are configured |
4452 | ** to use 64KB pages - in this case each mapping must cover at least two |
4453 | ** shm regions. |
4454 | */ |
static int unixShmRegionPerMap(void){
  const int szShmRegion = 32*1024;     /* Size of one SHM region */
  int szPage = osGetpagesize();        /* System page size */

  assert( (szPage & (szPage-1))==0 );  /* Page size must be a power of 2 */

  /* One region per mapping unless a single page is at least as large as
  ** a region, in which case a mapping spans a whole page of regions. */
  return szPage<szShmRegion ? 1 : szPage/szShmRegion;
}
4462 | |
4463 | /* |
** If the unixShmNode attached to the inode of pFd has unixShmNode.nRef==0,
** release all of its resources and free it.
4465 | ** |
4466 | ** This is not a VFS shared-memory method; it is a utility function called |
4467 | ** by VFS shared-memory methods. |
4468 | */ |
4469 | static void unixShmPurge(unixFile *pFd){ |
4470 | unixShmNode *p = pFd->pInode->pShmNode; |
4471 | assert( unixMutexHeld() ); |
4472 | if( p && ALWAYS(p->nRef==0) ){ |
4473 | int nShmPerMap = unixShmRegionPerMap(); |
4474 | int i; |
4475 | assert( p->pInode==pFd->pInode ); |
4476 | sqlite3_mutex_free(p->pShmMutex); |
4477 | for(i=0; i<p->nRegion; i+=nShmPerMap){ |
4478 | if( p->hShm>=0 ){ |
4479 | osMunmap(p->apRegion[i], p->szRegion); |
4480 | }else{ |
4481 | sqlite3_free(p->apRegion[i]); |
4482 | } |
4483 | } |
4484 | sqlite3_free(p->apRegion); |
4485 | if( p->hShm>=0 ){ |
4486 | robust_close(pFd, p->hShm, __LINE__); |
4487 | p->hShm = -1; |
4488 | } |
4489 | p->pInode->pShmNode = 0; |
4490 | sqlite3_free(p); |
4491 | } |
4492 | } |
4493 | |
4494 | /* |
4495 | ** The DMS lock has not yet been taken on shm file pShmNode. Attempt to |
4496 | ** take it now. Return SQLITE_OK if successful, or an SQLite error |
4497 | ** code otherwise. |
4498 | ** |
4499 | ** If the DMS cannot be locked because this is a readonly_shm=1 |
4500 | ** connection and no other process already holds a lock, return |
4501 | ** SQLITE_READONLY_CANTINIT and set pShmNode->isUnlocked=1. |
4502 | */ |
4503 | static int unixLockSharedMemory(unixFile *pDbFd, unixShmNode *pShmNode){ |
4504 | struct flock lock; |
4505 | int rc = SQLITE_OK; |
4506 | |
4507 | /* Use F_GETLK to determine the locks other processes are holding |
4508 | ** on the DMS byte. If it indicates that another process is holding |
4509 | ** a SHARED lock, then this process may also take a SHARED lock |
4510 | ** and proceed with opening the *-shm file. |
4511 | ** |
4512 | ** Or, if no other process is holding any lock, then this process |
4513 | ** is the first to open it. In this case take an EXCLUSIVE lock on the |
4514 | ** DMS byte and truncate the *-shm file to zero bytes in size. Then |
4515 | ** downgrade to a SHARED lock on the DMS byte. |
4516 | ** |
4517 | ** If another process is holding an EXCLUSIVE lock on the DMS byte, |
4518 | ** return SQLITE_BUSY to the caller (it will try again). An earlier |
4519 | ** version of this code attempted the SHARED lock at this point. But |
4520 | ** this introduced a subtle race condition: if the process holding |
4521 | ** EXCLUSIVE failed just before truncating the *-shm file, then this |
4522 | ** process might open and use the *-shm file without truncating it. |
4523 | ** And if the *-shm file has been corrupted by a power failure or |
4524 | ** system crash, the database itself may also become corrupt. */ |
4525 | lock.l_whence = SEEK_SET; |
4526 | lock.l_start = UNIX_SHM_DMS; |
4527 | lock.l_len = 1; |
4528 | lock.l_type = F_WRLCK; |
4529 | if( osFcntl(pShmNode->hShm, F_GETLK, &lock)!=0 ) { |
4530 | rc = SQLITE_IOERR_LOCK; |
4531 | }else if( lock.l_type==F_UNLCK ){ |
4532 | if( pShmNode->isReadonly ){ |
4533 | pShmNode->isUnlocked = 1; |
4534 | rc = SQLITE_READONLY_CANTINIT; |
4535 | }else{ |
4536 | rc = unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1); |
4537 | /* The first connection to attach must truncate the -shm file. We |
4538 | ** truncate to 3 bytes (an arbitrary small number, less than the |
4539 | ** -shm header size) rather than 0 as a system debugging aid, to |
4540 | ** help detect if a -shm file truncation is legitimate or is the work |
4541 | ** or a rogue process. */ |
4542 | if( rc==SQLITE_OK && robust_ftruncate(pShmNode->hShm, 3) ){ |
4543 | rc = unixLogError(SQLITE_IOERR_SHMOPEN,"ftruncate" ,pShmNode->zFilename); |
4544 | } |
4545 | } |
4546 | }else if( lock.l_type==F_WRLCK ){ |
4547 | rc = SQLITE_BUSY; |
4548 | } |
4549 | |
4550 | if( rc==SQLITE_OK ){ |
4551 | assert( lock.l_type==F_UNLCK || lock.l_type==F_RDLCK ); |
4552 | rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); |
4553 | } |
4554 | return rc; |
4555 | } |
4556 | |
4557 | /* |
4558 | ** Open a shared-memory area associated with open database file pDbFd. |
4559 | ** This particular implementation uses mmapped files. |
4560 | ** |
4561 | ** The file used to implement shared-memory is in the same directory |
4562 | ** as the open database file and has the same name as the open database |
4563 | ** file with the "-shm" suffix added. For example, if the database file |
4564 | ** is "/home/user1/config.db" then the file that is created and mmapped |
4565 | ** for shared memory will be called "/home/user1/config.db-shm". |
4566 | ** |
** Another approach is to use files in /dev/shm or /dev/tmp or
** some other tmpfs mount. But if a file in a different directory
4569 | ** from the database file is used, then differing access permissions |
4570 | ** or a chroot() might cause two different processes on the same |
4571 | ** database to end up using different files for shared memory - |
4572 | ** meaning that their memory would not really be shared - resulting |
4573 | ** in database corruption. Nevertheless, this tmpfs file usage |
4574 | ** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm" |
4575 | ** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time |
** option results in an incompatible build of SQLite; if builds of SQLite
** with differing SQLITE_SHM_DIRECTORY settings attempt to use the
** same database file at the same time, database corruption will likely
** result. The SQLITE_SHM_DIRECTORY compile-time option is considered
4580 | ** "unsupported" and may go away in a future SQLite release. |
4581 | ** |
4582 | ** When opening a new shared-memory file, if no other instances of that |
4583 | ** file are currently open, in this process or in other processes, then |
4584 | ** the file must be truncated to zero length or have its header cleared. |
4585 | ** |
4586 | ** If the original database file (pDbFd) is using the "unix-excl" VFS |
4587 | ** that means that an exclusive lock is held on the database file and |
4588 | ** that no other processes are able to read or write the database. In |
4589 | ** that case, we do not really need shared memory. No shared memory |
4590 | ** file is created. The shared memory will be simulated with heap memory. |
4591 | */ |
4592 | static int unixOpenSharedMemory(unixFile *pDbFd){ |
4593 | struct unixShm *p = 0; /* The connection to be opened */ |
4594 | struct unixShmNode *pShmNode; /* The underlying mmapped file */ |
4595 | int rc = SQLITE_OK; /* Result code */ |
4596 | unixInodeInfo *pInode; /* The inode of fd */ |
4597 | char *zShm; /* Name of the file used for SHM */ |
4598 | int nShmFilename; /* Size of the SHM filename in bytes */ |
4599 | |
4600 | /* Allocate space for the new unixShm object. */ |
4601 | p = sqlite3_malloc64( sizeof(*p) ); |
4602 | if( p==0 ) return SQLITE_NOMEM_BKPT; |
4603 | memset(p, 0, sizeof(*p)); |
4604 | assert( pDbFd->pShm==0 ); |
4605 | |
4606 | /* Check to see if a unixShmNode object already exists. Reuse an existing |
4607 | ** one if present. Create a new one if necessary. |
4608 | */ |
4609 | assert( unixFileMutexNotheld(pDbFd) ); |
4610 | unixEnterMutex(); |
4611 | pInode = pDbFd->pInode; |
4612 | pShmNode = pInode->pShmNode; |
4613 | if( pShmNode==0 ){ |
4614 | struct stat sStat; /* fstat() info for database file */ |
4615 | #ifndef SQLITE_SHM_DIRECTORY |
4616 | const char *zBasePath = pDbFd->zPath; |
4617 | #endif |
4618 | |
4619 | /* Call fstat() to figure out the permissions on the database file. If |
4620 | ** a new *-shm file is created, an attempt will be made to create it |
4621 | ** with the same permissions. |
4622 | */ |
4623 | if( osFstat(pDbFd->h, &sStat) ){ |
4624 | rc = SQLITE_IOERR_FSTAT; |
4625 | goto shm_open_err; |
4626 | } |
4627 | |
4628 | #ifdef SQLITE_SHM_DIRECTORY |
4629 | nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; |
4630 | #else |
4631 | nShmFilename = 6 + (int)strlen(zBasePath); |
4632 | #endif |
4633 | pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename ); |
4634 | if( pShmNode==0 ){ |
4635 | rc = SQLITE_NOMEM_BKPT; |
4636 | goto shm_open_err; |
4637 | } |
4638 | memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); |
4639 | zShm = pShmNode->zFilename = (char*)&pShmNode[1]; |
4640 | #ifdef SQLITE_SHM_DIRECTORY |
4641 | sqlite3_snprintf(nShmFilename, zShm, |
4642 | SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x" , |
4643 | (u32)sStat.st_ino, (u32)sStat.st_dev); |
4644 | #else |
4645 | sqlite3_snprintf(nShmFilename, zShm, "%s-shm" , zBasePath); |
4646 | sqlite3FileSuffix3(pDbFd->zPath, zShm); |
4647 | #endif |
4648 | pShmNode->hShm = -1; |
4649 | pDbFd->pInode->pShmNode = pShmNode; |
4650 | pShmNode->pInode = pDbFd->pInode; |
4651 | if( sqlite3GlobalConfig.bCoreMutex ){ |
4652 | pShmNode->pShmMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); |
4653 | if( pShmNode->pShmMutex==0 ){ |
4654 | rc = SQLITE_NOMEM_BKPT; |
4655 | goto shm_open_err; |
4656 | } |
4657 | } |
4658 | |
4659 | if( pInode->bProcessLock==0 ){ |
4660 | if( 0==sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm" , 0) ){ |
4661 | pShmNode->hShm = robust_open(zShm, O_RDWR|O_CREAT|O_NOFOLLOW, |
4662 | (sStat.st_mode&0777)); |
4663 | } |
4664 | if( pShmNode->hShm<0 ){ |
4665 | pShmNode->hShm = robust_open(zShm, O_RDONLY|O_NOFOLLOW, |
4666 | (sStat.st_mode&0777)); |
4667 | if( pShmNode->hShm<0 ){ |
4668 | rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open" , zShm); |
4669 | goto shm_open_err; |
4670 | } |
4671 | pShmNode->isReadonly = 1; |
4672 | } |
4673 | |
4674 | /* If this process is running as root, make sure that the SHM file |
4675 | ** is owned by the same user that owns the original database. Otherwise, |
4676 | ** the original owner will not be able to connect. |
4677 | */ |
4678 | robustFchown(pShmNode->hShm, sStat.st_uid, sStat.st_gid); |
4679 | |
4680 | rc = unixLockSharedMemory(pDbFd, pShmNode); |
4681 | if( rc!=SQLITE_OK && rc!=SQLITE_READONLY_CANTINIT ) goto shm_open_err; |
4682 | } |
4683 | } |
4684 | |
4685 | /* Make the new connection a child of the unixShmNode */ |
4686 | p->pShmNode = pShmNode; |
4687 | #ifdef SQLITE_DEBUG |
4688 | p->id = pShmNode->nextShmId++; |
4689 | #endif |
4690 | pShmNode->nRef++; |
4691 | pDbFd->pShm = p; |
4692 | unixLeaveMutex(); |
4693 | |
4694 | /* The reference count on pShmNode has already been incremented under |
4695 | ** the cover of the unixEnterMutex() mutex and the pointer from the |
4696 | ** new (struct unixShm) object to the pShmNode has been set. All that is |
4697 | ** left to do is to link the new object into the linked list starting |
4698 | ** at pShmNode->pFirst. This must be done while holding the |
4699 | ** pShmNode->pShmMutex. |
4700 | */ |
4701 | sqlite3_mutex_enter(pShmNode->pShmMutex); |
4702 | p->pNext = pShmNode->pFirst; |
4703 | pShmNode->pFirst = p; |
4704 | sqlite3_mutex_leave(pShmNode->pShmMutex); |
4705 | return rc; |
4706 | |
4707 | /* Jump here on any error */ |
4708 | shm_open_err: |
4709 | unixShmPurge(pDbFd); /* This call frees pShmNode if required */ |
4710 | sqlite3_free(p); |
4711 | unixLeaveMutex(); |
4712 | return rc; |
4713 | } |
4714 | |
4715 | /* |
4716 | ** This function is called to obtain a pointer to region iRegion of the |
4717 | ** shared-memory associated with the database file fd. Shared-memory regions |
4718 | ** are numbered starting from zero. Each shared-memory region is szRegion |
4719 | ** bytes in size. |
4720 | ** |
4721 | ** If an error occurs, an error code is returned and *pp is set to NULL. |
4722 | ** |
4723 | ** Otherwise, if the bExtend parameter is 0 and the requested shared-memory |
4724 | ** region has not been allocated (by any client, including one running in a |
4725 | ** separate process), then *pp is set to NULL and SQLITE_OK returned. If |
4726 | ** bExtend is non-zero and the requested shared-memory region has not yet |
4727 | ** been allocated, it is allocated by this function. |
4728 | ** |
4729 | ** If the shared-memory region has already been allocated or is allocated by |
4730 | ** this call as described above, then it is mapped into this processes |
4731 | ** address space (if it is not already), *pp is set to point to the mapped |
4732 | ** memory and SQLITE_OK returned. |
4733 | */ |
4734 | static int unixShmMap( |
4735 | sqlite3_file *fd, /* Handle open on database file */ |
4736 | int iRegion, /* Region to retrieve */ |
4737 | int szRegion, /* Size of regions */ |
4738 | int bExtend, /* True to extend file if necessary */ |
4739 | void volatile **pp /* OUT: Mapped memory */ |
4740 | ){ |
4741 | unixFile *pDbFd = (unixFile*)fd; |
4742 | unixShm *p; |
4743 | unixShmNode *pShmNode; |
4744 | int rc = SQLITE_OK; |
4745 | int nShmPerMap = unixShmRegionPerMap(); |
4746 | int nReqRegion; |
4747 | |
4748 | /* If the shared-memory file has not yet been opened, open it now. */ |
4749 | if( pDbFd->pShm==0 ){ |
4750 | rc = unixOpenSharedMemory(pDbFd); |
4751 | if( rc!=SQLITE_OK ) return rc; |
4752 | } |
4753 | |
4754 | p = pDbFd->pShm; |
4755 | pShmNode = p->pShmNode; |
4756 | sqlite3_mutex_enter(pShmNode->pShmMutex); |
4757 | if( pShmNode->isUnlocked ){ |
4758 | rc = unixLockSharedMemory(pDbFd, pShmNode); |
4759 | if( rc!=SQLITE_OK ) goto shmpage_out; |
4760 | pShmNode->isUnlocked = 0; |
4761 | } |
4762 | assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); |
4763 | assert( pShmNode->pInode==pDbFd->pInode ); |
4764 | assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); |
4765 | assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); |
4766 | |
4767 | /* Minimum number of regions required to be mapped. */ |
4768 | nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; |
4769 | |
4770 | if( pShmNode->nRegion<nReqRegion ){ |
4771 | char **apNew; /* New apRegion[] array */ |
4772 | int nByte = nReqRegion*szRegion; /* Minimum required file size */ |
4773 | struct stat sStat; /* Used by fstat() */ |
4774 | |
4775 | pShmNode->szRegion = szRegion; |
4776 | |
4777 | if( pShmNode->hShm>=0 ){ |
4778 | /* The requested region is not mapped into this processes address space. |
4779 | ** Check to see if it has been allocated (i.e. if the wal-index file is |
4780 | ** large enough to contain the requested region). |
4781 | */ |
4782 | if( osFstat(pShmNode->hShm, &sStat) ){ |
4783 | rc = SQLITE_IOERR_SHMSIZE; |
4784 | goto shmpage_out; |
4785 | } |
4786 | |
4787 | if( sStat.st_size<nByte ){ |
4788 | /* The requested memory region does not exist. If bExtend is set to |
4789 | ** false, exit early. *pp will be set to NULL and SQLITE_OK returned. |
4790 | */ |
4791 | if( !bExtend ){ |
4792 | goto shmpage_out; |
4793 | } |
4794 | |
4795 | /* Alternatively, if bExtend is true, extend the file. Do this by |
4796 | ** writing a single byte to the end of each (OS) page being |
4797 | ** allocated or extended. Technically, we need only write to the |
4798 | ** last page in order to extend the file. But writing to all new |
4799 | ** pages forces the OS to allocate them immediately, which reduces |
4800 | ** the chances of SIGBUS while accessing the mapped region later on. |
4801 | */ |
4802 | else{ |
4803 | static const int pgsz = 4096; |
4804 | int iPg; |
4805 | |
4806 | /* Write to the last byte of each newly allocated or extended page */ |
4807 | assert( (nByte % pgsz)==0 ); |
4808 | for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){ |
4809 | int x = 0; |
4810 | if( seekAndWriteFd(pShmNode->hShm, iPg*pgsz + pgsz-1,"" ,1,&x)!=1 ){ |
4811 | const char *zFile = pShmNode->zFilename; |
4812 | rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write" , zFile); |
4813 | goto shmpage_out; |
4814 | } |
4815 | } |
4816 | } |
4817 | } |
4818 | } |
4819 | |
4820 | /* Map the requested memory region into this processes address space. */ |
4821 | apNew = (char **)sqlite3_realloc( |
4822 | pShmNode->apRegion, nReqRegion*sizeof(char *) |
4823 | ); |
4824 | if( !apNew ){ |
4825 | rc = SQLITE_IOERR_NOMEM_BKPT; |
4826 | goto shmpage_out; |
4827 | } |
4828 | pShmNode->apRegion = apNew; |
4829 | while( pShmNode->nRegion<nReqRegion ){ |
4830 | int nMap = szRegion*nShmPerMap; |
4831 | int i; |
4832 | void *pMem; |
4833 | if( pShmNode->hShm>=0 ){ |
4834 | pMem = osMmap(0, nMap, |
4835 | pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, |
4836 | MAP_SHARED, pShmNode->hShm, szRegion*(i64)pShmNode->nRegion |
4837 | ); |
4838 | if( pMem==MAP_FAILED ){ |
4839 | rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap" , pShmNode->zFilename); |
4840 | goto shmpage_out; |
4841 | } |
4842 | }else{ |
4843 | pMem = sqlite3_malloc64(nMap); |
4844 | if( pMem==0 ){ |
4845 | rc = SQLITE_NOMEM_BKPT; |
4846 | goto shmpage_out; |
4847 | } |
4848 | memset(pMem, 0, nMap); |
4849 | } |
4850 | |
4851 | for(i=0; i<nShmPerMap; i++){ |
4852 | pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; |
4853 | } |
4854 | pShmNode->nRegion += nShmPerMap; |
4855 | } |
4856 | } |
4857 | |
4858 | shmpage_out: |
4859 | if( pShmNode->nRegion>iRegion ){ |
4860 | *pp = pShmNode->apRegion[iRegion]; |
4861 | }else{ |
4862 | *pp = 0; |
4863 | } |
4864 | if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; |
4865 | sqlite3_mutex_leave(pShmNode->pShmMutex); |
4866 | return rc; |
4867 | } |
4868 | |
/*
** Debug-only sanity check intended for use inside assert(), e.g.
**
**        assert( assertLockingArrayOk(pShmNode) );
**
** The expected contents of pShmNode->aLock[] are recomputed from the
** exclMask and sharedMask of every connection on the pShmNode->pFirst
** list: a slot held exclusively is recorded as -1, and a slot held shared
** is recorded as the number of connections holding it.  The routine
** returns true if the recomputed array is identical to pShmNode->aLock[],
** or false if not.
**
** The caller must hold pShmNode->pShmMutex.
*/
#ifdef SQLITE_DEBUG
static int assertLockingArrayOk(unixShmNode *pShmNode){
  unixShm *pClient;                  /* Iterator over attached connections */
  int aExpect[SQLITE_SHM_NLOCK];     /* Lock state rebuilt from the masks */
  int iSlot;                         /* Index of a lock slot */
  int bSame;                         /* True if aExpect[] matches aLock[] */

  assert( sqlite3_mutex_held(pShmNode->pShmMutex) );

  for(iSlot=0; iSlot<SQLITE_SHM_NLOCK; iSlot++){
    aExpect[iSlot] = 0;
  }

  pClient = pShmNode->pFirst;
  while( pClient ){
    for(iSlot=0; iSlot<SQLITE_SHM_NLOCK; iSlot++){
      const int bit = 1<<iSlot;
      if( pClient->exclMask & bit ){
        /* An exclusive holder must be the only holder of the slot */
        assert( aExpect[iSlot]==0 );
        aExpect[iSlot] = -1;
        continue;
      }
      if( pClient->sharedMask & bit ){
        /* Shared holders may only coexist with other shared holders */
        assert( aExpect[iSlot]>=0 );
        aExpect[iSlot]++;
      }
    }
    pClient = pClient->pNext;
  }

  bSame = (0==memcmp(pShmNode->aLock, aExpect, sizeof(aExpect)));
  assert( bSame );
  return bSame;
}
#endif
4900 | |
/*
** Change the lock state for a shared-memory segment.
**
** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
** different here than in posix.  In xShmLock(), one can go from unlocked
** to shared and back or from unlocked to exclusive and back.  But one may
** not go from shared to exclusive or from exclusive to shared.
**
** The request covers the n lock slots beginning at slot ofst.  flags is
** one of SQLITE_SHM_LOCK or SQLITE_SHM_UNLOCK combined with one of
** SQLITE_SHM_SHARED or SQLITE_SHM_EXCLUSIVE.
**
** Two layers of state are maintained while holding pShmNode->pShmMutex:
**
**   *  p->sharedMask and p->exclMask record what this connection holds.
**
**   *  pShmNode->aLock[] records, per slot, what all connections on the
**      node hold between them: -1 for an exclusive holder, otherwise the
**      number of shared holders.  unixShmSystemLock() is invoked only when
**      the request changes what the node as a whole holds.
**
** Return values:
**
**   SQLITE_IOERR_SHMLOCK   The connection has no shared memory attached.
**   SQLITE_BUSY            Another connection on the same node holds a
**                          conflicting lock.
**   otherwise              SQLITE_OK, or the value returned by
**                          unixShmSystemLock().
*/
static int unixShmLock(
  sqlite3_file *fd,          /* Database file holding the shared memory */
  int ofst,                  /* First lock to acquire or release */
  int n,                     /* Number of locks to acquire or release */
  int flags                  /* What to do with the lock */
){
  unixFile *pDbFd = (unixFile*)fd;      /* Connection holding shared memory */
  unixShm *p;                           /* The shared memory being locked */
  unixShmNode *pShmNode;                /* The underlying file iNode */
  int rc = SQLITE_OK;                   /* Result code */
  u16 mask;                             /* Mask of locks to take or release */
  int *aLock;                           /* Alias for pShmNode->aLock[] */

  p = pDbFd->pShm;
  if( p==0 ) return SQLITE_IOERR_SHMLOCK;
  pShmNode = p->pShmNode;
  if( NEVER(pShmNode==0) ) return SQLITE_IOERR_SHMLOCK;
  aLock = pShmNode->aLock;

  assert( pShmNode==pDbFd->pInode->pShmNode );
  assert( pShmNode->pInode==pDbFd->pInode );
  assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
  assert( n>=1 );
  assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  /* Multi-slot requests are only permitted for exclusive locks */
  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
  assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 );
  assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 );

  /* Check that, if this is to be a blocking lock, no locks that occur later
  ** in the following list than the lock being obtained are already held:
  **
  **   1. Checkpointer lock (ofst==1).
  **   2. Write lock (ofst==0).
  **   3. Read locks (ofst>=3 && ofst<SQLITE_SHM_NLOCK).
  **
  ** In other words, if this is a blocking lock, none of the locks that
  ** occur later in the above list than the lock being obtained may be
  ** held.
  **
  ** It is not permitted to block on the RECOVER lock.
  */
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
  assert( (flags & SQLITE_SHM_UNLOCK) || pDbFd->iBusyTimeout==0 || (
         (ofst!=2) /* not RECOVER */
      && (ofst!=1 || (p->exclMask|p->sharedMask)==0)
      && (ofst!=0 || (p->exclMask|p->sharedMask)<3)
      && (ofst<3 || (p->exclMask|p->sharedMask)<(1<<ofst))
  ));
#endif

  /* mask has bits ofst through ofst+n-1 set and all other bits clear */
  mask = (1<<(ofst+n)) - (1<<ofst);
  assert( n>1 || mask==(1<<ofst) );
  sqlite3_mutex_enter(pShmNode->pShmMutex);
  assert( assertLockingArrayOk(pShmNode) );
  if( flags & SQLITE_SHM_UNLOCK ){
    /* Nothing to do unless this connection holds one of the locks */
    if( (p->exclMask|p->sharedMask) & mask ){
      int ii;
      int bUnlock = 1;       /* True to release the lock at the system level */

      /* For each slot, compare aLock[] against this connection's own
      ** contribution (1 if it holds the slot shared, otherwise 0).  A
      ** larger value means some other connection still holds the slot
      ** shared, so the system-level lock must be kept. */
      for(ii=ofst; ii<ofst+n; ii++){
        if( aLock[ii]>((p->sharedMask & (1<<ii)) ? 1 : 0) ){
          bUnlock = 0;
        }
      }

      if( bUnlock ){
        rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n);
        if( rc==SQLITE_OK ){
          memset(&aLock[ofst], 0, sizeof(int)*n);
        }
      }else if( ALWAYS(p->sharedMask & (1<<ofst)) ){
        /* Other shared holders remain: just drop this connection's count */
        assert( n==1 && aLock[ofst]>1 );
        aLock[ofst]--;
      }

      /* Undo the local locks */
      if( rc==SQLITE_OK ){
        p->exclMask &= ~mask;
        p->sharedMask &= ~mask;
      }
    }
  }else if( flags & SQLITE_SHM_SHARED ){
    assert( n==1 );
    assert( (p->exclMask & (1<<ofst))==0 );
    /* If this connection already holds the shared lock, this is a no-op */
    if( (p->sharedMask & mask)==0 ){
      if( aLock[ofst]<0 ){
        /* Another connection on this node holds the slot exclusively */
        rc = SQLITE_BUSY;
      }else if( aLock[ofst]==0 ){
        /* First shared holder on this node: take the system-level lock.
        ** When aLock[ofst]>0 no system call is made. */
        rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n);
      }

      /* Get the local shared locks */
      if( rc==SQLITE_OK ){
        p->sharedMask |= mask;
        aLock[ofst]++;
      }
    }
  }else{
    /* Make sure no sibling connections hold locks that will block this
    ** lock.  If any do, return SQLITE_BUSY right away.  */
    int ii;
    for(ii=ofst; ii<ofst+n; ii++){
      assert( (p->sharedMask & mask)==0 );
      if( ALWAYS((p->exclMask & (1<<ii))==0) && aLock[ii] ){
        rc = SQLITE_BUSY;
        break;
      }
    }

    /* Get the exclusive locks at the system level. Then if successful
    ** also update the in-memory values. */
    if( rc==SQLITE_OK ){
      rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n);
      if( rc==SQLITE_OK ){
        assert( (p->sharedMask & mask)==0 );
        p->exclMask |= mask;
        for(ii=ofst; ii<ofst+n; ii++){
          aLock[ii] = -1;
        }
      }
    }
  }
  assert( assertLockingArrayOk(pShmNode) );
  sqlite3_mutex_leave(pShmNode->pShmMutex);
  OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n" ,
           p->id, osGetpid(0), p->sharedMask, p->exclMask));
  return rc;
}
5040 | |
5041 | /* |
5042 | ** Implement a memory barrier or memory fence on shared memory. |
5043 | ** |
5044 | ** All loads and stores begun before the barrier must complete before |
5045 | ** any load or store begun after the barrier. |
5046 | */ |
5047 | static void unixShmBarrier( |
5048 | sqlite3_file *fd /* Database file holding the shared memory */ |
5049 | ){ |
5050 | UNUSED_PARAMETER(fd); |
5051 | sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ |
5052 | assert( fd->pMethods->xLock==nolockLock |
5053 | || unixFileMutexNotheld((unixFile*)fd) |
5054 | ); |
5055 | unixEnterMutex(); /* Also mutex, for redundancy */ |
5056 | unixLeaveMutex(); |
5057 | } |
5058 | |
5059 | /* |
5060 | ** Close a connection to shared-memory. Delete the underlying |
5061 | ** storage if deleteFlag is true. |
5062 | ** |
5063 | ** If there is no shared memory associated with the connection then this |
5064 | ** routine is a harmless no-op. |
5065 | */ |
5066 | static int unixShmUnmap( |
5067 | sqlite3_file *fd, /* The underlying database file */ |
5068 | int deleteFlag /* Delete shared-memory if true */ |
5069 | ){ |
5070 | unixShm *p; /* The connection to be closed */ |
5071 | unixShmNode *pShmNode; /* The underlying shared-memory file */ |
5072 | unixShm **pp; /* For looping over sibling connections */ |
5073 | unixFile *pDbFd; /* The underlying database file */ |
5074 | |
5075 | pDbFd = (unixFile*)fd; |
5076 | p = pDbFd->pShm; |
5077 | if( p==0 ) return SQLITE_OK; |
5078 | pShmNode = p->pShmNode; |
5079 | |
5080 | assert( pShmNode==pDbFd->pInode->pShmNode ); |
5081 | assert( pShmNode->pInode==pDbFd->pInode ); |
5082 | |
5083 | /* Remove connection p from the set of connections associated |
5084 | ** with pShmNode */ |
5085 | sqlite3_mutex_enter(pShmNode->pShmMutex); |
5086 | for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} |
5087 | *pp = p->pNext; |
5088 | |
5089 | /* Free the connection p */ |
5090 | sqlite3_free(p); |
5091 | pDbFd->pShm = 0; |
5092 | sqlite3_mutex_leave(pShmNode->pShmMutex); |
5093 | |
5094 | /* If pShmNode->nRef has reached 0, then close the underlying |
5095 | ** shared-memory file, too */ |
5096 | assert( unixFileMutexNotheld(pDbFd) ); |
5097 | unixEnterMutex(); |
5098 | assert( pShmNode->nRef>0 ); |
5099 | pShmNode->nRef--; |
5100 | if( pShmNode->nRef==0 ){ |
5101 | if( deleteFlag && pShmNode->hShm>=0 ){ |
5102 | osUnlink(pShmNode->zFilename); |
5103 | } |
5104 | unixShmPurge(pDbFd); |
5105 | } |
5106 | unixLeaveMutex(); |
5107 | |
5108 | return SQLITE_OK; |
5109 | } |
5110 | |
5111 | |
5112 | #else |
5113 | # define unixShmMap 0 |
5114 | # define unixShmLock 0 |
5115 | # define unixShmBarrier 0 |
5116 | # define unixShmUnmap 0 |
5117 | #endif /* #ifndef SQLITE_OMIT_WAL */ |
5118 | |
5119 | #if SQLITE_MAX_MMAP_SIZE>0 |
5120 | /* |
5121 | ** If it is currently memory mapped, unmap file pFd. |
5122 | */ |
5123 | static void unixUnmapfile(unixFile *pFd){ |
5124 | assert( pFd->nFetchOut==0 ); |
5125 | if( pFd->pMapRegion ){ |
5126 | osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); |
5127 | pFd->pMapRegion = 0; |
5128 | pFd->mmapSize = 0; |
5129 | pFd->mmapSizeActual = 0; |
5130 | } |
5131 | } |
5132 | |
/*
** Attempt to set the size of the memory mapping maintained by file
** descriptor pFd to nNew bytes. Any existing mapping is discarded.
**
** If successful, this function sets the following variables:
**
**       unixFile.pMapRegion
**       unixFile.mmapSize
**       unixFile.mmapSizeActual
**
** If unsuccessful, an error message is logged via sqlite3_log() and
** the three variables above are zeroed. In this case SQLite should
** continue accessing the database using the xRead() and xWrite()
** methods.  unixFile.mmapSizeMax is also set to zero on failure.
**
** The caller must ensure that there are no outstanding xFetch references
** and that nNew is positive, larger than the current mmapSize and no
** larger than mmapSizeMax (see the assert() statements below).
*/
static void unixRemapfile(
  unixFile *pFd,                  /* File descriptor object */
  i64 nNew                        /* Required mapping size */
){
  const char *zErr = "mmap" ;           /* Name of the failing call, for logging */
  int h = pFd->h;                      /* File descriptor open on db file */
  u8 *pOrig = (u8 *)pFd->pMapRegion;   /* Pointer to current file mapping */
  i64 nOrig = pFd->mmapSizeActual;     /* Size of pOrig region in bytes */
  u8 *pNew = 0;                        /* Location of new mapping */
  int flags = PROT_READ;               /* Flags to pass to mmap() */

  assert( pFd->nFetchOut==0 );
  assert( nNew>pFd->mmapSize );
  assert( nNew<=pFd->mmapSizeMax );
  assert( nNew>0 );
  assert( pFd->mmapSizeActual>=pFd->mmapSize );
  /* The code below uses pNew==0 to mean "no attempt made yet" and
  ** pNew==MAP_FAILED to mean "attempt failed", so the two must differ. */
  assert( MAP_FAILED!=0 );

#ifdef SQLITE_MMAP_READWRITE
  if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
#endif

  if( pOrig ){
    /* An existing mapping is present.  Try to extend it in place.
    ** nReuse is the leading part of the old mapping that is kept: all of
    ** mmapSize when mremap() is available, otherwise mmapSize masked down
    ** with the system page size (NOTE(review): the mask assumes the page
    ** size is a power of two). */
#if HAVE_MREMAP
    i64 nReuse = pFd->mmapSize;
#else
    const int szSyspage = osGetpagesize();
    i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
#endif
    u8 *pReq = &pOrig[nReuse];    /* Address just past the reused part */

    /* Unmap any pages of the existing mapping that cannot be reused. */
    if( nReuse!=nOrig ){
      osMunmap(pReq, nOrig-nReuse);
    }

#if HAVE_MREMAP
    pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);
    zErr = "mremap" ;
#else
    /* Ask for the additional bytes to be mapped directly after the reused
    ** part.  If the mapping lands anywhere else it is of no use: release
    ** it and set pNew to 0 so that a fresh mapping is attempted below. */
    pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
    if( pNew!=MAP_FAILED ){
      if( pNew!=pReq ){
        osMunmap(pNew, nNew - nReuse);
        pNew = 0;
      }else{
        pNew = pOrig;
      }
    }
#endif

    /* The attempt to extend the existing mapping failed. Free it. */
    if( pNew==MAP_FAILED || pNew==0 ){
      osMunmap(pOrig, nReuse);
    }
  }

  /* If pNew is still NULL, try to create an entirely new mapping.
  ** (If pNew is MAP_FAILED at this point no further attempt is made.) */
  if( pNew==0 ){
    pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
  }

  if( pNew==MAP_FAILED ){
    pNew = 0;
    nNew = 0;
    unixLogError(SQLITE_OK, zErr, pFd->zPath);

    /* If the mmap() above failed, assume that all subsequent mmap() calls
    ** will probably fail too. Fall back to using xRead/xWrite exclusively
    ** in this case.  */
    pFd->mmapSizeMax = 0;
  }
  pFd->pMapRegion = (void *)pNew;
  pFd->mmapSize = pFd->mmapSizeActual = nNew;
}
5223 | |
5224 | /* |
5225 | ** Memory map or remap the file opened by file-descriptor pFd (if the file |
5226 | ** is already mapped, the existing mapping is replaced by the new). Or, if |
5227 | ** there already exists a mapping for this file, and there are still |
5228 | ** outstanding xFetch() references to it, this function is a no-op. |
5229 | ** |
5230 | ** If parameter nByte is non-negative, then it is the requested size of |
5231 | ** the mapping to create. Otherwise, if nByte is less than zero, then the |
5232 | ** requested size is the size of the file on disk. The actual size of the |
5233 | ** created mapping is either the requested size or the value configured |
5234 | ** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. |
5235 | ** |
5236 | ** SQLITE_OK is returned if no error occurs (even if the mapping is not |
5237 | ** recreated as a result of outstanding references) or an SQLite error |
5238 | ** code otherwise. |
5239 | */ |
5240 | static int unixMapfile(unixFile *pFd, i64 nMap){ |
5241 | assert( nMap>=0 || pFd->nFetchOut==0 ); |
5242 | assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); |
5243 | if( pFd->nFetchOut>0 ) return SQLITE_OK; |
5244 | |
5245 | if( nMap<0 ){ |
5246 | struct stat statbuf; /* Low-level file information */ |
5247 | if( osFstat(pFd->h, &statbuf) ){ |
5248 | return SQLITE_IOERR_FSTAT; |
5249 | } |
5250 | nMap = statbuf.st_size; |
5251 | } |
5252 | if( nMap>pFd->mmapSizeMax ){ |
5253 | nMap = pFd->mmapSizeMax; |
5254 | } |
5255 | |
5256 | assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); |
5257 | if( nMap!=pFd->mmapSize ){ |
5258 | unixRemapfile(pFd, nMap); |
5259 | } |
5260 | |
5261 | return SQLITE_OK; |
5262 | } |
5263 | #endif /* SQLITE_MAX_MMAP_SIZE>0 */ |
5264 | |
5265 | /* |
5266 | ** If possible, return a pointer to a mapping of file fd starting at offset |
5267 | ** iOff. The mapping must be valid for at least nAmt bytes. |
5268 | ** |
5269 | ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. |
5270 | ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. |
5271 | ** Finally, if an error does occur, return an SQLite error code. The final |
5272 | ** value of *pp is undefined in this case. |
5273 | ** |
5274 | ** If this function does return a pointer, the caller must eventually |
5275 | ** release the reference by calling unixUnfetch(). |
5276 | */ |
5277 | static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ |
5278 | #if SQLITE_MAX_MMAP_SIZE>0 |
5279 | unixFile *pFd = (unixFile *)fd; /* The underlying database file */ |
5280 | #endif |
5281 | *pp = 0; |
5282 | |
5283 | #if SQLITE_MAX_MMAP_SIZE>0 |
5284 | if( pFd->mmapSizeMax>0 ){ |
5285 | if( pFd->pMapRegion==0 ){ |
5286 | int rc = unixMapfile(pFd, -1); |
5287 | if( rc!=SQLITE_OK ) return rc; |
5288 | } |
5289 | if( pFd->mmapSize >= iOff+nAmt ){ |
5290 | *pp = &((u8 *)pFd->pMapRegion)[iOff]; |
5291 | pFd->nFetchOut++; |
5292 | } |
5293 | } |
5294 | #endif |
5295 | return SQLITE_OK; |
5296 | } |
5297 | |
5298 | /* |
5299 | ** If the third argument is non-NULL, then this function releases a |
5300 | ** reference obtained by an earlier call to unixFetch(). The second |
5301 | ** argument passed to this function must be the same as the corresponding |
5302 | ** argument that was passed to the unixFetch() invocation. |
5303 | ** |
5304 | ** Or, if the third argument is NULL, then this function is being called |
5305 | ** to inform the VFS layer that, according to POSIX, any existing mapping |
5306 | ** may now be invalid and should be unmapped. |
5307 | */ |
5308 | static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ |
5309 | #if SQLITE_MAX_MMAP_SIZE>0 |
5310 | unixFile *pFd = (unixFile *)fd; /* The underlying database file */ |
5311 | UNUSED_PARAMETER(iOff); |
5312 | |
5313 | /* If p==0 (unmap the entire file) then there must be no outstanding |
5314 | ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), |
5315 | ** then there must be at least one outstanding. */ |
5316 | assert( (p==0)==(pFd->nFetchOut==0) ); |
5317 | |
5318 | /* If p!=0, it must match the iOff value. */ |
5319 | assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); |
5320 | |
5321 | if( p ){ |
5322 | pFd->nFetchOut--; |
5323 | }else{ |
5324 | unixUnmapfile(pFd); |
5325 | } |
5326 | |
5327 | assert( pFd->nFetchOut>=0 ); |
5328 | #else |
5329 | UNUSED_PARAMETER(fd); |
5330 | UNUSED_PARAMETER(p); |
5331 | UNUSED_PARAMETER(iOff); |
5332 | #endif |
5333 | return SQLITE_OK; |
5334 | } |
5335 | |
5336 | /* |
5337 | ** Here ends the implementation of all sqlite3_file methods. |
5338 | ** |
5339 | ********************** End sqlite3_file Methods ******************************* |
5340 | ******************************************************************************/ |
5341 | |
5342 | /* |
5343 | ** This division contains definitions of sqlite3_io_methods objects that |
5344 | ** implement various file locking strategies. It also contains definitions |
5345 | ** of "finder" functions. A finder-function is used to locate the appropriate |
5346 | ** sqlite3_io_methods object for a particular database file. The pAppData |
5347 | ** field of the sqlite3_vfs VFS objects are initialized to be pointers to |
5348 | ** the correct finder-function for that VFS. |
5349 | ** |
5350 | ** Most finder functions return a pointer to a fixed sqlite3_io_methods |
5351 | ** object. The only interesting finder-function is autolockIoFinder, which |
5352 | ** looks at the filesystem type and tries to guess the best locking |
5353 | ** strategy from that. |
5354 | ** |
** For finder-function F, two objects are created:
**
**    (1) The real finder-function named "FImpl()".
**
**    (2) A constant pointer to this function named just "F".
5360 | ** |
5361 | ** |
** A pointer to the F pointer is used as the pAppData value for VFS
** objects.  We have to do this instead of letting pAppData point
** directly at the finder-function since C90 rules prevent a void*
** from being cast into a function pointer.
5366 | ** |
5367 | ** |
** Each instance of the IOMETHODS macro generates two objects:
**
**   *  A constant sqlite3_io_methods object called METHOD that has locking
**      methods CLOSE, LOCK, UNLOCK, CKLOCK and xShmMap method SHMMAP.
**
**   *  An I/O method finder function called FINDER that returns a pointer
**      to the METHOD object in the previous bullet.
5375 | */ |
/*
** IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP)
**
** Expands to:
**
**   *  the constant sqlite3_io_methods table METHOD, whose iVersion,
**      xClose, xLock, xUnlock, xCheckReservedLock and xShmMap slots come
**      from the macro arguments and whose other slots are the shared
**      unix* routines,
**
**   *  the function FINDER##Impl(), which ignores both of its arguments
**      and returns &METHOD, and
**
**   *  the constant function pointer FINDER, initialized to FINDER##Impl.
*/
#define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP)     \
static const sqlite3_io_methods METHOD = {                                   \
   VERSION, /* iVersion */                                                   \
   CLOSE, /* xClose */                                                       \
   unixRead, /* xRead */                                                     \
   unixWrite, /* xWrite */                                                   \
   unixTruncate, /* xTruncate */                                             \
   unixSync, /* xSync */                                                     \
   unixFileSize, /* xFileSize */                                             \
   LOCK, /* xLock */                                                         \
   UNLOCK, /* xUnlock */                                                     \
   CKLOCK, /* xCheckReservedLock */                                          \
   unixFileControl, /* xFileControl */                                       \
   unixSectorSize, /* xSectorSize */                                         \
   unixDeviceCharacteristics, /* xDeviceCharacteristics */                   \
   SHMMAP, /* xShmMap */                                                     \
   unixShmLock, /* xShmLock */                                               \
   unixShmBarrier, /* xShmBarrier */                                         \
   unixShmUnmap, /* xShmUnmap */                                             \
   unixFetch, /* xFetch */                                                   \
   unixUnfetch, /* xUnfetch */                                               \
};                                                                           \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){   \
  UNUSED_PARAMETER(z); UNUSED_PARAMETER(p);                                  \
  return &METHOD;                                                            \
}                                                                            \
static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p)    \
    = FINDER##Impl;
5404 | |
/*
** Here are all of the sqlite3_io_methods objects for each of the
** locking strategies.  Functions that return pointers to these methods
** are also created.
**
** Each IOMETHODS() invocation names the finder, the methods object, the
** iVersion of the methods object, the four locking-related methods
** (xClose, xLock, xUnlock, xCheckReservedLock) and the xShmMap method.
*/

/* Methods built on unixLock/unixUnlock; the only invocation in this group
** that supplies a non-zero xShmMap (unixShmMap). */
IOMETHODS(
  posixIoFinder, /* Finder function name */
  posixIoMethods, /* sqlite3_io_methods object name */
  3, /* shared memory and mmap are enabled */
  unixClose, /* xClose method */
  unixLock, /* xLock method */
  unixUnlock, /* xUnlock method */
  unixCheckReservedLock, /* xCheckReservedLock method */
  unixShmMap /* xShmMap method */
)
/* Methods built on the nolock* routines. */
IOMETHODS(
  nolockIoFinder, /* Finder function name */
  nolockIoMethods, /* sqlite3_io_methods object name */
  3, /* iVersion 3; note that xShmMap below is 0 */
  nolockClose, /* xClose method */
  nolockLock, /* xLock method */
  nolockUnlock, /* xUnlock method */
  nolockCheckReservedLock, /* xCheckReservedLock method */
  0 /* xShmMap method */
)
/* Methods built on the dotlock* routines. */
IOMETHODS(
  dotlockIoFinder, /* Finder function name */
  dotlockIoMethods, /* sqlite3_io_methods object name */
  1, /* shared memory is disabled */
  dotlockClose, /* xClose method */
  dotlockLock, /* xLock method */
  dotlockUnlock, /* xUnlock method */
  dotlockCheckReservedLock, /* xCheckReservedLock method */
  0 /* xShmMap method */
)
5440 | |
#if SQLITE_ENABLE_LOCKING_STYLE
/* Methods built on the flock* routines.  Only compiled when
** SQLITE_ENABLE_LOCKING_STYLE is true. */
IOMETHODS(
  flockIoFinder, /* Finder function name */
  flockIoMethods, /* sqlite3_io_methods object name */
  1, /* shared memory is disabled */
  flockClose, /* xClose method */
  flockLock, /* xLock method */
  flockUnlock, /* xUnlock method */
  flockCheckReservedLock, /* xCheckReservedLock method */
  0 /* xShmMap method */
)
#endif
5453 | |
#if OS_VXWORKS
/* Methods built on the semX* routines.  Only compiled for VxWorks. */
IOMETHODS(
  semIoFinder, /* Finder function name */
  semIoMethods, /* sqlite3_io_methods object name */
  1, /* shared memory is disabled */
  semXClose, /* xClose method */
  semXLock, /* xLock method */
  semXUnlock, /* xUnlock method */
  semXCheckReservedLock, /* xCheckReservedLock method */
  0 /* xShmMap method */
)
#endif
5466 | |
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/* Methods built on the afp* routines.  Only compiled on Apple platforms
** when SQLITE_ENABLE_LOCKING_STYLE is true. */
IOMETHODS(
  afpIoFinder, /* Finder function name */
  afpIoMethods, /* sqlite3_io_methods object name */
  1, /* shared memory is disabled */
  afpClose, /* xClose method */
  afpLock, /* xLock method */
  afpUnlock, /* xUnlock method */
  afpCheckReservedLock, /* xCheckReservedLock method */
  0 /* xShmMap method */
)
#endif
5479 | |
/*
** The proxy locking method is a "super-method" in the sense that it
** opens secondary file descriptors for the conch and lock files and
** it uses proxy, dot-file, AFP, and flock() locking methods on those
** secondary files.  For this reason, the division that implements
** proxy locking is located much further down in the file.  But we need
** to go ahead and define the sqlite3_io_methods and finder function
** for proxy locking here.  So we forward declare the I/O methods.
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/* Forward declarations of the proxy locking methods named in the
** IOMETHODS() invocation below. */
static int proxyClose(sqlite3_file*);
static int proxyLock(sqlite3_file*, int);
static int proxyUnlock(sqlite3_file*, int);
static int proxyCheckReservedLock(sqlite3_file*, int*);
IOMETHODS(
  proxyIoFinder, /* Finder function name */
  proxyIoMethods, /* sqlite3_io_methods object name */
  1, /* shared memory is disabled */
  proxyClose, /* xClose method */
  proxyLock, /* xLock method */
  proxyUnlock, /* xUnlock method */
  proxyCheckReservedLock, /* xCheckReservedLock method */
  0 /* xShmMap method */
)
#endif
5505 | |
/* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set.
** This methods object has the same arguments as posixIoMethods except that
** xUnlock is nfsUnlock, iVersion is 1 and xShmMap is 0. */
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
IOMETHODS(
  nfsIoFinder, /* Finder function name */
  nfsIoMethods, /* sqlite3_io_methods object name */
  1, /* shared memory is disabled */
  unixClose, /* xClose method */
  unixLock, /* xLock method */
  nfsUnlock, /* xUnlock method */
  unixCheckReservedLock, /* xCheckReservedLock method */
  0 /* xShmMap method */
)
#endif
5519 | |
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/*
** This "finder" function attempts to determine the best locking strategy
** for the database file "filePath".  It then returns the sqlite3_io_methods
** object that implements that strategy.
**
** The decision is made as follows:
**
**   (1) If filePath is NULL the file is transient: nolockIoMethods.
**
**   (2) If statfs() succeeds and the filesystem is mounted read-only:
**       nolockIoMethods.
**
**   (3) If statfs() succeeds and the filesystem type name appears in the
**       aMap[] table: the methods listed there.
**
**   (4) Otherwise probe with fcntl(F_GETLK) on the open file.  If the
**       probe succeeds, use nfsIoMethods when statfs() reported "nfs" and
**       posixIoMethods in every other case.  If the probe fails, use
**       dotlockIoMethods.
**
** This is for MacOSX only.
*/
static const sqlite3_io_methods *autolockIoFinderImpl(
  const char *filePath,    /* name of the database file */
  unixFile *pNew           /* open file object for the database file */
){
  static const struct Mapping {
    const char *zFilesystem;              /* Filesystem type name */
    const sqlite3_io_methods *pMethods;   /* Appropriate locking method */
  } aMap[] = {
    { "hfs",    &posixIoMethods },
    { "ufs",    &posixIoMethods },
    { "afpfs",  &afpIoMethods },
    { "smbfs",  &afpIoMethods },
    { "webdav", &nolockIoMethods },
    { 0, 0 }
  };
  int i;
  int bHaveFsInfo = 0;     /* True once statfs() has filled in fsInfo */
  struct statfs fsInfo;
  struct flock lockInfo;

  if( !filePath ){
    /* If filePath==NULL that means we are dealing with a transient file
    ** that does not need to be locked. */
    return &nolockIoMethods;
  }
  if( statfs(filePath, &fsInfo) != -1 ){
    bHaveFsInfo = 1;
    if( fsInfo.f_flags & MNT_RDONLY ){
      return &nolockIoMethods;
    }
    for(i=0; aMap[i].zFilesystem; i++){
      if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
        return aMap[i].pMethods;
      }
    }
  }

  /* Default case. Handles, amongst others, "nfs".
  ** Test byte-range lock using fcntl().  If the call succeeds,
  ** assume that the file-system supports POSIX style locks.
  **
  ** The lock description is zeroed first so that no uninitialized field
  ** is handed to fcntl().
  */
  memset(&lockInfo, 0, sizeof(lockInfo));
  lockInfo.l_len = 1;
  lockInfo.l_start = 0;
  lockInfo.l_whence = SEEK_SET;
  lockInfo.l_type = F_RDLCK;
  if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {
    /* fsInfo holds valid data only if statfs() succeeded above.  When it
    ** failed, the filesystem type is unknown and plain posix locking is
    ** used. */
    if( bHaveFsInfo && strcmp(fsInfo.f_fstypename, "nfs")==0 ){
      return &nfsIoMethods;
    } else {
      return &posixIoMethods;
    }
  }else{
    return &dotlockIoMethods;
  }
}
static const sqlite3_io_methods
  *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl;

#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
5585 | |
#if OS_VXWORKS
/*
** Finder function for VxWorks.  A transient file (filePath==NULL) gets
** the no-op locking methods.  Otherwise the open file is probed with
** fcntl(F_GETLK): if the probe succeeds posix advisory locking is used,
** and if it fails named semaphore locking is used instead.
*/
static const sqlite3_io_methods *vxworksIoFinderImpl(
  const char *filePath,    /* name of the database file */
  unixFile *pNew           /* the open file object */
){
  struct flock sProbe;     /* Lock description used for the probe */
  int bPosixOk;            /* True if the fcntl() probe succeeded */

  /* filePath==NULL means a transient file that does not need locking */
  if( filePath==0 ){
    return &nolockIoMethods;
  }

  /* Describe a read lock on the first byte of the file and ask fcntl()
  ** about it.  Only whether the call succeeds matters. */
  sProbe.l_type = F_RDLCK;
  sProbe.l_whence = SEEK_SET;
  sProbe.l_start = 0;
  sProbe.l_len = 1;
  bPosixOk = (osFcntl(pNew->h, F_GETLK, &sProbe)!=-1);

  return bPosixOk ? &posixIoMethods : &semIoMethods;
}
static const sqlite3_io_methods
  *(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl;

#endif /* OS_VXWORKS */
5621 | |
/*
** An abstract type for a pointer to an IO method finder function.
**
** A finder function takes the name of the file being opened and the
** unixFile object for it, and returns the sqlite3_io_methods object that
** the file should use.
*/
typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
5626 | |
5627 | |
5628 | /**************************************************************************** |
5629 | **************************** sqlite3_vfs methods **************************** |
5630 | ** |
5631 | ** This division contains the implementation of methods on the |
5632 | ** sqlite3_vfs object. |
5633 | */ |
5634 | |
5635 | /* |
5636 | ** Initialize the contents of the unixFile structure pointed to by pId. |
5637 | */ |
5638 | static int fillInUnixFile( |
5639 | sqlite3_vfs *pVfs, /* Pointer to vfs object */ |
5640 | int h, /* Open file descriptor of file being opened */ |
5641 | sqlite3_file *pId, /* Write to the unixFile structure here */ |
5642 | const char *zFilename, /* Name of the file being opened */ |
5643 | int ctrlFlags /* Zero or more UNIXFILE_* values */ |
5644 | ){ |
5645 | const sqlite3_io_methods *pLockingStyle; |
5646 | unixFile *pNew = (unixFile *)pId; |
5647 | int rc = SQLITE_OK; |
5648 | |
5649 | assert( pNew->pInode==NULL ); |
5650 | |
5651 | /* No locking occurs in temporary files */ |
5652 | assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); |
5653 | |
5654 | OSTRACE(("OPEN %-3d %s\n" , h, zFilename)); |
5655 | pNew->h = h; |
5656 | pNew->pVfs = pVfs; |
5657 | pNew->zPath = zFilename; |
5658 | pNew->ctrlFlags = (u8)ctrlFlags; |
5659 | #if SQLITE_MAX_MMAP_SIZE>0 |
5660 | pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; |
5661 | #endif |
5662 | if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), |
5663 | "psow" , SQLITE_POWERSAFE_OVERWRITE) ){ |
5664 | pNew->ctrlFlags |= UNIXFILE_PSOW; |
5665 | } |
5666 | if( strcmp(pVfs->zName,"unix-excl" )==0 ){ |
5667 | pNew->ctrlFlags |= UNIXFILE_EXCL; |
5668 | } |
5669 | |
5670 | #if OS_VXWORKS |
5671 | pNew->pId = vxworksFindFileId(zFilename); |
5672 | if( pNew->pId==0 ){ |
5673 | ctrlFlags |= UNIXFILE_NOLOCK; |
5674 | rc = SQLITE_NOMEM_BKPT; |
5675 | } |
5676 | #endif |
5677 | |
5678 | if( ctrlFlags & UNIXFILE_NOLOCK ){ |
5679 | pLockingStyle = &nolockIoMethods; |
5680 | }else{ |
5681 | pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); |
5682 | #if SQLITE_ENABLE_LOCKING_STYLE |
5683 | /* Cache zFilename in the locking context (AFP and dotlock override) for |
5684 | ** proxyLock activation is possible (remote proxy is based on db name) |
5685 | ** zFilename remains valid until file is closed, to support */ |
5686 | pNew->lockingContext = (void*)zFilename; |
5687 | #endif |
5688 | } |
5689 | |
5690 | if( pLockingStyle == &posixIoMethods |
5691 | #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE |
5692 | || pLockingStyle == &nfsIoMethods |
5693 | #endif |
5694 | ){ |
5695 | unixEnterMutex(); |
5696 | rc = findInodeInfo(pNew, &pNew->pInode); |
5697 | if( rc!=SQLITE_OK ){ |
5698 | /* If an error occurred in findInodeInfo(), close the file descriptor |
5699 | ** immediately, before releasing the mutex. findInodeInfo() may fail |
5700 | ** in two scenarios: |
5701 | ** |
5702 | ** (a) A call to fstat() failed. |
5703 | ** (b) A malloc failed. |
5704 | ** |
5705 | ** Scenario (b) may only occur if the process is holding no other |
5706 | ** file descriptors open on the same file. If there were other file |
5707 | ** descriptors on this file, then no malloc would be required by |
5708 | ** findInodeInfo(). If this is the case, it is quite safe to close |
5709 | ** handle h - as it is guaranteed that no posix locks will be released |
5710 | ** by doing so. |
5711 | ** |
5712 | ** If scenario (a) caused the error then things are not so safe. The |
5713 | ** implicit assumption here is that if fstat() fails, things are in |
5714 | ** such bad shape that dropping a lock or two doesn't matter much. |
5715 | */ |
5716 | robust_close(pNew, h, __LINE__); |
5717 | h = -1; |
5718 | } |
5719 | unixLeaveMutex(); |
5720 | } |
5721 | |
5722 | #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) |
5723 | else if( pLockingStyle == &afpIoMethods ){ |
5724 | /* AFP locking uses the file path so it needs to be included in |
5725 | ** the afpLockingContext. |
5726 | */ |
5727 | afpLockingContext *pCtx; |
5728 | pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); |
5729 | if( pCtx==0 ){ |
5730 | rc = SQLITE_NOMEM_BKPT; |
5731 | }else{ |
5732 | /* NB: zFilename exists and remains valid until the file is closed |
5733 | ** according to requirement F11141. So we do not need to make a |
5734 | ** copy of the filename. */ |
5735 | pCtx->dbPath = zFilename; |
5736 | pCtx->reserved = 0; |
5737 | srandomdev(); |
5738 | unixEnterMutex(); |
5739 | rc = findInodeInfo(pNew, &pNew->pInode); |
5740 | if( rc!=SQLITE_OK ){ |
5741 | sqlite3_free(pNew->lockingContext); |
5742 | robust_close(pNew, h, __LINE__); |
5743 | h = -1; |
5744 | } |
5745 | unixLeaveMutex(); |
5746 | } |
5747 | } |
5748 | #endif |
5749 | |
5750 | else if( pLockingStyle == &dotlockIoMethods ){ |
5751 | /* Dotfile locking uses the file path so it needs to be included in |
5752 | ** the dotlockLockingContext |
5753 | */ |
5754 | char *zLockFile; |
5755 | int nFilename; |
5756 | assert( zFilename!=0 ); |
5757 | nFilename = (int)strlen(zFilename) + 6; |
5758 | zLockFile = (char *)sqlite3_malloc64(nFilename); |
5759 | if( zLockFile==0 ){ |
5760 | rc = SQLITE_NOMEM_BKPT; |
5761 | }else{ |
5762 | sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); |
5763 | } |
5764 | pNew->lockingContext = zLockFile; |
5765 | } |
5766 | |
5767 | #if OS_VXWORKS |
5768 | else if( pLockingStyle == &semIoMethods ){ |
5769 | /* Named semaphore locking uses the file path so it needs to be |
5770 | ** included in the semLockingContext |
5771 | */ |
5772 | unixEnterMutex(); |
5773 | rc = findInodeInfo(pNew, &pNew->pInode); |
5774 | if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ |
5775 | char *zSemName = pNew->pInode->aSemName; |
5776 | int n; |
5777 | sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem" , |
5778 | pNew->pId->zCanonicalName); |
5779 | for( n=1; zSemName[n]; n++ ) |
5780 | if( zSemName[n]=='/' ) zSemName[n] = '_'; |
5781 | pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); |
5782 | if( pNew->pInode->pSem == SEM_FAILED ){ |
5783 | rc = SQLITE_NOMEM_BKPT; |
5784 | pNew->pInode->aSemName[0] = '\0'; |
5785 | } |
5786 | } |
5787 | unixLeaveMutex(); |
5788 | } |
5789 | #endif |
5790 | |
5791 | storeLastErrno(pNew, 0); |
5792 | #if OS_VXWORKS |
5793 | if( rc!=SQLITE_OK ){ |
5794 | if( h>=0 ) robust_close(pNew, h, __LINE__); |
5795 | h = -1; |
5796 | osUnlink(zFilename); |
5797 | pNew->ctrlFlags |= UNIXFILE_DELETE; |
5798 | } |
5799 | #endif |
5800 | if( rc!=SQLITE_OK ){ |
5801 | if( h>=0 ) robust_close(pNew, h, __LINE__); |
5802 | }else{ |
5803 | pId->pMethods = pLockingStyle; |
5804 | OpenCounter(+1); |
5805 | verifyDbFile(pNew); |
5806 | } |
5807 | return rc; |
5808 | } |
5809 | |
5810 | /* |
5811 | ** Directories to consider for temp files. |
5812 | */ |
static const char *azTempDirs[] = {
  0,            /* [0] filled in by unixTempFileInit(): getenv("SQLITE_TMPDIR") */
  0,            /* [1] filled in by unixTempFileInit(): getenv("TMPDIR") */
  "/var/tmp",   /* [2] fixed fallbacks, tried in this order */
  "/usr/tmp",   /* [3] */
  "/tmp",       /* [4] */
  "."           /* [5] current directory, last resort */
};
5821 | |
5822 | /* |
5823 | ** Initialize first two members of azTempDirs[] array. |
5824 | */ |
5825 | static void unixTempFileInit(void){ |
5826 | azTempDirs[0] = getenv("SQLITE_TMPDIR" ); |
5827 | azTempDirs[1] = getenv("TMPDIR" ); |
5828 | } |
5829 | |
5830 | /* |
5831 | ** Return the name of a directory in which to put temporary files. |
5832 | ** If no suitable temporary file directory can be found, return NULL. |
5833 | */ |
5834 | static const char *unixTempFileDir(void){ |
5835 | unsigned int i = 0; |
5836 | struct stat buf; |
5837 | const char *zDir = sqlite3_temp_directory; |
5838 | |
5839 | while(1){ |
5840 | if( zDir!=0 |
5841 | && osStat(zDir, &buf)==0 |
5842 | && S_ISDIR(buf.st_mode) |
5843 | && osAccess(zDir, 03)==0 |
5844 | ){ |
5845 | return zDir; |
5846 | } |
5847 | if( i>=sizeof(azTempDirs)/sizeof(azTempDirs[0]) ) break; |
5848 | zDir = azTempDirs[i++]; |
5849 | } |
5850 | return 0; |
5851 | } |
5852 | |
5853 | /* |
5854 | ** Create a temporary file name in zBuf. zBuf must be allocated |
5855 | ** by the calling process and must be big enough to hold at least |
5856 | ** pVfs->mxPathname bytes. |
5857 | */ |
5858 | static int unixGetTempname(int nBuf, char *zBuf){ |
5859 | const char *zDir; |
5860 | int iLimit = 0; |
5861 | int rc = SQLITE_OK; |
5862 | |
5863 | /* It's odd to simulate an io-error here, but really this is just |
5864 | ** using the io-error infrastructure to test that SQLite handles this |
5865 | ** function failing. |
5866 | */ |
5867 | zBuf[0] = 0; |
5868 | SimulateIOError( return SQLITE_IOERR ); |
5869 | |
5870 | sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR)); |
5871 | zDir = unixTempFileDir(); |
5872 | if( zDir==0 ){ |
5873 | rc = SQLITE_IOERR_GETTEMPPATH; |
5874 | }else{ |
5875 | do{ |
5876 | u64 r; |
5877 | sqlite3_randomness(sizeof(r), &r); |
5878 | assert( nBuf>2 ); |
5879 | zBuf[nBuf-2] = 0; |
5880 | sqlite3_snprintf(nBuf, zBuf, "%s/" SQLITE_TEMP_FILE_PREFIX"%llx%c" , |
5881 | zDir, r, 0); |
5882 | if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ){ |
5883 | rc = SQLITE_ERROR; |
5884 | break; |
5885 | } |
5886 | }while( osAccess(zBuf,0)==0 ); |
5887 | } |
5888 | sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_TEMPDIR)); |
5889 | return rc; |
5890 | } |
5891 | |
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
/*
** Forward declaration of the routine that transforms a unixFile into a
** proxy-locking unixFile.  The implementation lives in the proxy-lock
** division; unixOpen() calls it when proxy locking is selected (for
** example when SQLITE_PREFER_PROXY_LOCKING is defined).
*/
static int proxyTransformUnixFile(unixFile *pFile, const char *zPath);
#endif
5900 | |
5901 | /* |
5902 | ** Search for an unused file descriptor that was opened on the database |
5903 | ** file (not a journal or super-journal file) identified by pathname |
5904 | ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second |
5905 | ** argument to this function. |
5906 | ** |
5907 | ** Such a file descriptor may exist if a database connection was closed |
5908 | ** but the associated file descriptor could not be closed because some |
5909 | ** other file descriptor open on the same file is holding a file-lock. |
5910 | ** Refer to comments in the unixClose() function and the lengthy comment |
5911 | ** describing "Posix Advisory Locking" at the start of this file for |
5912 | ** further details. Also, ticket #4018. |
5913 | ** |
** If a suitable file descriptor is found, a pointer to its UnixUnusedFd
** entry is returned after removing that entry from the inode's list of
** unused descriptors. If no such file descriptor is located, NULL (0) is
** returned.
5916 | */ |
5917 | static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ |
5918 | UnixUnusedFd *pUnused = 0; |
5919 | |
5920 | /* Do not search for an unused file descriptor on vxworks. Not because |
5921 | ** vxworks would not benefit from the change (it might, we're not sure), |
5922 | ** but because no way to test it is currently available. It is better |
5923 | ** not to risk breaking vxworks support for the sake of such an obscure |
5924 | ** feature. */ |
5925 | #if !OS_VXWORKS |
5926 | struct stat sStat; /* Results of stat() call */ |
5927 | |
5928 | unixEnterMutex(); |
5929 | |
5930 | /* A stat() call may fail for various reasons. If this happens, it is |
5931 | ** almost certain that an open() call on the same path will also fail. |
5932 | ** For this reason, if an error occurs in the stat() call here, it is |
5933 | ** ignored and -1 is returned. The caller will try to open a new file |
5934 | ** descriptor on the same path, fail, and return an error to SQLite. |
5935 | ** |
5936 | ** Even if a subsequent open() call does succeed, the consequences of |
5937 | ** not searching for a reusable file descriptor are not dire. */ |
5938 | if( inodeList!=0 && 0==osStat(zPath, &sStat) ){ |
5939 | unixInodeInfo *pInode; |
5940 | |
5941 | pInode = inodeList; |
5942 | while( pInode && (pInode->fileId.dev!=sStat.st_dev |
5943 | || pInode->fileId.ino!=(u64)sStat.st_ino) ){ |
5944 | pInode = pInode->pNext; |
5945 | } |
5946 | if( pInode ){ |
5947 | UnixUnusedFd **pp; |
5948 | assert( sqlite3_mutex_notheld(pInode->pLockMutex) ); |
5949 | sqlite3_mutex_enter(pInode->pLockMutex); |
5950 | flags &= (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); |
5951 | for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); |
5952 | pUnused = *pp; |
5953 | if( pUnused ){ |
5954 | *pp = pUnused->pNext; |
5955 | } |
5956 | sqlite3_mutex_leave(pInode->pLockMutex); |
5957 | } |
5958 | } |
5959 | unixLeaveMutex(); |
5960 | #endif /* if !OS_VXWORKS */ |
5961 | return pUnused; |
5962 | } |
5963 | |
5964 | /* |
5965 | ** Find the mode, uid and gid of file zFile. |
5966 | */ |
5967 | static int getFileMode( |
5968 | const char *zFile, /* File name */ |
5969 | mode_t *pMode, /* OUT: Permissions of zFile */ |
5970 | uid_t *pUid, /* OUT: uid of zFile. */ |
5971 | gid_t *pGid /* OUT: gid of zFile. */ |
5972 | ){ |
5973 | struct stat sStat; /* Output of stat() on database file */ |
5974 | int rc = SQLITE_OK; |
5975 | if( 0==osStat(zFile, &sStat) ){ |
5976 | *pMode = sStat.st_mode & 0777; |
5977 | *pUid = sStat.st_uid; |
5978 | *pGid = sStat.st_gid; |
5979 | }else{ |
5980 | rc = SQLITE_IOERR_FSTAT; |
5981 | } |
5982 | return rc; |
5983 | } |
5984 | |
5985 | /* |
5986 | ** This function is called by unixOpen() to determine the unix permissions |
5987 | ** to create new files with. If no error occurs, then SQLITE_OK is returned |
5988 | ** and a value suitable for passing as the third argument to open(2) is |
5989 | ** written to *pMode. If an IO error occurs, an SQLite error code is |
5990 | ** returned and the value of *pMode is not modified. |
5991 | ** |
5992 | ** In most cases, this routine sets *pMode to 0, which will become |
5993 | ** an indication to robust_open() to create the file using |
5994 | ** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. |
5995 | ** But if the file being opened is a WAL or regular journal file, then |
5996 | ** this function queries the file-system for the permissions on the |
5997 | ** corresponding database file and sets *pMode to this value. Whenever |
5998 | ** possible, WAL and journal files are created using the same permissions |
5999 | ** as the associated database file. |
6000 | ** |
6001 | ** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the |
6002 | ** original filename is unavailable. But 8_3_NAMES is only used for |
6003 | ** FAT filesystems and permissions do not matter there, so just use |
6004 | ** the default permissions. In 8_3_NAMES mode, leave *pMode set to zero. |
6005 | */ |
6006 | static int findCreateFileMode( |
6007 | const char *zPath, /* Path of file (possibly) being created */ |
6008 | int flags, /* Flags passed as 4th argument to xOpen() */ |
6009 | mode_t *pMode, /* OUT: Permissions to open file with */ |
6010 | uid_t *pUid, /* OUT: uid to set on the file */ |
6011 | gid_t *pGid /* OUT: gid to set on the file */ |
6012 | ){ |
6013 | int rc = SQLITE_OK; /* Return Code */ |
6014 | *pMode = 0; |
6015 | *pUid = 0; |
6016 | *pGid = 0; |
6017 | if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ |
6018 | char zDb[MAX_PATHNAME+1]; /* Database file path */ |
6019 | int nDb; /* Number of valid bytes in zDb */ |
6020 | |
6021 | /* zPath is a path to a WAL or journal file. The following block derives |
6022 | ** the path to the associated database file from zPath. This block handles |
6023 | ** the following naming conventions: |
6024 | ** |
6025 | ** "<path to db>-journal" |
6026 | ** "<path to db>-wal" |
6027 | ** "<path to db>-journalNN" |
6028 | ** "<path to db>-walNN" |
6029 | ** |
6030 | ** where NN is a decimal number. The NN naming schemes are |
6031 | ** used by the test_multiplex.c module. |
6032 | ** |
6033 | ** In normal operation, the journal file name will always contain |
6034 | ** a '-' character. However in 8+3 filename mode, or if a corrupt |
6035 | ** rollback journal specifies a super-journal with a goofy name, then |
6036 | ** the '-' might be missing or the '-' might be the first character in |
6037 | ** the filename. In that case, just return SQLITE_OK with *pMode==0. |
6038 | */ |
6039 | nDb = sqlite3Strlen30(zPath) - 1; |
6040 | while( nDb>0 && zPath[nDb]!='.' ){ |
6041 | if( zPath[nDb]=='-' ){ |
6042 | memcpy(zDb, zPath, nDb); |
6043 | zDb[nDb] = '\0'; |
6044 | rc = getFileMode(zDb, pMode, pUid, pGid); |
6045 | break; |
6046 | } |
6047 | nDb--; |
6048 | } |
6049 | }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ |
6050 | *pMode = 0600; |
6051 | }else if( flags & SQLITE_OPEN_URI ){ |
6052 | /* If this is a main database file and the file was opened using a URI |
6053 | ** filename, check for the "modeof" parameter. If present, interpret |
6054 | ** its value as a filename and try to copy the mode, uid and gid from |
6055 | ** that file. */ |
6056 | const char *z = sqlite3_uri_parameter(zPath, "modeof" ); |
6057 | if( z ){ |
6058 | rc = getFileMode(z, pMode, pUid, pGid); |
6059 | } |
6060 | } |
6061 | return rc; |
6062 | } |
6063 | |
6064 | /* |
6065 | ** Open the file zPath. |
6066 | ** |
6067 | ** Previously, the SQLite OS layer used three functions in place of this |
6068 | ** one: |
6069 | ** |
6070 | ** sqlite3OsOpenReadWrite(); |
6071 | ** sqlite3OsOpenReadOnly(); |
6072 | ** sqlite3OsOpenExclusive(); |
6073 | ** |
6074 | ** These calls correspond to the following combinations of flags: |
6075 | ** |
6076 | ** ReadWrite() -> (READWRITE | CREATE) |
6077 | ** ReadOnly() -> (READONLY) |
6078 | ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) |
6079 | ** |
6080 | ** The old OpenExclusive() accepted a boolean argument - "delFlag". If |
6081 | ** true, the file was configured to be automatically deleted when the |
6082 | ** file handle closed. To achieve the same effect using this new |
6083 | ** interface, add the DELETEONCLOSE flag to those specified above for |
6084 | ** OpenExclusive(). |
6085 | */ |
6086 | static int unixOpen( |
6087 | sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ |
6088 | const char *zPath, /* Pathname of file to be opened */ |
6089 | sqlite3_file *pFile, /* The file descriptor to be filled in */ |
6090 | int flags, /* Input flags to control the opening */ |
6091 | int *pOutFlags /* Output flags returned to SQLite core */ |
6092 | ){ |
6093 | unixFile *p = (unixFile *)pFile; |
6094 | int fd = -1; /* File descriptor returned by open() */ |
6095 | int openFlags = 0; /* Flags to pass to open() */ |
6096 | int eType = flags&0x0FFF00; /* Type of file to open */ |
6097 | int noLock; /* True to omit locking primitives */ |
6098 | int rc = SQLITE_OK; /* Function Return Code */ |
6099 | int ctrlFlags = 0; /* UNIXFILE_* flags */ |
6100 | |
6101 | int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); |
6102 | int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); |
6103 | int isCreate = (flags & SQLITE_OPEN_CREATE); |
6104 | int isReadonly = (flags & SQLITE_OPEN_READONLY); |
6105 | int isReadWrite = (flags & SQLITE_OPEN_READWRITE); |
6106 | #if SQLITE_ENABLE_LOCKING_STYLE |
6107 | int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); |
6108 | #endif |
6109 | #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE |
6110 | struct statfs fsInfo; |
6111 | #endif |
6112 | |
6113 | /* If creating a super- or main-file journal, this function will open |
6114 | ** a file-descriptor on the directory too. The first time unixSync() |
6115 | ** is called the directory file descriptor will be fsync()ed and close()d. |
6116 | */ |
6117 | int isNewJrnl = (isCreate && ( |
6118 | eType==SQLITE_OPEN_SUPER_JOURNAL |
6119 | || eType==SQLITE_OPEN_MAIN_JOURNAL |
6120 | || eType==SQLITE_OPEN_WAL |
6121 | )); |
6122 | |
6123 | /* If argument zPath is a NULL pointer, this function is required to open |
6124 | ** a temporary file. Use this buffer to store the file name in. |
6125 | */ |
6126 | char zTmpname[MAX_PATHNAME+2]; |
6127 | const char *zName = zPath; |
6128 | |
6129 | /* Check the following statements are true: |
6130 | ** |
6131 | ** (a) Exactly one of the READWRITE and READONLY flags must be set, and |
6132 | ** (b) if CREATE is set, then READWRITE must also be set, and |
6133 | ** (c) if EXCLUSIVE is set, then CREATE must also be set. |
6134 | ** (d) if DELETEONCLOSE is set, then CREATE must also be set. |
6135 | */ |
6136 | assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); |
6137 | assert(isCreate==0 || isReadWrite); |
6138 | assert(isExclusive==0 || isCreate); |
6139 | assert(isDelete==0 || isCreate); |
6140 | |
6141 | /* The main DB, main journal, WAL file and super-journal are never |
6142 | ** automatically deleted. Nor are they ever temporary files. */ |
6143 | assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); |
6144 | assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); |
6145 | assert( (!isDelete && zName) || eType!=SQLITE_OPEN_SUPER_JOURNAL ); |
6146 | assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); |
6147 | |
6148 | /* Assert that the upper layer has set one of the "file-type" flags. */ |
6149 | assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB |
6150 | || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL |
6151 | || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_SUPER_JOURNAL |
6152 | || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL |
6153 | ); |
6154 | |
6155 | /* Detect a pid change and reset the PRNG. There is a race condition |
6156 | ** here such that two or more threads all trying to open databases at |
6157 | ** the same instant might all reset the PRNG. But multiple resets |
6158 | ** are harmless. |
6159 | */ |
6160 | if( randomnessPid!=osGetpid(0) ){ |
6161 | randomnessPid = osGetpid(0); |
6162 | sqlite3_randomness(0,0); |
6163 | } |
6164 | memset(p, 0, sizeof(unixFile)); |
6165 | |
6166 | #ifdef SQLITE_ASSERT_NO_FILES |
6167 | /* Applications that never read or write a persistent disk files */ |
6168 | assert( zName==0 ); |
6169 | #endif |
6170 | |
6171 | if( eType==SQLITE_OPEN_MAIN_DB ){ |
6172 | UnixUnusedFd *pUnused; |
6173 | pUnused = findReusableFd(zName, flags); |
6174 | if( pUnused ){ |
6175 | fd = pUnused->fd; |
6176 | }else{ |
6177 | pUnused = sqlite3_malloc64(sizeof(*pUnused)); |
6178 | if( !pUnused ){ |
6179 | return SQLITE_NOMEM_BKPT; |
6180 | } |
6181 | } |
6182 | p->pPreallocatedUnused = pUnused; |
6183 | |
6184 | /* Database filenames are double-zero terminated if they are not |
6185 | ** URIs with parameters. Hence, they can always be passed into |
6186 | ** sqlite3_uri_parameter(). */ |
6187 | assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); |
6188 | |
6189 | }else if( !zName ){ |
6190 | /* If zName is NULL, the upper layer is requesting a temp file. */ |
6191 | assert(isDelete && !isNewJrnl); |
6192 | rc = unixGetTempname(pVfs->mxPathname, zTmpname); |
6193 | if( rc!=SQLITE_OK ){ |
6194 | return rc; |
6195 | } |
6196 | zName = zTmpname; |
6197 | |
6198 | /* Generated temporary filenames are always double-zero terminated |
6199 | ** for use by sqlite3_uri_parameter(). */ |
6200 | assert( zName[strlen(zName)+1]==0 ); |
6201 | } |
6202 | |
6203 | /* Determine the value of the flags parameter passed to POSIX function |
6204 | ** open(). These must be calculated even if open() is not called, as |
6205 | ** they may be stored as part of the file handle and used by the |
6206 | ** 'conch file' locking functions later on. */ |
6207 | if( isReadonly ) openFlags |= O_RDONLY; |
6208 | if( isReadWrite ) openFlags |= O_RDWR; |
6209 | if( isCreate ) openFlags |= O_CREAT; |
6210 | if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); |
6211 | openFlags |= (O_LARGEFILE|O_BINARY|O_NOFOLLOW); |
6212 | |
6213 | if( fd<0 ){ |
6214 | mode_t openMode; /* Permissions to create file with */ |
6215 | uid_t uid; /* Userid for the file */ |
6216 | gid_t gid; /* Groupid for the file */ |
6217 | rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); |
6218 | if( rc!=SQLITE_OK ){ |
6219 | assert( !p->pPreallocatedUnused ); |
6220 | assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); |
6221 | return rc; |
6222 | } |
6223 | fd = robust_open(zName, openFlags, openMode); |
6224 | OSTRACE(("OPENX %-3d %s 0%o\n" , fd, zName, openFlags)); |
6225 | assert( !isExclusive || (openFlags & O_CREAT)!=0 ); |
6226 | if( fd<0 ){ |
6227 | if( isNewJrnl && errno==EACCES && osAccess(zName, F_OK) ){ |
6228 | /* If unable to create a journal because the directory is not |
6229 | ** writable, change the error code to indicate that. */ |
6230 | rc = SQLITE_READONLY_DIRECTORY; |
6231 | }else if( errno!=EISDIR && isReadWrite ){ |
6232 | /* Failed to open the file for read/write access. Try read-only. */ |
6233 | flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); |
6234 | openFlags &= ~(O_RDWR|O_CREAT); |
6235 | flags |= SQLITE_OPEN_READONLY; |
6236 | openFlags |= O_RDONLY; |
6237 | isReadonly = 1; |
6238 | fd = robust_open(zName, openFlags, openMode); |
6239 | } |
6240 | } |
6241 | if( fd<0 ){ |
6242 | int rc2 = unixLogError(SQLITE_CANTOPEN_BKPT, "open" , zName); |
6243 | if( rc==SQLITE_OK ) rc = rc2; |
6244 | goto open_finished; |
6245 | } |
6246 | |
6247 | /* The owner of the rollback journal or WAL file should always be the |
6248 | ** same as the owner of the database file. Try to ensure that this is |
6249 | ** the case. The chown() system call will be a no-op if the current |
6250 | ** process lacks root privileges, be we should at least try. Without |
6251 | ** this step, if a root process opens a database file, it can leave |
6252 | ** behinds a journal/WAL that is owned by root and hence make the |
6253 | ** database inaccessible to unprivileged processes. |
6254 | ** |
6255 | ** If openMode==0, then that means uid and gid are not set correctly |
6256 | ** (probably because SQLite is configured to use 8+3 filename mode) and |
6257 | ** in that case we do not want to attempt the chown(). |
6258 | */ |
6259 | if( openMode && (flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL))!=0 ){ |
6260 | robustFchown(fd, uid, gid); |
6261 | } |
6262 | } |
6263 | assert( fd>=0 ); |
6264 | if( pOutFlags ){ |
6265 | *pOutFlags = flags; |
6266 | } |
6267 | |
6268 | if( p->pPreallocatedUnused ){ |
6269 | p->pPreallocatedUnused->fd = fd; |
6270 | p->pPreallocatedUnused->flags = |
6271 | flags & (SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE); |
6272 | } |
6273 | |
6274 | if( isDelete ){ |
6275 | #if OS_VXWORKS |
6276 | zPath = zName; |
6277 | #elif defined(SQLITE_UNLINK_AFTER_CLOSE) |
6278 | zPath = sqlite3_mprintf("%s" , zName); |
6279 | if( zPath==0 ){ |
6280 | robust_close(p, fd, __LINE__); |
6281 | return SQLITE_NOMEM_BKPT; |
6282 | } |
6283 | #else |
6284 | osUnlink(zName); |
6285 | #endif |
6286 | } |
6287 | #if SQLITE_ENABLE_LOCKING_STYLE |
6288 | else{ |
6289 | p->openFlags = openFlags; |
6290 | } |
6291 | #endif |
6292 | |
6293 | #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE |
6294 | if( fstatfs(fd, &fsInfo) == -1 ){ |
6295 | storeLastErrno(p, errno); |
6296 | robust_close(p, fd, __LINE__); |
6297 | return SQLITE_IOERR_ACCESS; |
6298 | } |
6299 | if (0 == strncmp("msdos" , fsInfo.f_fstypename, 5)) { |
6300 | ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; |
6301 | } |
6302 | if (0 == strncmp("exfat" , fsInfo.f_fstypename, 5)) { |
6303 | ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; |
6304 | } |
6305 | #endif |
6306 | |
6307 | /* Set up appropriate ctrlFlags */ |
6308 | if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; |
6309 | if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; |
6310 | noLock = eType!=SQLITE_OPEN_MAIN_DB; |
6311 | if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; |
6312 | if( isNewJrnl ) ctrlFlags |= UNIXFILE_DIRSYNC; |
6313 | if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; |
6314 | |
6315 | #if SQLITE_ENABLE_LOCKING_STYLE |
6316 | #if SQLITE_PREFER_PROXY_LOCKING |
6317 | isAutoProxy = 1; |
6318 | #endif |
6319 | if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ |
6320 | char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING" ); |
6321 | int useProxy = 0; |
6322 | |
6323 | /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means |
6324 | ** never use proxy, NULL means use proxy for non-local files only. */ |
6325 | if( envforce!=NULL ){ |
6326 | useProxy = atoi(envforce)>0; |
6327 | }else{ |
6328 | useProxy = !(fsInfo.f_flags&MNT_LOCAL); |
6329 | } |
6330 | if( useProxy ){ |
6331 | rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); |
6332 | if( rc==SQLITE_OK ){ |
6333 | rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:" ); |
6334 | if( rc!=SQLITE_OK ){ |
6335 | /* Use unixClose to clean up the resources added in fillInUnixFile |
6336 | ** and clear all the structure's references. Specifically, |
6337 | ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op |
6338 | */ |
6339 | unixClose(pFile); |
6340 | return rc; |
6341 | } |
6342 | } |
6343 | goto open_finished; |
6344 | } |
6345 | } |
6346 | #endif |
6347 | |
6348 | assert( zPath==0 || zPath[0]=='/' |
6349 | || eType==SQLITE_OPEN_SUPER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL |
6350 | ); |
6351 | rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); |
6352 | |
6353 | open_finished: |
6354 | if( rc!=SQLITE_OK ){ |
6355 | sqlite3_free(p->pPreallocatedUnused); |
6356 | } |
6357 | return rc; |
6358 | } |
6359 | |
6360 | |
6361 | /* |
6362 | ** Delete the file at zPath. If the dirSync argument is true, fsync() |
6363 | ** the directory after deleting the file. |
6364 | */ |
6365 | static int unixDelete( |
6366 | sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ |
6367 | const char *zPath, /* Name of file to be deleted */ |
6368 | int dirSync /* If true, fsync() directory after deleting file */ |
6369 | ){ |
6370 | int rc = SQLITE_OK; |
6371 | UNUSED_PARAMETER(NotUsed); |
6372 | SimulateIOError(return SQLITE_IOERR_DELETE); |
6373 | if( osUnlink(zPath)==(-1) ){ |
6374 | if( errno==ENOENT |
6375 | #if OS_VXWORKS |
6376 | || osAccess(zPath,0)!=0 |
6377 | #endif |
6378 | ){ |
6379 | rc = SQLITE_IOERR_DELETE_NOENT; |
6380 | }else{ |
6381 | rc = unixLogError(SQLITE_IOERR_DELETE, "unlink" , zPath); |
6382 | } |
6383 | return rc; |
6384 | } |
6385 | #ifndef SQLITE_DISABLE_DIRSYNC |
6386 | if( (dirSync & 1)!=0 ){ |
6387 | int fd; |
6388 | rc = osOpenDirectory(zPath, &fd); |
6389 | if( rc==SQLITE_OK ){ |
6390 | if( full_fsync(fd,0,0) ){ |
6391 | rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync" , zPath); |
6392 | } |
6393 | robust_close(0, fd, __LINE__); |
6394 | }else{ |
6395 | assert( rc==SQLITE_CANTOPEN ); |
6396 | rc = SQLITE_OK; |
6397 | } |
6398 | } |
6399 | #endif |
6400 | return rc; |
6401 | } |
6402 | |
6403 | /* |
6404 | ** Test the existence of or access permissions of file zPath. The |
6405 | ** test performed depends on the value of flags: |
6406 | ** |
6407 | ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists |
6408 | ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. |
6409 | ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. |
6410 | ** |
6411 | ** Otherwise return 0. |
6412 | */ |
6413 | static int unixAccess( |
6414 | sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ |
6415 | const char *zPath, /* Path of the file to examine */ |
6416 | int flags, /* What do we want to learn about the zPath file? */ |
6417 | int *pResOut /* Write result boolean here */ |
6418 | ){ |
6419 | UNUSED_PARAMETER(NotUsed); |
6420 | SimulateIOError( return SQLITE_IOERR_ACCESS; ); |
6421 | assert( pResOut!=0 ); |
6422 | |
6423 | /* The spec says there are three possible values for flags. But only |
6424 | ** two of them are actually used */ |
6425 | assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE ); |
6426 | |
6427 | if( flags==SQLITE_ACCESS_EXISTS ){ |
6428 | struct stat buf; |
6429 | *pResOut = 0==osStat(zPath, &buf) && |
6430 | (!S_ISREG(buf.st_mode) || buf.st_size>0); |
6431 | }else{ |
6432 | *pResOut = osAccess(zPath, W_OK|R_OK)==0; |
6433 | } |
6434 | return SQLITE_OK; |
6435 | } |
6436 | |
6437 | /* |
6438 | ** A pathname under construction |
6439 | */ |
typedef struct DbPath {
  int rc;          /* Error code; stays zero until something goes wrong */
  int nSymlink;    /* Count of symbolic links resolved so far */
  char *zOut;      /* Buffer receiving the pathname being built */
  int nOut;        /* Size of the zOut[] buffer in bytes */
  int nUsed;       /* Number of bytes of zOut[] filled in so far */
} DbPath;
6448 | |
6449 | /* Forward reference */ |
6450 | static void appendAllPathElements(DbPath*,const char*); |
6451 | |
6452 | /* |
6453 | ** Append a single path element to the DbPath under construction |
6454 | */ |
6455 | static void appendOnePathElement( |
6456 | DbPath *pPath, /* Path under construction, to which to append zName */ |
6457 | const char *zName, /* Name to append to pPath. Not zero-terminated */ |
6458 | int nName /* Number of significant bytes in zName */ |
6459 | ){ |
6460 | assert( nName>0 ); |
6461 | assert( zName!=0 ); |
6462 | if( zName[0]=='.' ){ |
6463 | if( nName==1 ) return; |
6464 | if( zName[1]=='.' && nName==2 ){ |
6465 | if( pPath->nUsed<=1 ){ |
6466 | pPath->rc = SQLITE_ERROR; |
6467 | return; |
6468 | } |
6469 | assert( pPath->zOut[0]=='/' ); |
6470 | while( pPath->zOut[--pPath->nUsed]!='/' ){} |
6471 | return; |
6472 | } |
6473 | } |
6474 | if( pPath->nUsed + nName + 2 >= pPath->nOut ){ |
6475 | pPath->rc = SQLITE_ERROR; |
6476 | return; |
6477 | } |
6478 | pPath->zOut[pPath->nUsed++] = '/'; |
6479 | memcpy(&pPath->zOut[pPath->nUsed], zName, nName); |
6480 | pPath->nUsed += nName; |
6481 | #if defined(HAVE_READLINK) && defined(HAVE_LSTAT) |
6482 | if( pPath->rc==SQLITE_OK ){ |
6483 | const char *zIn; |
6484 | struct stat buf; |
6485 | pPath->zOut[pPath->nUsed] = 0; |
6486 | zIn = pPath->zOut; |
6487 | if( osLstat(zIn, &buf)!=0 ){ |
6488 | if( errno!=ENOENT ){ |
6489 | pPath->rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat" , zIn); |
6490 | } |
6491 | }else if( S_ISLNK(buf.st_mode) ){ |
6492 | ssize_t got; |
6493 | char zLnk[SQLITE_MAX_PATHLEN+2]; |
6494 | if( pPath->nSymlink++ > SQLITE_MAX_SYMLINK ){ |
6495 | pPath->rc = SQLITE_CANTOPEN_BKPT; |
6496 | return; |
6497 | } |
6498 | got = osReadlink(zIn, zLnk, sizeof(zLnk)-2); |
6499 | if( got<=0 || got>=(ssize_t)sizeof(zLnk)-2 ){ |
6500 | pPath->rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink" , zIn); |
6501 | return; |
6502 | } |
6503 | zLnk[got] = 0; |
6504 | if( zLnk[0]=='/' ){ |
6505 | pPath->nUsed = 0; |
6506 | }else{ |
6507 | pPath->nUsed -= nName + 1; |
6508 | } |
6509 | appendAllPathElements(pPath, zLnk); |
6510 | } |
6511 | } |
6512 | #endif |
6513 | } |
6514 | |
6515 | /* |
6516 | ** Append all path elements in zPath to the DbPath under construction. |
6517 | */ |
6518 | static void appendAllPathElements( |
6519 | DbPath *pPath, /* Path under construction, to which to append zName */ |
6520 | const char *zPath /* Path to append to pPath. Is zero-terminated */ |
6521 | ){ |
6522 | int i = 0; |
6523 | int j = 0; |
6524 | do{ |
6525 | while( zPath[i] && zPath[i]!='/' ){ i++; } |
6526 | if( i>j ){ |
6527 | appendOnePathElement(pPath, &zPath[j], i-j); |
6528 | } |
6529 | j = i+1; |
6530 | }while( zPath[i++] ); |
6531 | } |
6532 | |
6533 | /* |
6534 | ** Turn a relative pathname into a full pathname. The relative path |
6535 | ** is stored as a nul-terminated string in the buffer pointed to by |
6536 | ** zPath. |
6537 | ** |
6538 | ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes |
6539 | ** (in this case, MAX_PATHNAME bytes). The full-path is written to |
6540 | ** this buffer before returning. |
6541 | */ |
6542 | static int unixFullPathname( |
6543 | sqlite3_vfs *pVfs, /* Pointer to vfs object */ |
6544 | const char *zPath, /* Possibly relative input path */ |
6545 | int nOut, /* Size of output buffer in bytes */ |
6546 | char *zOut /* Output buffer */ |
6547 | ){ |
6548 | DbPath path; |
6549 | UNUSED_PARAMETER(pVfs); |
6550 | path.rc = 0; |
6551 | path.nUsed = 0; |
6552 | path.nSymlink = 0; |
6553 | path.nOut = nOut; |
6554 | path.zOut = zOut; |
6555 | if( zPath[0]!='/' ){ |
6556 | char zPwd[SQLITE_MAX_PATHLEN+2]; |
6557 | if( osGetcwd(zPwd, sizeof(zPwd)-2)==0 ){ |
6558 | return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd" , zPath); |
6559 | } |
6560 | appendAllPathElements(&path, zPwd); |
6561 | } |
6562 | appendAllPathElements(&path, zPath); |
6563 | zOut[path.nUsed] = 0; |
6564 | if( path.rc || path.nUsed<2 ) return SQLITE_CANTOPEN_BKPT; |
6565 | if( path.nSymlink ) return SQLITE_OK_SYMLINK; |
6566 | return SQLITE_OK; |
6567 | } |
6568 | |
6569 | #ifndef SQLITE_OMIT_LOAD_EXTENSION |
6570 | /* |
6571 | ** Interfaces for opening a shared library, finding entry points |
6572 | ** within the shared library, and closing the shared library. |
6573 | */ |
6574 | #include <dlfcn.h> |
6575 | static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){ |
6576 | UNUSED_PARAMETER(NotUsed); |
6577 | return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL); |
6578 | } |
6579 | |
6580 | /* |
6581 | ** SQLite calls this function immediately after a call to unixDlSym() or |
6582 | ** unixDlOpen() fails (returns a null pointer). If a more detailed error |
6583 | ** message is available, it is written to zBufOut. If no error message |
6584 | ** is available, zBufOut is left unmodified and SQLite uses a default |
6585 | ** error message. |
6586 | */ |
6587 | static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){ |
6588 | const char *zErr; |
6589 | UNUSED_PARAMETER(NotUsed); |
6590 | unixEnterMutex(); |
6591 | zErr = dlerror(); |
6592 | if( zErr ){ |
6593 | sqlite3_snprintf(nBuf, zBufOut, "%s" , zErr); |
6594 | } |
6595 | unixLeaveMutex(); |
6596 | } |
6597 | static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){ |
6598 | /* |
6599 | ** GCC with -pedantic-errors says that C90 does not allow a void* to be |
6600 | ** cast into a pointer to a function. And yet the library dlsym() routine |
6601 | ** returns a void* which is really a pointer to a function. So how do we |
6602 | ** use dlsym() with -pedantic-errors? |
6603 | ** |
6604 | ** Variable x below is defined to be a pointer to a function taking |
6605 | ** parameters void* and const char* and returning a pointer to a function. |
6606 | ** We initialize x by assigning it a pointer to the dlsym() function. |
6607 | ** (That assignment requires a cast.) Then we call the function that |
6608 | ** x points to. |
6609 | ** |
6610 | ** This work-around is unlikely to work correctly on any system where |
6611 | ** you really cannot cast a function pointer into void*. But then, on the |
6612 | ** other hand, dlsym() will not work on such a system either, so we have |
6613 | ** not really lost anything. |
6614 | */ |
6615 | void (*(*x)(void*,const char*))(void); |
6616 | UNUSED_PARAMETER(NotUsed); |
6617 | x = (void(*(*)(void*,const char*))(void))dlsym; |
6618 | return (*x)(p, zSym); |
6619 | } |
6620 | static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){ |
6621 | UNUSED_PARAMETER(NotUsed); |
6622 | dlclose(pHandle); |
6623 | } |
#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
  /* No shared-library support is compiled in: each of the four dynamic
  ** loading method names expands to 0 in place of a function. */
  #define unixDlOpen 0
  #define unixDlError 0
  #define unixDlSym 0
  #define unixDlClose 0
#endif
6630 | |
6631 | /* |
6632 | ** Write nBuf bytes of random data to the supplied buffer zBuf. |
6633 | */ |
6634 | static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ |
6635 | UNUSED_PARAMETER(NotUsed); |
6636 | assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); |
6637 | |
6638 | /* We have to initialize zBuf to prevent valgrind from reporting |
6639 | ** errors. The reports issued by valgrind are incorrect - we would |
6640 | ** prefer that the randomness be increased by making use of the |
6641 | ** uninitialized space in zBuf - but valgrind errors tend to worry |
6642 | ** some users. Rather than argue, it seems easier just to initialize |
6643 | ** the whole array and silence valgrind, even if that means less randomness |
6644 | ** in the random seed. |
6645 | ** |
6646 | ** When testing, initializing zBuf[] to zero is all we do. That means |
6647 | ** that we always use the same random number sequence. This makes the |
6648 | ** tests repeatable. |
6649 | */ |
6650 | memset(zBuf, 0, nBuf); |
6651 | randomnessPid = osGetpid(0); |
6652 | #if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) |
6653 | { |
6654 | int fd, got; |
6655 | fd = robust_open("/dev/urandom" , O_RDONLY, 0); |
6656 | if( fd<0 ){ |
6657 | time_t t; |
6658 | time(&t); |
6659 | memcpy(zBuf, &t, sizeof(t)); |
6660 | memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); |
6661 | assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); |
6662 | nBuf = sizeof(t) + sizeof(randomnessPid); |
6663 | }else{ |
6664 | do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); |
6665 | robust_close(0, fd, __LINE__); |
6666 | } |
6667 | } |
6668 | #endif |
6669 | return nBuf; |
6670 | } |
6671 | |
6672 | |
6673 | /* |
6674 | ** Sleep for a little while. Return the amount of time slept. |
6675 | ** The argument is the number of microseconds we want to sleep. |
6676 | ** The return value is the number of microseconds of sleep actually |
6677 | ** requested from the underlying operating system, a number which |
6678 | ** might be greater than or equal to the argument, but not less |
6679 | ** than the argument. |
6680 | */ |
6681 | static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ |
6682 | #if OS_VXWORKS |
6683 | struct timespec sp; |
6684 | |
6685 | sp.tv_sec = microseconds / 1000000; |
6686 | sp.tv_nsec = (microseconds % 1000000) * 1000; |
6687 | nanosleep(&sp, NULL); |
6688 | UNUSED_PARAMETER(NotUsed); |
6689 | return microseconds; |
6690 | #elif defined(HAVE_USLEEP) && HAVE_USLEEP |
6691 | if( microseconds>=1000000 ) sleep(microseconds/1000000); |
6692 | if( microseconds%1000000 ) usleep(microseconds%1000000); |
6693 | UNUSED_PARAMETER(NotUsed); |
6694 | return microseconds; |
6695 | #else |
6696 | int seconds = (microseconds+999999)/1000000; |
6697 | sleep(seconds); |
6698 | UNUSED_PARAMETER(NotUsed); |
6699 | return seconds*1000000; |
6700 | #endif |
6701 | } |
6702 | |
/*
** The following variable, if set to a non-zero value, is interpreted as
** the number of seconds since 1970 and is used to set the result of
** sqlite3OsCurrentTime() during testing.  It exists only in SQLITE_TEST
** builds; while it is zero the real clock is used.
*/
#ifdef SQLITE_TEST
int sqlite3_current_time = 0;  /* Fake system time in seconds since 1970. */
#endif
6711 | |
6712 | /* |
6713 | ** Find the current time (in Universal Coordinated Time). Write into *piNow |
6714 | ** the current time and date as a Julian Day number times 86_400_000. In |
6715 | ** other words, write into *piNow the number of milliseconds since the Julian |
6716 | ** epoch of noon in Greenwich on November 24, 4714 B.C according to the |
6717 | ** proleptic Gregorian calendar. |
6718 | ** |
6719 | ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date |
6720 | ** cannot be found. |
6721 | */ |
6722 | static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ |
6723 | static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; |
6724 | int rc = SQLITE_OK; |
6725 | #if defined(NO_GETTOD) |
6726 | time_t t; |
6727 | time(&t); |
6728 | *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; |
6729 | #elif OS_VXWORKS |
6730 | struct timespec sNow; |
6731 | clock_gettime(CLOCK_REALTIME, &sNow); |
6732 | *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; |
6733 | #else |
6734 | struct timeval sNow; |
6735 | (void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */ |
6736 | *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; |
6737 | #endif |
6738 | |
6739 | #ifdef SQLITE_TEST |
6740 | if( sqlite3_current_time ){ |
6741 | *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; |
6742 | } |
6743 | #endif |
6744 | UNUSED_PARAMETER(NotUsed); |
6745 | return rc; |
6746 | } |
6747 | |
6748 | #ifndef SQLITE_OMIT_DEPRECATED |
6749 | /* |
6750 | ** Find the current time (in Universal Coordinated Time). Write the |
6751 | ** current time and date as a Julian Day number into *prNow and |
6752 | ** return 0. Return 1 if the time and date cannot be found. |
6753 | */ |
6754 | static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ |
6755 | sqlite3_int64 i = 0; |
6756 | int rc; |
6757 | UNUSED_PARAMETER(NotUsed); |
6758 | rc = unixCurrentTimeInt64(0, &i); |
6759 | *prNow = i/86400000.0; |
6760 | return rc; |
6761 | } |
6762 | #else |
6763 | # define unixCurrentTime 0 |
6764 | #endif |
6765 | |
6766 | /* |
6767 | ** The xGetLastError() method is designed to return a better |
6768 | ** low-level error message when operating-system problems come up |
6769 | ** during SQLite operation. Only the integer return code is currently |
6770 | ** used. |
6771 | */ |
6772 | static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ |
6773 | UNUSED_PARAMETER(NotUsed); |
6774 | UNUSED_PARAMETER(NotUsed2); |
6775 | UNUSED_PARAMETER(NotUsed3); |
6776 | return errno; |
6777 | } |
6778 | |
6779 | |
6780 | /* |
6781 | ************************ End of sqlite3_vfs methods *************************** |
6782 | ******************************************************************************/ |
6783 | |
6784 | /****************************************************************************** |
6785 | ************************** Begin Proxy Locking ******************************** |
6786 | ** |
** Proxy locking is a "uber-locking-method" in this sense:  It uses the
** other locking methods on secondary lock files.  Proxy locking is a
** meta-layer over top of the primitive locking implemented above.  For
** this reason, the division that implements proxy locking is deferred
** until late in the file (here) after all of the other I/O methods have
** been defined - so that the primitive locking methods are available
** as services to help with the implementation of proxy locking.
6794 | ** |
6795 | **** |
6796 | ** |
** The default locking schemes in SQLite use byte-range locks on the
** database file to coordinate safe, concurrent access by multiple readers
** and writers [http://sqlite.org/lockingv3.html].  The five file locking
** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented
** as POSIX read & write locks over a fixed set of locations (via fsctl).
** On AFP and SMB, only exclusive byte-range locks are available, via fsctl
** with _IOWR('z', 23, struct ByteRangeLockPB2), to track the same 5 states.
** To simulate a F_RDLCK on the shared range on AFP, a randomly selected
** address in the shared range is taken for a SHARED lock, and the entire
** shared range is taken for an EXCLUSIVE lock:
6807 | ** |
6808 | ** PENDING_BYTE 0x40000000 |
6809 | ** RESERVED_BYTE 0x40000001 |
6810 | ** SHARED_RANGE 0x40000002 -> 0x40000200 |
6811 | ** |
6812 | ** This works well on the local file system, but shows a nearly 100x |
6813 | ** slowdown in read performance on AFP because the AFP client disables |
6814 | ** the read cache when byte-range locks are present. Enabling the read |
6815 | ** cache exposes a cache coherency problem that is present on all OS X |
6816 | ** supported network file systems. NFS and AFP both observe the |
6817 | ** close-to-open semantics for ensuring cache coherency |
6818 | ** [http://nfs.sourceforge.net/#faq_a8], which does not effectively |
6819 | ** address the requirements for concurrent database access by multiple |
6820 | ** readers and writers |
6821 | ** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. |
6822 | ** |
6823 | ** To address the performance and cache coherency issues, proxy file locking |
6824 | ** changes the way database access is controlled by limiting access to a |
6825 | ** single host at a time and moving file locks off of the database file |
6826 | ** and onto a proxy file on the local file system. |
6827 | ** |
6828 | ** |
6829 | ** Using proxy locks |
6830 | ** ----------------- |
6831 | ** |
6832 | ** C APIs |
6833 | ** |
6834 | ** sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE, |
6835 | ** <proxy_path> | ":auto:"); |
6836 | ** sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE, |
6837 | ** &<proxy_path>); |
6838 | ** |
6839 | ** |
6840 | ** SQL pragmas |
6841 | ** |
6842 | ** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto: |
6843 | ** PRAGMA [database.]lock_proxy_file |
6844 | ** |
6845 | ** Specifying ":auto:" means that if there is a conch file with a matching |
6846 | ** host ID in it, the proxy path in the conch file will be used, otherwise |
6847 | ** a proxy path based on the user's temp dir |
6848 | ** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the |
6849 | ** actual proxy file name is generated from the name and path of the |
6850 | ** database file. For example: |
6851 | ** |
6852 | ** For database path "/Users/me/foo.db" |
**       The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:"
6854 | ** |
6855 | ** Once a lock proxy is configured for a database connection, it can not |
6856 | ** be removed, however it may be switched to a different proxy path via |
6857 | ** the above APIs (assuming the conch file is not being held by another |
6858 | ** connection or process). |
6859 | ** |
6860 | ** |
6861 | ** How proxy locking works |
6862 | ** ----------------------- |
6863 | ** |
6864 | ** Proxy file locking relies primarily on two new supporting files: |
6865 | ** |
6866 | ** * conch file to limit access to the database file to a single host |
6867 | ** at a time |
6868 | ** |
6869 | ** * proxy file to act as a proxy for the advisory locks normally |
6870 | ** taken on the database |
6871 | ** |
6872 | ** The conch file - to use a proxy file, sqlite must first "hold the conch" |
6873 | ** by taking an sqlite-style shared lock on the conch file, reading the |
6874 | ** contents and comparing the host's unique host ID (see below) and lock |
6875 | ** proxy path against the values stored in the conch. The conch file is |
6876 | ** stored in the same directory as the database file and the file name |
6877 | ** is patterned after the database file name as ".<databasename>-conch". |
6878 | ** If the conch file does not exist, or its contents do not match the |
6879 | ** host ID and/or proxy path, then the lock is escalated to an exclusive |
6880 | ** lock and the conch file contents is updated with the host ID and proxy |
6881 | ** path and the lock is downgraded to a shared lock again. If the conch |
6882 | ** is held by another process (with a shared lock), the exclusive lock |
6883 | ** will fail and SQLITE_BUSY is returned. |
6884 | ** |
6885 | ** The proxy file - a single-byte file used for all advisory file locks |
6886 | ** normally taken on the database file. This allows for safe sharing |
6887 | ** of the database file for multiple readers and writers on the same |
6888 | ** host (the conch ensures that they all use the same local lock file). |
6889 | ** |
6890 | ** Requesting the lock proxy does not immediately take the conch, it is |
6891 | ** only taken when the first request to lock database file is made. |
6892 | ** This matches the semantics of the traditional locking behavior, where |
6893 | ** opening a connection to a database file does not take a lock on it. |
6894 | ** The shared lock and an open file descriptor are maintained until |
6895 | ** the connection to the database is closed. |
6896 | ** |
6897 | ** The proxy file and the lock file are never deleted so they only need |
6898 | ** to be created the first time they are used. |
6899 | ** |
6900 | ** Configuration options |
6901 | ** --------------------- |
6902 | ** |
6903 | ** SQLITE_PREFER_PROXY_LOCKING |
6904 | ** |
6905 | ** Database files accessed on non-local file systems are |
6906 | ** automatically configured for proxy locking, lock files are |
6907 | ** named automatically using the same logic as |
6908 | ** PRAGMA lock_proxy_file=":auto:" |
6909 | ** |
6910 | ** SQLITE_PROXY_DEBUG |
6911 | ** |
6912 | ** Enables the logging of error messages during host id file |
6913 | ** retrieval and creation |
6914 | ** |
6915 | ** LOCKPROXYDIR |
6916 | ** |
6917 | ** Overrides the default directory used for lock proxy files that |
6918 | ** are named automatically via the ":auto:" setting |
6919 | ** |
6920 | ** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS |
6921 | ** |
6922 | ** Permissions to use when creating a directory for storing the |
6923 | ** lock proxy files, only used when LOCKPROXYDIR is not set. |
6924 | ** |
6925 | ** |
** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
** force proxy locking to be used for every database file opened, and 0
** will force automatic proxy locking to be disabled for all database
** files.  (Explicitly configuring a lock proxy with the lock_proxy_file
** pragma, or with the sqlite3_file_control() API and
** SQLITE_FCNTL_SET_LOCKPROXYFILE, is not affected by
** SQLITE_FORCE_PROXY_LOCKING.)
6932 | */ |
6933 | |
6934 | /* |
6935 | ** Proxy locking is only available on MacOSX |
6936 | */ |
6937 | #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE |
6938 | |
/*
** The proxyLockingContext has the path and file structures for the remote
** and local proxy files in it.  It also remembers the locking context and
** I/O methods that were in place before proxy locking was set up, so that
** they can be restored when the file is closed.
*/
typedef struct proxyLockingContext proxyLockingContext;
struct proxyLockingContext {
  unixFile *conchFile;         /* Open conch file */
  char *conchFilePath;         /* Name of the conch file */
  unixFile *lockProxy;         /* Open proxy lock file */
  char *lockProxyPath;         /* Name of the proxy lock file */
  char *dbPath;                /* Name of the open file */
  int conchHeld;               /* 1 if the conch is held, -1 if lockless */
  int nFails;                  /* Number of conch taking failures */
  void *oldLockingContext;     /* Original lockingcontext to restore on close */
  sqlite3_io_methods const *pOldMethod;     /* Original I/O methods for close */
};
6955 | |
6956 | /* |
6957 | ** The proxy lock file path for the database at dbPath is written into lPath, |
6958 | ** which must point to valid, writable memory large enough for a maxLen length |
6959 | ** file path. |
6960 | */ |
6961 | static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ |
6962 | int len; |
6963 | int dbLen; |
6964 | int i; |
6965 | |
6966 | #ifdef LOCKPROXYDIR |
6967 | len = strlcpy(lPath, LOCKPROXYDIR, maxLen); |
6968 | #else |
6969 | # ifdef _CS_DARWIN_USER_TEMP_DIR |
6970 | { |
6971 | if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ |
6972 | OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n" , |
6973 | lPath, errno, osGetpid(0))); |
6974 | return SQLITE_IOERR_LOCK; |
6975 | } |
6976 | len = strlcat(lPath, "sqliteplocks" , maxLen); |
6977 | } |
6978 | # else |
6979 | len = strlcpy(lPath, "/tmp/" , maxLen); |
6980 | # endif |
6981 | #endif |
6982 | |
6983 | if( lPath[len-1]!='/' ){ |
6984 | len = strlcat(lPath, "/" , maxLen); |
6985 | } |
6986 | |
6987 | /* transform the db path to a unique cache name */ |
6988 | dbLen = (int)strlen(dbPath); |
6989 | for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){ |
6990 | char c = dbPath[i]; |
6991 | lPath[i+len] = (c=='/')?'_':c; |
6992 | } |
6993 | lPath[i+len]='\0'; |
6994 | strlcat(lPath, ":auto:" , maxLen); |
6995 | OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n" , lPath, osGetpid(0))); |
6996 | return SQLITE_OK; |
6997 | } |
6998 | |
6999 | /* |
7000 | ** Creates the lock file and any missing directories in lockPath |
7001 | */ |
7002 | static int proxyCreateLockPath(const char *lockPath){ |
7003 | int i, len; |
7004 | char buf[MAXPATHLEN]; |
7005 | int start = 0; |
7006 | |
7007 | assert(lockPath!=NULL); |
7008 | /* try to create all the intermediate directories */ |
7009 | len = (int)strlen(lockPath); |
7010 | buf[0] = lockPath[0]; |
7011 | for( i=1; i<len; i++ ){ |
7012 | if( lockPath[i] == '/' && (i - start > 0) ){ |
7013 | /* only mkdir if leaf dir != "." or "/" or ".." */ |
7014 | if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') |
7015 | || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){ |
7016 | buf[i]='\0'; |
7017 | if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ |
7018 | int err=errno; |
7019 | if( err!=EEXIST ) { |
7020 | OSTRACE(("CREATELOCKPATH FAILED creating %s, " |
7021 | "'%s' proxy lock path=%s pid=%d\n" , |
7022 | buf, strerror(err), lockPath, osGetpid(0))); |
7023 | return err; |
7024 | } |
7025 | } |
7026 | } |
7027 | start=i+1; |
7028 | } |
7029 | buf[i] = lockPath[i]; |
7030 | } |
7031 | OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n" ,lockPath,osGetpid(0))); |
7032 | return 0; |
7033 | } |
7034 | |
7035 | /* |
7036 | ** Create a new VFS file descriptor (stored in memory obtained from |
7037 | ** sqlite3_malloc) and open the file named "path" in the file descriptor. |
7038 | ** |
7039 | ** The caller is responsible not only for closing the file descriptor |
7040 | ** but also for freeing the memory associated with the file descriptor. |
7041 | */ |
7042 | static int proxyCreateUnixFile( |
7043 | const char *path, /* path for the new unixFile */ |
7044 | unixFile **ppFile, /* unixFile created and returned by ref */ |
7045 | int islockfile /* if non zero missing dirs will be created */ |
7046 | ) { |
7047 | int fd = -1; |
7048 | unixFile *pNew; |
7049 | int rc = SQLITE_OK; |
7050 | int openFlags = O_RDWR | O_CREAT | O_NOFOLLOW; |
7051 | sqlite3_vfs dummyVfs; |
7052 | int terrno = 0; |
7053 | UnixUnusedFd *pUnused = NULL; |
7054 | |
7055 | /* 1. first try to open/create the file |
7056 | ** 2. if that fails, and this is a lock file (not-conch), try creating |
7057 | ** the parent directories and then try again. |
7058 | ** 3. if that fails, try to open the file read-only |
7059 | ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file |
7060 | */ |
7061 | pUnused = findReusableFd(path, openFlags); |
7062 | if( pUnused ){ |
7063 | fd = pUnused->fd; |
7064 | }else{ |
7065 | pUnused = sqlite3_malloc64(sizeof(*pUnused)); |
7066 | if( !pUnused ){ |
7067 | return SQLITE_NOMEM_BKPT; |
7068 | } |
7069 | } |
7070 | if( fd<0 ){ |
7071 | fd = robust_open(path, openFlags, 0); |
7072 | terrno = errno; |
7073 | if( fd<0 && errno==ENOENT && islockfile ){ |
7074 | if( proxyCreateLockPath(path) == SQLITE_OK ){ |
7075 | fd = robust_open(path, openFlags, 0); |
7076 | } |
7077 | } |
7078 | } |
7079 | if( fd<0 ){ |
7080 | openFlags = O_RDONLY | O_NOFOLLOW; |
7081 | fd = robust_open(path, openFlags, 0); |
7082 | terrno = errno; |
7083 | } |
7084 | if( fd<0 ){ |
7085 | if( islockfile ){ |
7086 | return SQLITE_BUSY; |
7087 | } |
7088 | switch (terrno) { |
7089 | case EACCES: |
7090 | return SQLITE_PERM; |
7091 | case EIO: |
7092 | return SQLITE_IOERR_LOCK; /* even though it is the conch */ |
7093 | default: |
7094 | return SQLITE_CANTOPEN_BKPT; |
7095 | } |
7096 | } |
7097 | |
7098 | pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); |
7099 | if( pNew==NULL ){ |
7100 | rc = SQLITE_NOMEM_BKPT; |
7101 | goto end_create_proxy; |
7102 | } |
7103 | memset(pNew, 0, sizeof(unixFile)); |
7104 | pNew->openFlags = openFlags; |
7105 | memset(&dummyVfs, 0, sizeof(dummyVfs)); |
7106 | dummyVfs.pAppData = (void*)&autolockIoFinder; |
7107 | dummyVfs.zName = "dummy" ; |
7108 | pUnused->fd = fd; |
7109 | pUnused->flags = openFlags; |
7110 | pNew->pPreallocatedUnused = pUnused; |
7111 | |
7112 | rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); |
7113 | if( rc==SQLITE_OK ){ |
7114 | *ppFile = pNew; |
7115 | return SQLITE_OK; |
7116 | } |
7117 | end_create_proxy: |
7118 | robust_close(pNew, fd, __LINE__); |
7119 | sqlite3_free(pNew); |
7120 | sqlite3_free(pUnused); |
7121 | return rc; |
7122 | } |
7123 | |
#ifdef SQLITE_TEST
/* simulate multiple hosts by creating unique hostid file paths.  When this
** is non-zero, proxyGetHostID() adds its low 8 bits to the first byte of
** the host ID. */
int sqlite3_hostid_num = 0;
#endif

#define PROXY_HOSTIDLEN         16  /* conch file host id length */

#if HAVE_GETHOSTUUID
/* Not always defined in the headers as it ought to be */
extern int gethostuuid(uuid_t id, const struct timespec *wait);
#endif
7135 | |
7136 | /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN |
7137 | ** bytes of writable memory. |
7138 | */ |
7139 | static int proxyGetHostID(unsigned char *pHostID, int *pError){ |
7140 | assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); |
7141 | memset(pHostID, 0, PROXY_HOSTIDLEN); |
7142 | #if HAVE_GETHOSTUUID |
7143 | { |
7144 | struct timespec timeout = {1, 0}; /* 1 sec timeout */ |
7145 | if( gethostuuid(pHostID, &timeout) ){ |
7146 | int err = errno; |
7147 | if( pError ){ |
7148 | *pError = err; |
7149 | } |
7150 | return SQLITE_IOERR; |
7151 | } |
7152 | } |
7153 | #else |
7154 | UNUSED_PARAMETER(pError); |
7155 | #endif |
7156 | #ifdef SQLITE_TEST |
7157 | /* simulate multiple hosts by creating unique hostid file paths */ |
7158 | if( sqlite3_hostid_num != 0){ |
7159 | pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); |
7160 | } |
7161 | #endif |
7162 | |
7163 | return SQLITE_OK; |
7164 | } |
7165 | |
/* The conch file contains the header, host id and lock file path,
** in that order.
*/
#define PROXY_CONCHVERSION 2   /* 1-byte header, 16-byte host id, path */
#define PROXY_HEADERLEN    1   /* conch file header length */
/* Offset of the lock file path: it follows the header and the host id */
#define PROXY_PATHINDEX    (PROXY_HEADERLEN+PROXY_HOSTIDLEN)
/* Size of a conch file that holds a path of MAXPATHLEN bytes */
#define PROXY_MAXCONCHLEN  (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN)
7172 | |
7173 | /* |
7174 | ** Takes an open conch file, copies the contents to a new path and then moves |
7175 | ** it back. The newly created file's file descriptor is assigned to the |
7176 | ** conch file structure and finally the original conch file descriptor is |
7177 | ** closed. Returns zero if successful. |
7178 | */ |
7179 | static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ |
7180 | proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; |
7181 | unixFile *conchFile = pCtx->conchFile; |
7182 | char tPath[MAXPATHLEN]; |
7183 | char buf[PROXY_MAXCONCHLEN]; |
7184 | char *cPath = pCtx->conchFilePath; |
7185 | size_t readLen = 0; |
7186 | size_t pathLen = 0; |
7187 | char errmsg[64] = "" ; |
7188 | int fd = -1; |
7189 | int rc = -1; |
7190 | UNUSED_PARAMETER(myHostID); |
7191 | |
7192 | /* create a new path by replace the trailing '-conch' with '-break' */ |
7193 | pathLen = strlcpy(tPath, cPath, MAXPATHLEN); |
7194 | if( pathLen>MAXPATHLEN || pathLen<6 || |
7195 | (strlcpy(&tPath[pathLen-5], "break" , 6) != 5) ){ |
7196 | sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)" ,(int)pathLen); |
7197 | goto end_breaklock; |
7198 | } |
7199 | /* read the conch content */ |
7200 | readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); |
7201 | if( readLen<PROXY_PATHINDEX ){ |
7202 | sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)" ,(int)readLen); |
7203 | goto end_breaklock; |
7204 | } |
7205 | /* write it out to the temporary break file */ |
7206 | fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW), 0); |
7207 | if( fd<0 ){ |
7208 | sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)" , errno); |
7209 | goto end_breaklock; |
7210 | } |
7211 | if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){ |
7212 | sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)" , errno); |
7213 | goto end_breaklock; |
7214 | } |
7215 | if( rename(tPath, cPath) ){ |
7216 | sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)" , errno); |
7217 | goto end_breaklock; |
7218 | } |
7219 | rc = 0; |
7220 | fprintf(stderr, "broke stale lock on %s\n" , cPath); |
7221 | robust_close(pFile, conchFile->h, __LINE__); |
7222 | conchFile->h = fd; |
7223 | conchFile->openFlags = O_RDWR | O_CREAT; |
7224 | |
7225 | end_breaklock: |
7226 | if( rc ){ |
7227 | if( fd>=0 ){ |
7228 | osUnlink(tPath); |
7229 | robust_close(pFile, fd, __LINE__); |
7230 | } |
7231 | fprintf(stderr, "failed to break stale lock on %s, %s\n" , cPath, errmsg); |
7232 | } |
7233 | return rc; |
7234 | } |
7235 | |
7236 | /* Take the requested lock on the conch file and break a stale lock if the |
7237 | ** host id matches. |
7238 | */ |
7239 | static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ |
7240 | proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; |
7241 | unixFile *conchFile = pCtx->conchFile; |
7242 | int rc = SQLITE_OK; |
7243 | int nTries = 0; |
7244 | struct timespec conchModTime; |
7245 | |
7246 | memset(&conchModTime, 0, sizeof(conchModTime)); |
7247 | do { |
7248 | rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); |
7249 | nTries ++; |
7250 | if( rc==SQLITE_BUSY ){ |
7251 | /* If the lock failed (busy): |
7252 | * 1st try: get the mod time of the conch, wait 0.5s and try again. |
7253 | * 2nd try: fail if the mod time changed or host id is different, wait |
7254 | * 10 sec and try again |
7255 | * 3rd try: break the lock unless the mod time has changed. |
7256 | */ |
7257 | struct stat buf; |
7258 | if( osFstat(conchFile->h, &buf) ){ |
7259 | storeLastErrno(pFile, errno); |
7260 | return SQLITE_IOERR_LOCK; |
7261 | } |
7262 | |
7263 | if( nTries==1 ){ |
7264 | conchModTime = buf.st_mtimespec; |
7265 | unixSleep(0,500000); /* wait 0.5 sec and try the lock again*/ |
7266 | continue; |
7267 | } |
7268 | |
7269 | assert( nTries>1 ); |
7270 | if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || |
7271 | conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ |
7272 | return SQLITE_BUSY; |
7273 | } |
7274 | |
7275 | if( nTries==2 ){ |
7276 | char tBuf[PROXY_MAXCONCHLEN]; |
7277 | int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); |
7278 | if( len<0 ){ |
7279 | storeLastErrno(pFile, errno); |
7280 | return SQLITE_IOERR_LOCK; |
7281 | } |
7282 | if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ |
7283 | /* don't break the lock if the host id doesn't match */ |
7284 | if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ |
7285 | return SQLITE_BUSY; |
7286 | } |
7287 | }else{ |
7288 | /* don't break the lock on short read or a version mismatch */ |
7289 | return SQLITE_BUSY; |
7290 | } |
7291 | unixSleep(0,10000000); /* wait 10 sec and try the lock again */ |
7292 | continue; |
7293 | } |
7294 | |
7295 | assert( nTries==3 ); |
7296 | if( 0==proxyBreakConchLock(pFile, myHostID) ){ |
7297 | rc = SQLITE_OK; |
7298 | if( lockType==EXCLUSIVE_LOCK ){ |
7299 | rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK); |
7300 | } |
7301 | if( !rc ){ |
7302 | rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); |
7303 | } |
7304 | } |
7305 | } |
7306 | } while( rc==SQLITE_BUSY && nTries<3 ); |
7307 | |
7308 | return rc; |
7309 | } |
7310 | |
/* Takes the conch for pFile: obtains a shared lock on the conch file, reads
** its contents, and makes sure a local proxy lock file is open.
**
** The "lock path" below is pCtx->lockProxyPath.  If it is non-NULL, the
** host ID and lock file path stored in the conch must match it.  A NULL
** lock path means that the path recorded in the conch file will be used if
** the host IDs match, or a new lock path will be generated automatically
** and written to the conch file.  Whenever the conch contents do not match,
** the conch is rewritten under an exclusive lock, which is then lowered
** back to a shared lock.
**
** If pCtx->conchHeld is already nonzero the routine returns SQLITE_OK
** immediately.  On success pCtx->conchHeld is set to 1, pCtx->lockProxy is
** open, and (when pFile->openFlags is nonzero) the database file descriptor
** pFile->h has been closed and reopened.  On failure the conch file is
** unlocked down to NO_LOCK and an SQLite error code is returned.
*/
static int proxyTakeConch(unixFile *pFile){
  proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;

  /* Nonzero conchHeld: nothing more to do */
  if( pCtx->conchHeld!=0 ){
    return SQLITE_OK;
  }else{
    unixFile *conchFile = pCtx->conchFile;
    uuid_t myHostID;
    int pError = 0;
    char readBuf[PROXY_MAXCONCHLEN];
    char lockPath[MAXPATHLEN];
    char *tempLockPath = NULL;
    int rc = SQLITE_OK;
    int createConch = 0;
    int hostIdMatch = 0;
    int readLen = 0;
    int tryOldLockPath = 0;
    int forceNewLockPath = 0;

    OSTRACE(("TAKECONCH %d for %s pid=%d\n" , conchFile->h,
             (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:" ),
             osGetpid(0)));

    /* NOTE: the end_takeconch label lives inside the do{} loop further
    ** down; every "goto end_takeconch" before the loop jumps into its body
    ** and runs only the common exit handling. */
    rc = proxyGetHostID(myHostID, &pError);
    if( (rc&0xff)==SQLITE_IOERR ){
      storeLastErrno(pFile, pError);
      goto end_takeconch;
    }
    rc = proxyConchLock(pFile, myHostID, SHARED_LOCK);
    if( rc!=SQLITE_OK ){
      goto end_takeconch;
    }
    /* read the existing conch file */
    readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN);
    if( readLen<0 ){
      /* I/O error: lastErrno set by seekAndRead */
      storeLastErrno(pFile, conchFile->lastErrno);
      rc = SQLITE_IOERR_READ;
      goto end_takeconch;
    }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) ||
             readBuf[0]!=(char)PROXY_CONCHVERSION ){
      /* a short read or version format mismatch means we need to create a new
      ** conch file.
      */
      createConch = 1;
    }
    /* if the host id matches and the lock path already exists in the conch
    ** we'll try to use the path there, if we can't open that path, we'll
    ** retry with a new auto-generated path
    */
    do { /* in case we need to try again for an :auto: named lock file */

      /* Compare against the existing conch only when it was readable and
      ** we are not on the retry pass that forces a fresh lock path. */
      if( !createConch && !forceNewLockPath ){
        hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID,
                              PROXY_HOSTIDLEN);
        /* if the conch has data compare the contents */
        if( !pCtx->lockProxyPath ){
          /* for auto-named local lock file, just check the host ID and we'll
          ** use the local lock file path that's already in there
          */
          if( hostIdMatch ){
            size_t pathLen = (readLen - PROXY_PATHINDEX);

            /* Clamp so the copy plus terminator fits in lockPath[] */
            if( pathLen>=MAXPATHLEN ){
              pathLen=MAXPATHLEN-1;
            }
            memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen);
            lockPath[pathLen] = 0;
            tempLockPath = lockPath;
            tryOldLockPath = 1;
            /* create a copy of the lock path if the conch is taken */
            goto end_takeconch;
          }
        }else if( hostIdMatch
               && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX],
                           readLen-PROXY_PATHINDEX)
        ){
          /* conch host and lock path match */
          /* NOTE(review): only readLen-PROXY_PATHINDEX bytes are compared,
          ** so a configured path that merely begins with the stored path
          ** is also accepted here - confirm this is intended. */
          goto end_takeconch;
        }
      }

      /* if the conch isn't writable and doesn't match, we can't take it */
      if( (conchFile->openFlags&O_RDWR) == 0 ){
        rc = SQLITE_BUSY;
        goto end_takeconch;
      }

      /* either the conch didn't match or we need to create a new one */
      if( !pCtx->lockProxyPath ){
        proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);
        tempLockPath = lockPath;
        /* create a copy of the lock path _only_ if the conch is taken */
      }

      /* update conch with host and path (this will fail if other process
      ** has a shared lock already), if the host id matches, use the big
      ** stick.
      */
      /* Presumably stamps the conch with the current time so that waiters
      ** in proxyConchLock(), which compare the conch modification time
      ** between attempts, see activity - TODO confirm futimes(fd,NULL)
      ** semantics.  The return value is not checked. */
      futimes(conchFile->h, NULL);
      if( hostIdMatch && !createConch ){
        if( conchFile->pInode && conchFile->pInode->nShared>1 ){
          /* We are trying for an exclusive lock but another thread in this
          ** same process is still holding a shared lock. */
          rc = SQLITE_BUSY;
        } else {
          rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
        }
      }else{
        rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
      }
      if( rc==SQLITE_OK ){
        /* New conch image: version byte, host id, then the lock path */
        char writeBuffer[PROXY_MAXCONCHLEN];
        int writeSize = 0;

        writeBuffer[0] = (char)PROXY_CONCHVERSION;
        memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);
        if( pCtx->lockProxyPath!=NULL ){
          strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath,
                  MAXPATHLEN);
        }else{
          strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);
        }
        writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);
        /* Only the unixWrite() result is kept in rc; the results of the
        ** truncate and the fsync are not checked. */
        robust_ftruncate(conchFile->h, writeSize);
        rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);
        full_fsync(conchFile->h,0,0);
        /* If we created a new conch file (not just updated the contents of a
        ** valid conch file), try to match the permissions of the database
        */
        if( rc==SQLITE_OK && createConch ){
          struct stat buf;
          int err = osFstat(pFile->h, &buf);
          if( err==0 ){
            mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP |
                                        S_IROTH|S_IWOTH);
            /* try to match the database file R/W permissions, ignore failure */
#ifndef SQLITE_PROXY_DEBUG
            osFchmod(conchFile->h, cmode);
#else
            do{
              rc = osFchmod(conchFile->h, cmode);
            }while( rc==(-1) && errno==EINTR );
            if( rc!=0 ){
              int code = errno;
              fprintf(stderr, "fchmod %o FAILED with %d %s\n" ,
                      cmode, code, strerror(code));
            } else {
              fprintf(stderr, "fchmod %o SUCCEDED\n" ,cmode);
            }
          }else{
            int code = errno;
            fprintf(stderr, "STAT FAILED[%d] with %d %s\n" ,
                    err, code, strerror(code));
#endif
          }
        }
      }
      /* Lower the conch lock to SHARED_LOCK now that the update is done.
      ** This runs whether or not the exclusive lock was obtained. */
      conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);

    end_takeconch:
      /* Common exit handling.  Reached by falling through from the conch
      ** update above and by every "goto end_takeconch". */
      OSTRACE(("TRANSPROXY: CLOSE %d\n" , pFile->h));
      if( rc==SQLITE_OK && pFile->openFlags ){
        /* Close and reopen the database file descriptor using dbPath */
        int fd;
        if( pFile->h>=0 ){
          robust_close(pFile, pFile->h, __LINE__);
        }
        pFile->h = -1;
        fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);
        OSTRACE(("TRANSPROXY: OPEN %d\n" , fd));
        if( fd>=0 ){
          pFile->h = fd;
        }else{
          rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called
           during locking */
        }
      }
      if( rc==SQLITE_OK && !pCtx->lockProxy ){
        /* Open the proxy lock file if this context does not have one yet */
        char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath;
        rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1);
        if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){
          /* we couldn't create the proxy lock file with the old lock file path
          ** so try again via auto-naming
          */
          forceNewLockPath = 1;
          tryOldLockPath = 0;
          continue; /* go back to the do {} while start point, try again */
        }
      }
      if( rc==SQLITE_OK ){
        /* Need to make a copy of path if we extracted the value
        ** from the conch file or the path was allocated on the stack
        */
        if( tempLockPath ){
          pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath);
          if( !pCtx->lockProxyPath ){
            rc = SQLITE_NOMEM_BKPT;
          }
        }
      }
      if( rc==SQLITE_OK ){
        pCtx->conchHeld = 1;

        /* An AFP-style proxy file refers to the lock path through its own
        ** locking context, so point it at the string owned by pCtx. */
        if( pCtx->lockProxy->pMethod == &afpIoMethods ){
          afpLockingContext *afpCtx;
          afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext;
          afpCtx->dbPath = pCtx->lockProxyPath;
        }
      } else {
        /* Failure: drop whatever lock is held on the conch file */
        conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
      }
      OSTRACE(("TAKECONCH %d %s\n" , conchFile->h,
               rc==SQLITE_OK?"ok" :"failed" ));
      return rc;
    } while (1); /* in case we need to retry the :auto: lock file -
                 ** we should never get here except via the 'continue' call. */
  }
}
7535 | |
7536 | /* |
7537 | ** If pFile holds a lock on a conch file, then release that lock. |
7538 | */ |
7539 | static int proxyReleaseConch(unixFile *pFile){ |
7540 | int rc = SQLITE_OK; /* Subroutine return code */ |
7541 | proxyLockingContext *pCtx; /* The locking context for the proxy lock */ |
7542 | unixFile *conchFile; /* Name of the conch file */ |
7543 | |
7544 | pCtx = (proxyLockingContext *)pFile->lockingContext; |
7545 | conchFile = pCtx->conchFile; |
7546 | OSTRACE(("RELEASECONCH %d for %s pid=%d\n" , conchFile->h, |
7547 | (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:" ), |
7548 | osGetpid(0))); |
7549 | if( pCtx->conchHeld>0 ){ |
7550 | rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); |
7551 | } |
7552 | pCtx->conchHeld = 0; |
7553 | OSTRACE(("RELEASECONCH %d %s\n" , conchFile->h, |
7554 | (rc==SQLITE_OK ? "ok" : "failed" ))); |
7555 | return rc; |
7556 | } |
7557 | |
7558 | /* |
7559 | ** Given the name of a database file, compute the name of its conch file. |
7560 | ** Store the conch filename in memory obtained from sqlite3_malloc64(). |
7561 | ** Make *pConchPath point to the new name. Return SQLITE_OK on success |
7562 | ** or SQLITE_NOMEM if unable to obtain memory. |
7563 | ** |
7564 | ** The caller is responsible for ensuring that the allocated memory |
7565 | ** space is eventually freed. |
7566 | ** |
7567 | ** *pConchPath is set to NULL if a memory allocation error occurs. |
7568 | */ |
7569 | static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ |
7570 | int i; /* Loop counter */ |
7571 | int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ |
7572 | char *conchPath; /* buffer in which to construct conch name */ |
7573 | |
7574 | /* Allocate space for the conch filename and initialize the name to |
7575 | ** the name of the original database file. */ |
7576 | *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); |
7577 | if( conchPath==0 ){ |
7578 | return SQLITE_NOMEM_BKPT; |
7579 | } |
7580 | memcpy(conchPath, dbPath, len+1); |
7581 | |
7582 | /* now insert a "." before the last / character */ |
7583 | for( i=(len-1); i>=0; i-- ){ |
7584 | if( conchPath[i]=='/' ){ |
7585 | i++; |
7586 | break; |
7587 | } |
7588 | } |
7589 | conchPath[i]='.'; |
7590 | while ( i<len ){ |
7591 | conchPath[i+1]=dbPath[i]; |
7592 | i++; |
7593 | } |
7594 | |
7595 | /* append the "-conch" suffix to the file */ |
7596 | memcpy(&conchPath[i+1], "-conch" , 7); |
7597 | assert( (int)strlen(conchPath) == len+7 ); |
7598 | |
7599 | return SQLITE_OK; |
7600 | } |
7601 | |
7602 | |
7603 | /* Takes a fully configured proxy locking-style unix file and switches |
7604 | ** the local lock file path |
7605 | */ |
7606 | static int switchLockProxyPath(unixFile *pFile, const char *path) { |
7607 | proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; |
7608 | char *oldPath = pCtx->lockProxyPath; |
7609 | int rc = SQLITE_OK; |
7610 | |
7611 | if( pFile->eFileLock!=NO_LOCK ){ |
7612 | return SQLITE_BUSY; |
7613 | } |
7614 | |
7615 | /* nothing to do if the path is NULL, :auto: or matches the existing path */ |
7616 | if( !path || path[0]=='\0' || !strcmp(path, ":auto:" ) || |
7617 | (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ |
7618 | return SQLITE_OK; |
7619 | }else{ |
7620 | unixFile *lockProxy = pCtx->lockProxy; |
7621 | pCtx->lockProxy=NULL; |
7622 | pCtx->conchHeld = 0; |
7623 | if( lockProxy!=NULL ){ |
7624 | rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); |
7625 | if( rc ) return rc; |
7626 | sqlite3_free(lockProxy); |
7627 | } |
7628 | sqlite3_free(oldPath); |
7629 | pCtx->lockProxyPath = sqlite3DbStrDup(0, path); |
7630 | } |
7631 | |
7632 | return rc; |
7633 | } |
7634 | |
7635 | /* |
7636 | ** pFile is a file that has been opened by a prior xOpen call. dbPath |
7637 | ** is a string buffer at least MAXPATHLEN+1 characters in size. |
7638 | ** |
7639 | ** This routine find the filename associated with pFile and writes it |
7640 | ** int dbPath. |
7641 | */ |
7642 | static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ |
7643 | #if defined(__APPLE__) |
7644 | if( pFile->pMethod == &afpIoMethods ){ |
7645 | /* afp style keeps a reference to the db path in the filePath field |
7646 | ** of the struct */ |
7647 | assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); |
7648 | strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, |
7649 | MAXPATHLEN); |
7650 | } else |
7651 | #endif |
7652 | if( pFile->pMethod == &dotlockIoMethods ){ |
7653 | /* dot lock style uses the locking context to store the dot lock |
7654 | ** file path */ |
7655 | int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); |
7656 | memcpy(dbPath, (char *)pFile->lockingContext, len + 1); |
7657 | }else{ |
7658 | /* all other styles use the locking context to store the db file path */ |
7659 | assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); |
7660 | strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); |
7661 | } |
7662 | return SQLITE_OK; |
7663 | } |
7664 | |
7665 | /* |
7666 | ** Takes an already filled in unix file and alters it so all file locking |
7667 | ** will be performed on the local proxy lock file. The following fields |
7668 | ** are preserved in the locking context so that they can be restored and |
7669 | ** the unix structure properly cleaned up at close time: |
7670 | ** ->lockingContext |
7671 | ** ->pMethod |
7672 | */ |
7673 | static int proxyTransformUnixFile(unixFile *pFile, const char *path) { |
7674 | proxyLockingContext *pCtx; |
7675 | char dbPath[MAXPATHLEN+1]; /* Name of the database file */ |
7676 | char *lockPath=NULL; |
7677 | int rc = SQLITE_OK; |
7678 | |
7679 | if( pFile->eFileLock!=NO_LOCK ){ |
7680 | return SQLITE_BUSY; |
7681 | } |
7682 | proxyGetDbPathForUnixFile(pFile, dbPath); |
7683 | if( !path || path[0]=='\0' || !strcmp(path, ":auto:" ) ){ |
7684 | lockPath=NULL; |
7685 | }else{ |
7686 | lockPath=(char *)path; |
7687 | } |
7688 | |
7689 | OSTRACE(("TRANSPROXY %d for %s pid=%d\n" , pFile->h, |
7690 | (lockPath ? lockPath : ":auto:" ), osGetpid(0))); |
7691 | |
7692 | pCtx = sqlite3_malloc64( sizeof(*pCtx) ); |
7693 | if( pCtx==0 ){ |
7694 | return SQLITE_NOMEM_BKPT; |
7695 | } |
7696 | memset(pCtx, 0, sizeof(*pCtx)); |
7697 | |
7698 | rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); |
7699 | if( rc==SQLITE_OK ){ |
7700 | rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); |
7701 | if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ |
7702 | /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and |
7703 | ** (c) the file system is read-only, then enable no-locking access. |
7704 | ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts |
7705 | ** that openFlags will have only one of O_RDONLY or O_RDWR. |
7706 | */ |
7707 | struct statfs fsInfo; |
7708 | struct stat conchInfo; |
7709 | int goLockless = 0; |
7710 | |
7711 | if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { |
7712 | int err = errno; |
7713 | if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ |
7714 | goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; |
7715 | } |
7716 | } |
7717 | if( goLockless ){ |
7718 | pCtx->conchHeld = -1; /* read only FS/ lockless */ |
7719 | rc = SQLITE_OK; |
7720 | } |
7721 | } |
7722 | } |
7723 | if( rc==SQLITE_OK && lockPath ){ |
7724 | pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); |
7725 | } |
7726 | |
7727 | if( rc==SQLITE_OK ){ |
7728 | pCtx->dbPath = sqlite3DbStrDup(0, dbPath); |
7729 | if( pCtx->dbPath==NULL ){ |
7730 | rc = SQLITE_NOMEM_BKPT; |
7731 | } |
7732 | } |
7733 | if( rc==SQLITE_OK ){ |
7734 | /* all memory is allocated, proxys are created and assigned, |
7735 | ** switch the locking context and pMethod then return. |
7736 | */ |
7737 | pCtx->oldLockingContext = pFile->lockingContext; |
7738 | pFile->lockingContext = pCtx; |
7739 | pCtx->pOldMethod = pFile->pMethod; |
7740 | pFile->pMethod = &proxyIoMethods; |
7741 | }else{ |
7742 | if( pCtx->conchFile ){ |
7743 | pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); |
7744 | sqlite3_free(pCtx->conchFile); |
7745 | } |
7746 | sqlite3DbFree(0, pCtx->lockProxyPath); |
7747 | sqlite3_free(pCtx->conchFilePath); |
7748 | sqlite3_free(pCtx); |
7749 | } |
7750 | OSTRACE(("TRANSPROXY %d %s\n" , pFile->h, |
7751 | (rc==SQLITE_OK ? "ok" : "failed" ))); |
7752 | return rc; |
7753 | } |
7754 | |
7755 | |
7756 | /* |
7757 | ** This routine handles sqlite3_file_control() calls that are specific |
7758 | ** to proxy locking. |
7759 | */ |
7760 | static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ |
7761 | switch( op ){ |
7762 | case SQLITE_FCNTL_GET_LOCKPROXYFILE: { |
7763 | unixFile *pFile = (unixFile*)id; |
7764 | if( pFile->pMethod == &proxyIoMethods ){ |
7765 | proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; |
7766 | proxyTakeConch(pFile); |
7767 | if( pCtx->lockProxyPath ){ |
7768 | *(const char **)pArg = pCtx->lockProxyPath; |
7769 | }else{ |
7770 | *(const char **)pArg = ":auto: (not held)" ; |
7771 | } |
7772 | } else { |
7773 | *(const char **)pArg = NULL; |
7774 | } |
7775 | return SQLITE_OK; |
7776 | } |
7777 | case SQLITE_FCNTL_SET_LOCKPROXYFILE: { |
7778 | unixFile *pFile = (unixFile*)id; |
7779 | int rc = SQLITE_OK; |
7780 | int isProxyStyle = (pFile->pMethod == &proxyIoMethods); |
7781 | if( pArg==NULL || (const char *)pArg==0 ){ |
7782 | if( isProxyStyle ){ |
7783 | /* turn off proxy locking - not supported. If support is added for |
7784 | ** switching proxy locking mode off then it will need to fail if |
7785 | ** the journal mode is WAL mode. |
7786 | */ |
7787 | rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/; |
7788 | }else{ |
7789 | /* turn off proxy locking - already off - NOOP */ |
7790 | rc = SQLITE_OK; |
7791 | } |
7792 | }else{ |
7793 | const char *proxyPath = (const char *)pArg; |
7794 | if( isProxyStyle ){ |
7795 | proxyLockingContext *pCtx = |
7796 | (proxyLockingContext*)pFile->lockingContext; |
7797 | if( !strcmp(pArg, ":auto:" ) |
7798 | || (pCtx->lockProxyPath && |
7799 | !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) |
7800 | ){ |
7801 | rc = SQLITE_OK; |
7802 | }else{ |
7803 | rc = switchLockProxyPath(pFile, proxyPath); |
7804 | } |
7805 | }else{ |
7806 | /* turn on proxy file locking */ |
7807 | rc = proxyTransformUnixFile(pFile, proxyPath); |
7808 | } |
7809 | } |
7810 | return rc; |
7811 | } |
7812 | default: { |
7813 | assert( 0 ); /* The call assures that only valid opcodes are sent */ |
7814 | } |
7815 | } |
7816 | /*NOTREACHED*/ assert(0); |
7817 | return SQLITE_ERROR; |
7818 | } |
7819 | |
7820 | /* |
** Within this division (the proxy locking implementation) the procedures
7822 | ** above this point are all utilities. The lock-related methods of the |
7823 | ** proxy-locking sqlite3_io_method object follow. |
7824 | */ |
7825 | |
7826 | |
7827 | /* |
7828 | ** This routine checks if there is a RESERVED lock held on the specified |
7829 | ** file by this or any other process. If such a lock is held, set *pResOut |
7830 | ** to a non-zero value otherwise *pResOut is set to zero. The return value |
7831 | ** is set to SQLITE_OK unless an I/O error occurs during lock checking. |
7832 | */ |
7833 | static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { |
7834 | unixFile *pFile = (unixFile*)id; |
7835 | int rc = proxyTakeConch(pFile); |
7836 | if( rc==SQLITE_OK ){ |
7837 | proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; |
7838 | if( pCtx->conchHeld>0 ){ |
7839 | unixFile *proxy = pCtx->lockProxy; |
7840 | return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); |
7841 | }else{ /* conchHeld < 0 is lockless */ |
7842 | pResOut=0; |
7843 | } |
7844 | } |
7845 | return rc; |
7846 | } |
7847 | |
7848 | /* |
7849 | ** Lock the file with the lock specified by parameter eFileLock - one |
7850 | ** of the following: |
7851 | ** |
7852 | ** (1) SHARED_LOCK |
7853 | ** (2) RESERVED_LOCK |
7854 | ** (3) PENDING_LOCK |
7855 | ** (4) EXCLUSIVE_LOCK |
7856 | ** |
7857 | ** Sometimes when requesting one lock state, additional lock states |
7858 | ** are inserted in between. The locking might fail on one of the later |
7859 | ** transitions leaving the lock state different from what it started but |
7860 | ** still short of its goal. The following chart shows the allowed |
7861 | ** transitions and the inserted intermediate states: |
7862 | ** |
7863 | ** UNLOCKED -> SHARED |
7864 | ** SHARED -> RESERVED |
7865 | ** SHARED -> (PENDING) -> EXCLUSIVE |
7866 | ** RESERVED -> (PENDING) -> EXCLUSIVE |
7867 | ** PENDING -> EXCLUSIVE |
7868 | ** |
7869 | ** This routine will only increase a lock. Use the sqlite3OsUnlock() |
7870 | ** routine to lower a locking level. |
7871 | */ |
7872 | static int proxyLock(sqlite3_file *id, int eFileLock) { |
7873 | unixFile *pFile = (unixFile*)id; |
7874 | int rc = proxyTakeConch(pFile); |
7875 | if( rc==SQLITE_OK ){ |
7876 | proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; |
7877 | if( pCtx->conchHeld>0 ){ |
7878 | unixFile *proxy = pCtx->lockProxy; |
7879 | rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); |
7880 | pFile->eFileLock = proxy->eFileLock; |
7881 | }else{ |
7882 | /* conchHeld < 0 is lockless */ |
7883 | } |
7884 | } |
7885 | return rc; |
7886 | } |
7887 | |
7888 | |
7889 | /* |
7890 | ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock |
7891 | ** must be either NO_LOCK or SHARED_LOCK. |
7892 | ** |
7893 | ** If the locking level of the file descriptor is already at or below |
7894 | ** the requested locking level, this routine is a no-op. |
7895 | */ |
7896 | static int proxyUnlock(sqlite3_file *id, int eFileLock) { |
7897 | unixFile *pFile = (unixFile*)id; |
7898 | int rc = proxyTakeConch(pFile); |
7899 | if( rc==SQLITE_OK ){ |
7900 | proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; |
7901 | if( pCtx->conchHeld>0 ){ |
7902 | unixFile *proxy = pCtx->lockProxy; |
7903 | rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); |
7904 | pFile->eFileLock = proxy->eFileLock; |
7905 | }else{ |
7906 | /* conchHeld < 0 is lockless */ |
7907 | } |
7908 | } |
7909 | return rc; |
7910 | } |
7911 | |
7912 | /* |
7913 | ** Close a file that uses proxy locks. |
7914 | */ |
7915 | static int proxyClose(sqlite3_file *id) { |
7916 | if( ALWAYS(id) ){ |
7917 | unixFile *pFile = (unixFile*)id; |
7918 | proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; |
7919 | unixFile *lockProxy = pCtx->lockProxy; |
7920 | unixFile *conchFile = pCtx->conchFile; |
7921 | int rc = SQLITE_OK; |
7922 | |
7923 | if( lockProxy ){ |
7924 | rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); |
7925 | if( rc ) return rc; |
7926 | rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); |
7927 | if( rc ) return rc; |
7928 | sqlite3_free(lockProxy); |
7929 | pCtx->lockProxy = 0; |
7930 | } |
7931 | if( conchFile ){ |
7932 | if( pCtx->conchHeld ){ |
7933 | rc = proxyReleaseConch(pFile); |
7934 | if( rc ) return rc; |
7935 | } |
7936 | rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); |
7937 | if( rc ) return rc; |
7938 | sqlite3_free(conchFile); |
7939 | } |
7940 | sqlite3DbFree(0, pCtx->lockProxyPath); |
7941 | sqlite3_free(pCtx->conchFilePath); |
7942 | sqlite3DbFree(0, pCtx->dbPath); |
7943 | /* restore the original locking context and pMethod then close it */ |
7944 | pFile->lockingContext = pCtx->oldLockingContext; |
7945 | pFile->pMethod = pCtx->pOldMethod; |
7946 | sqlite3_free(pCtx); |
7947 | return pFile->pMethod->xClose(id); |
7948 | } |
7949 | return SQLITE_OK; |
7950 | } |
7951 | |
7952 | |
7953 | |
7954 | #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ |
7955 | /* |
7956 | ** The proxy locking style is intended for use with AFP filesystems. |
7957 | ** And since AFP is only supported on MacOSX, the proxy locking is also |
7958 | ** restricted to MacOSX. |
7959 | ** |
7960 | ** |
7961 | ******************* End of the proxy lock implementation ********************** |
7962 | ******************************************************************************/ |
7963 | |
/*
** Initialize the operating system interface.
**
** This routine registers all VFS implementations for unix-like operating
** systems.  This routine, and the sqlite3_os_end() routine that follows,
** should be the only routines in this file that are visible from other
** files.
**
** This routine is called once during SQLite initialization and by a
** single thread.  The memory allocation and mutex subsystems have not
** necessarily been initialized when this routine is called, and so they
** should not be used.
**
** NOTE(review): the body nevertheless calls sqlite3MutexAlloc() to obtain
** unixBigLock - confirm that this is safe at this point of initialization.
**
** Always returns SQLITE_OK.
*/
int sqlite3_os_init(void){
  /*
  ** The following macro defines an initializer for an sqlite3_vfs object.
  ** The name of the VFS is NAME.  The pAppData is a pointer to a pointer
  ** to the "finder" function.  (pAppData is a pointer to a pointer because
  ** silly C90 rules prohibit a void* from being cast to a function pointer
  ** and so we have to go through the intermediate pointer to avoid problems
  ** when compiling with -pedantic-errors on GCC.)
  **
  ** The FINDER parameter to this macro is the name of the pointer to the
  ** finder-function.  The finder-function returns a pointer to the
  ** sqlite3_io_methods object that implements the desired locking
  ** behaviors.  See the division above that contains the IOMETHODS
  ** macro for additional information on finder-functions.
  **
  ** Most finders simply return a pointer to a fixed sqlite3_io_methods
  ** object.  But the "autolockIoFinder" available on MacOSX does a little
  ** more than that; it looks at the filesystem type that hosts the
  ** database file and tries to choose a locking method appropriate for
  ** that filesystem type.
  */
  #define UNIXVFS(VFSNAME, FINDER) { \
    3, /* iVersion */ \
    sizeof(unixFile), /* szOsFile */ \
    MAX_PATHNAME, /* mxPathname */ \
    0, /* pNext */ \
    VFSNAME, /* zName */ \
    (void*)&FINDER, /* pAppData */ \
    unixOpen, /* xOpen */ \
    unixDelete, /* xDelete */ \
    unixAccess, /* xAccess */ \
    unixFullPathname, /* xFullPathname */ \
    unixDlOpen, /* xDlOpen */ \
    unixDlError, /* xDlError */ \
    unixDlSym, /* xDlSym */ \
    unixDlClose, /* xDlClose */ \
    unixRandomness, /* xRandomness */ \
    unixSleep, /* xSleep */ \
    unixCurrentTime, /* xCurrentTime */ \
    unixGetLastError, /* xGetLastError */ \
    unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \
    unixSetSystemCall, /* xSetSystemCall */ \
    unixGetSystemCall, /* xGetSystemCall */ \
    unixNextSystemCall, /* xNextSystemCall */ \
  }

  /*
  ** All default VFSes for unix are contained in the following array.
  **
  ** Note that the sqlite3_vfs.pNext field of the VFS object is modified
  ** by the SQLite core when the VFS is registered.  So the following
  ** array cannot be const.
  **
  ** The first entry is always named "unix"; only its finder depends on the
  ** build configuration.  Unless SQLITE_DEFAULT_UNIX_VFS is defined, the
  ** registration loop below makes the first entry the default VFS.
  */
  static sqlite3_vfs aVfs[] = {
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
    UNIXVFS("unix" , autolockIoFinder ),
#elif OS_VXWORKS
    UNIXVFS("unix" , vxworksIoFinder ),
#else
    UNIXVFS("unix" , posixIoFinder ),
#endif
    UNIXVFS("unix-none" , nolockIoFinder ),
    UNIXVFS("unix-dotfile" , dotlockIoFinder ),
    UNIXVFS("unix-excl" , posixIoFinder ),
#if OS_VXWORKS
    UNIXVFS("unix-namedsem" , semIoFinder ),
#endif
#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS
    UNIXVFS("unix-posix" , posixIoFinder ),
#endif
#if SQLITE_ENABLE_LOCKING_STYLE
    UNIXVFS("unix-flock" , flockIoFinder ),
#endif
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
    UNIXVFS("unix-afp" , afpIoFinder ),
    UNIXVFS("unix-nfs" , nfsIoFinder ),
    UNIXVFS("unix-proxy" , proxyIoFinder ),
#endif
  };
  unsigned int i; /* Loop counter */

  /* Double-check that the aSyscall[] array has been constructed
  ** correctly.  See ticket [bb3a86e890c8e96ab] */
  assert( ArraySize(aSyscall)==29 );

  /* Register all VFSes defined in the aVfs[] array.  The second argument
  ** of sqlite3_vfs_register() is the make-default flag: it is set for the
  ** entry whose name equals SQLITE_DEFAULT_UNIX_VFS when that macro is
  ** defined, and for the first entry otherwise. */
  for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
#ifdef SQLITE_DEFAULT_UNIX_VFS
    sqlite3_vfs_register(&aVfs[i],
           0==strcmp(aVfs[i].zName,SQLITE_DEFAULT_UNIX_VFS));
#else
    sqlite3_vfs_register(&aVfs[i], i==0);
#endif
  }
#ifdef SQLITE_OS_KV_OPTIONAL
  sqlite3KvvfsInit();
#endif
  /* Remember the static VFS mutex; sqlite3_os_end() clears this again */
  unixBigLock = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1);

#ifndef SQLITE_OMIT_WAL
  /* Validate lock assumptions */
  assert( SQLITE_SHM_NLOCK==8 ); /* Number of available locks */
  assert( UNIX_SHM_BASE==120 ); /* Start of locking area */
  /* Locks:
  ** WRITE UNIX_SHM_BASE 120
  ** CKPT UNIX_SHM_BASE+1 121
  ** RECOVER UNIX_SHM_BASE+2 122
  ** READ-0 UNIX_SHM_BASE+3 123
  ** READ-1 UNIX_SHM_BASE+4 124
  ** READ-2 UNIX_SHM_BASE+5 125
  ** READ-3 UNIX_SHM_BASE+6 126
  ** READ-4 UNIX_SHM_BASE+7 127
  ** DMS UNIX_SHM_BASE+8 128
  */
  assert( UNIX_SHM_DMS==128 ); /* Byte offset of the deadman-switch */
#endif

  /* Initialize temp file dir array. */
  unixTempFileInit();

  return SQLITE_OK;
}
8099 | |
/*
** Shutdown the operating system interface.
**
** Some operating systems might need to do some cleanup in this routine,
** to release dynamically allocated objects.  But not on unix.
** On unix the only work done is to clear the unixBigLock handle that
** sqlite3_os_init() stored; nothing is freed.  Always returns SQLITE_OK.
*/
int sqlite3_os_end(void){
  unixBigLock = 0;
  return SQLITE_OK;
}
8111 | |
8112 | #endif /* SQLITE_OS_UNIX */ |
8113 | |