1 | // Copyright (c) 2015 Sandstorm Development Group, Inc. and contributors |
2 | // Licensed under the MIT License: |
3 | // |
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | // of this software and associated documentation files (the "Software"), to deal |
6 | // in the Software without restriction, including without limitation the rights |
7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | // copies of the Software, and to permit persons to whom the Software is |
9 | // furnished to do so, subject to the following conditions: |
10 | // |
11 | // The above copyright notice and this permission notice shall be included in |
12 | // all copies or substantial portions of the Software. |
13 | // |
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
20 | // THE SOFTWARE. |
21 | |
22 | #if !_WIN32 |
23 | |
24 | #ifndef _GNU_SOURCE |
25 | #define _GNU_SOURCE |
26 | #endif |
27 | |
28 | #include "filesystem.h" |
29 | #include "debug.h" |
30 | #include <sys/types.h> |
31 | #include <sys/stat.h> |
32 | #include <sys/ioctl.h> |
33 | #include <fcntl.h> |
34 | #include <unistd.h> |
35 | #include <stdio.h> |
36 | #include <sys/mman.h> |
37 | #include <errno.h> |
38 | #include <dirent.h> |
39 | #include <stdlib.h> |
40 | #include "vector.h" |
41 | #include "miniposix.h" |
42 | #include <algorithm> |
43 | |
44 | #if __linux__ |
45 | #include <syscall.h> |
46 | #include <linux/fs.h> |
47 | #include <sys/sendfile.h> |
48 | #endif |
49 | |
50 | namespace kj { |
51 | namespace { |
52 | |
#define HIDDEN_PREFIX ".kj-tmp."
// Prefix for temp files which should be hidden when listing a directory.
//
// If you change this, make sure to update the unit test.

// MAYBE_O_CLOEXEC expands to O_CLOEXEC where the platform defines it and to 0 otherwise, so it can
// be OR'd into open flags unconditionally.
#ifdef O_CLOEXEC
#define MAYBE_O_CLOEXEC O_CLOEXEC
#else
#define MAYBE_O_CLOEXEC 0
#endif

// MAYBE_O_DIRECTORY expands to O_DIRECTORY where the platform defines it and to 0 otherwise.
#ifdef O_DIRECTORY
#define MAYBE_O_DIRECTORY O_DIRECTORY
#else
#define MAYBE_O_DIRECTORY 0
#endif

#if __APPLE__
// Mac OSX defines SEEK_HOLE, but it doesn't work. ("Inappropriate ioctl for device", it says.)
#undef SEEK_HOLE
#endif

#if __BIONIC__
// Bionic has no DTTOIF() function, so undefine DT_UNKNOWN; the d_type code paths in this file are
// guarded by `#ifdef DT_UNKNOWN` and are thereby disabled.
#undef DT_UNKNOWN
#endif
79 | |
static void setCloexec(int fd) KJ_UNUSED;
static void setCloexec(int fd) {
  // Set the O_CLOEXEC flag on the given fd.
  //
  // We try to avoid the need to call this by taking advantage of syscall flags that set it
  // atomically on new file descriptors. Unfortunately some platforms do not support such syscalls.
  //
  // Declared KJ_UNUSED above because, depending on which platform macros are defined, no caller
  // may be compiled in.

#ifdef FIOCLEX
  // Yay, we can set the flag in one call.
  KJ_SYSCALL_HANDLE_ERRORS(ioctl(fd, FIOCLEX)) {
    case EINVAL:
    case EOPNOTSUPP:
      // The ioctl isn't usable on this fd; leave the switch and use the fcntl() path below.
      break;
    default:
      // Unexpected error: report it. If the failure is recovered from, still try fcntl() below.
      KJ_FAIL_SYSCALL("ioctl(fd, FIOCLEX)", error) { break; }
      break;
  } else {
    // success
    return;
  }
#endif

  // Sadness, we must resort to read/modify/write.
  //
  // (On many platforms, FD_CLOEXEC is the only flag modifiable via F_SETFD and therefore we could
  // skip the read... but it seems dangerous to assume that's true of all platforms, and anyway
  // most platforms support FIOCLEX.)
  int flags;
  KJ_SYSCALL(flags = fcntl(fd, F_GETFD));
  if (!(flags & FD_CLOEXEC)) {
    // Only issue the second syscall when the flag isn't already set.
    KJ_SYSCALL(fcntl(fd, F_SETFD, flags | FD_CLOEXEC));
  }
}
113 | |
114 | static Date toKjDate(struct timespec tv) { |
115 | return tv.tv_sec * SECONDS + tv.tv_nsec * NANOSECONDS + UNIX_EPOCH; |
116 | } |
117 | |
118 | static FsNode::Type modeToType(mode_t mode) { |
119 | switch (mode & S_IFMT) { |
120 | case S_IFREG : return FsNode::Type::FILE; |
121 | case S_IFDIR : return FsNode::Type::DIRECTORY; |
122 | case S_IFLNK : return FsNode::Type::SYMLINK; |
123 | case S_IFBLK : return FsNode::Type::BLOCK_DEVICE; |
124 | case S_IFCHR : return FsNode::Type::CHARACTER_DEVICE; |
125 | case S_IFIFO : return FsNode::Type::NAMED_PIPE; |
126 | case S_IFSOCK: return FsNode::Type::SOCKET; |
127 | default: return FsNode::Type::OTHER; |
128 | } |
129 | } |
130 | |
131 | static FsNode::Metadata statToMetadata(struct stat& stats) { |
132 | // Probably st_ino and st_dev are usually under 32 bits, so mix by rotating st_dev left 32 bits |
133 | // and XOR. |
134 | uint64_t d = stats.st_dev; |
135 | uint64_t hash = ((d << 32) | (d >> 32)) ^ stats.st_ino; |
136 | |
137 | return FsNode::Metadata { |
138 | modeToType(stats.st_mode), |
139 | implicitCast<uint64_t>(stats.st_size), |
140 | implicitCast<uint64_t>(stats.st_blocks * 512u), |
141 | #if __APPLE__ |
142 | toKjDate(stats.st_mtimespec), |
143 | #else |
144 | toKjDate(stats.st_mtim), |
145 | #endif |
146 | implicitCast<uint>(stats.st_nlink), |
147 | hash |
148 | }; |
149 | } |
150 | |
151 | static bool rmrf(int fd, StringPtr path); |
152 | |
153 | static void rmrfChildrenAndClose(int fd) { |
154 | // Assumes fd is seeked to beginning. |
155 | |
156 | DIR* dir = fdopendir(fd); |
157 | if (dir == nullptr) { |
158 | close(fd); |
159 | KJ_FAIL_SYSCALL("fdopendir" , errno); |
160 | }; |
161 | KJ_DEFER(closedir(dir)); |
162 | |
163 | for (;;) { |
164 | errno = 0; |
165 | struct dirent* entry = readdir(dir); |
166 | if (entry == nullptr) { |
167 | int error = errno; |
168 | if (error == 0) { |
169 | break; |
170 | } else { |
171 | KJ_FAIL_SYSCALL("readdir" , error); |
172 | } |
173 | } |
174 | |
175 | if (entry->d_name[0] == '.' && |
176 | (entry->d_name[1] == '\0' || |
177 | (entry->d_name[1] == '.' && |
178 | entry->d_name[2] == '\0'))) { |
179 | // ignore . and .. |
180 | } else { |
181 | #ifdef DT_UNKNOWN // d_type is not available on all platforms. |
182 | if (entry->d_type == DT_DIR) { |
183 | int subdirFd; |
184 | KJ_SYSCALL(subdirFd = openat( |
185 | fd, entry->d_name, O_RDONLY | MAYBE_O_DIRECTORY | MAYBE_O_CLOEXEC)); |
186 | rmrfChildrenAndClose(subdirFd); |
187 | KJ_SYSCALL(unlinkat(fd, entry->d_name, AT_REMOVEDIR)); |
188 | } else if (entry->d_type != DT_UNKNOWN) { |
189 | KJ_SYSCALL(unlinkat(fd, entry->d_name, 0)); |
190 | } else { |
191 | #endif |
192 | KJ_ASSERT(rmrf(fd, entry->d_name)); |
193 | #ifdef DT_UNKNOWN |
194 | } |
195 | #endif |
196 | } |
197 | } |
198 | } |
199 | |
static bool rmrf(int fd, StringPtr path) {
  // Recursively deletes the node at `path`, resolved relative to the directory descriptor `fd`.
  //
  // Returns false if the path does not exist (fstatat() fails with ENOENT or ENOTDIR), or if a
  // syscall failure was reported and recovered from. Returns true once the node has been removed.

  struct stat stats;
  // AT_SYMLINK_NOFOLLOW: examine the node itself rather than following a symlink.
  KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd, path.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) {
    case ENOENT:
    case ENOTDIR:
      // Doesn't exist.
      return false;
    default:
      KJ_FAIL_SYSCALL("lstat(path)", error, path) { return false; }
  }

  if (S_ISDIR(stats.st_mode)) {
    // Directory: empty it first (which also disposes of subdirFd), then remove the directory.
    int subdirFd;
    KJ_SYSCALL(subdirFd = openat(
        fd, path.cStr(), O_RDONLY | MAYBE_O_DIRECTORY | MAYBE_O_CLOEXEC)) { return false; }
    rmrfChildrenAndClose(subdirFd);
    KJ_SYSCALL(unlinkat(fd, path.cStr(), AT_REMOVEDIR)) { return false; }
  } else {
    KJ_SYSCALL(unlinkat(fd, path.cStr(), 0)) { return false; }
  }

  return true;
}
223 | |
struct MmapRange {
  // A page-aligned starting offset plus the byte count measured from that offset.
  uint64_t offset;
  uint64_t size;
};

static MmapRange getMmapRange(uint64_t offset, uint64_t size) {
  // Translates a caller-requested (offset, size) into values that can be handed to mmap(), which
  // requires mappings to begin on a page boundary.
  //
  // The start is moved down to the enclosing page boundary and the size grows by the same amount.
  // The end of the mapping is deliberately *not* rounded up to a page boundary: mmap() does not
  // need that, and doing so causes trouble on some systems (notably Cygwin).

#ifndef _SC_PAGESIZE
#define _SC_PAGESIZE _SC_PAGE_SIZE
#endif
  static const uint64_t pageSize = sysconf(_SC_PAGESIZE);

  // Number of bytes by which `offset` overshoots the page boundary below it.
  const uint64_t misalignment = offset & (pageSize - 1);

  MmapRange aligned;
  aligned.offset = offset - misalignment;
  aligned.size = size + misalignment;
  return aligned;
}
247 | |
248 | class MmapDisposer: public ArrayDisposer { |
249 | protected: |
250 | void disposeImpl(void* firstElement, size_t elementSize, size_t elementCount, |
251 | size_t capacity, void (*destroyElement)(void*)) const { |
252 | auto range = getMmapRange(reinterpret_cast<uintptr_t>(firstElement), |
253 | elementSize * elementCount); |
254 | KJ_SYSCALL(munmap(reinterpret_cast<byte*>(range.offset), range.size)) { break; } |
255 | } |
256 | }; |
257 | |
258 | constexpr MmapDisposer mmapDisposer = MmapDisposer(); |
259 | |
260 | class DiskHandle { |
261 | // We need to implement each of ReadableFile, AppendableFile, File, ReadableDirectory, and |
262 | // Directory for disk handles. There is a lot of implementation overlap between these, especially |
263 | // stat(), sync(), etc. We can't have everything inherit from a common DiskFsNode that implements |
264 | // these because then we get diamond inheritance which means we need to make all our inheritance |
265 | // virtual which means downcasting requires RTTI which violates our goal of supporting compiling |
266 | // with no RTTI. So instead we have the DiskHandle class which implements all the methods without |
267 | // inheriting anything, and then we have DiskFile, DiskDirectory, etc. hold this and delegate to |
268 | // it. Ugly, but works. |
269 | |
270 | public: |
// Moves `fd` into this handle's `fd` member; the methods below operate on that descriptor.
DiskHandle(AutoCloseFd&& fd): fd(kj::mv(fd)) {}
272 | |
273 | // OsHandle ------------------------------------------------------------------ |
274 | |
275 | AutoCloseFd clone() const { |
276 | int fd2; |
277 | #ifdef F_DUPFD_CLOEXEC |
278 | KJ_SYSCALL_HANDLE_ERRORS(fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 3)) { |
279 | case EINVAL: |
280 | case EOPNOTSUPP: |
281 | // fall back |
282 | break; |
283 | default: |
284 | KJ_FAIL_SYSCALL("fnctl(fd, F_DUPFD_CLOEXEC, 3)" , error) { break; } |
285 | break; |
286 | } else { |
287 | return AutoCloseFd(fd2); |
288 | } |
289 | #endif |
290 | |
291 | KJ_SYSCALL(fd2 = ::dup(fd)); |
292 | AutoCloseFd result(fd2); |
293 | setCloexec(result); |
294 | return result; |
295 | } |
296 | |
int getFd() const {
  // Returns the raw descriptor held by this handle, as reported by fd.get().
  return fd.get();
}
300 | |
301 | // FsNode -------------------------------------------------------------------- |
302 | |
FsNode::Metadata stat() const {
  // Runs fstat() on this handle's descriptor and converts the result with statToMetadata().
  struct stat stats;
  KJ_SYSCALL(::fstat(fd, &stats));
  return statToMetadata(stats);
}
308 | |
void sync() const {
  // Flushes this handle's descriptor: F_FULLFSYNC on Apple platforms, fsync() everywhere else.
#if __APPLE__
  // For whatever reason, fsync() on OSX only flushes kernel buffers. It does not flush hardware
  // disk buffers. This makes it not very useful. But OSX documents fcntl F_FULLFSYNC which does
  // the right thing. Why they don't just make fsync() do the right thing, I do not know.
  KJ_SYSCALL(fcntl(fd, F_FULLFSYNC));
#else
  KJ_SYSCALL(fsync(fd));
#endif
}
319 | |
void datasync() const {
  // Calls fdatasync() on this handle's descriptor where that function is available; otherwise
  // falls back to sync().
  //
  // The presence of the _POSIX_SYNCHRONIZED_IO define is supposed to tell us that fdatasync()
  // exists. But Apple defines this yet doesn't offer fdatasync(). Thanks, Apple.
#if _POSIX_SYNCHRONIZED_IO && !__APPLE__
  KJ_SYSCALL(fdatasync(fd));
#else
  this->sync();
#endif
}
329 | |
330 | // ReadableFile -------------------------------------------------------------- |
331 | |
332 | size_t read(uint64_t offset, ArrayPtr<byte> buffer) const { |
333 | // pread() probably never returns short reads unless it hits EOF. Unfortunately, though, per |
334 | // spec we are not allowed to assume this. |
335 | |
336 | size_t total = 0; |
337 | while (buffer.size() > 0) { |
338 | ssize_t n; |
339 | KJ_SYSCALL(n = pread(fd, buffer.begin(), buffer.size(), offset)); |
340 | if (n == 0) break; |
341 | total += n; |
342 | offset += n; |
343 | buffer = buffer.slice(n, buffer.size()); |
344 | } |
345 | return total; |
346 | } |
347 | |
348 | Array<const byte> mmap(uint64_t offset, uint64_t size) const { |
349 | auto range = getMmapRange(offset, size); |
350 | const void* mapping = ::mmap(NULL, range.size, PROT_READ, MAP_SHARED, fd, range.offset); |
351 | if (mapping == MAP_FAILED) { |
352 | KJ_FAIL_SYSCALL("mmap" , errno); |
353 | } |
354 | return Array<const byte>(reinterpret_cast<const byte*>(mapping) + (offset - range.offset), |
355 | size, mmapDisposer); |
356 | } |
357 | |
358 | Array<byte> mmapPrivate(uint64_t offset, uint64_t size) const { |
359 | auto range = getMmapRange(offset, size); |
360 | void* mapping = ::mmap(NULL, range.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, range.offset); |
361 | if (mapping == MAP_FAILED) { |
362 | KJ_FAIL_SYSCALL("mmap" , errno); |
363 | } |
364 | return Array<byte>(reinterpret_cast<byte*>(mapping) + (offset - range.offset), |
365 | size, mmapDisposer); |
366 | } |
367 | |
368 | // File ---------------------------------------------------------------------- |
369 | |
370 | void write(uint64_t offset, ArrayPtr<const byte> data) const { |
371 | // pwrite() probably never returns short writes unless there's no space left on disk. |
372 | // Unfortunately, though, per spec we are not allowed to assume this. |
373 | |
374 | while (data.size() > 0) { |
375 | ssize_t n; |
376 | KJ_SYSCALL(n = pwrite(fd, data.begin(), data.size(), offset)); |
377 | KJ_ASSERT(n > 0, "pwrite() returned zero?" ); |
378 | offset += n; |
379 | data = data.slice(n, data.size()); |
380 | } |
381 | } |
382 | |
383 | void zero(uint64_t offset, uint64_t size) const { |
384 | #ifdef FALLOC_FL_PUNCH_HOLE |
385 | KJ_SYSCALL_HANDLE_ERRORS( |
386 | fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, size)) { |
387 | case EOPNOTSUPP: |
388 | // fall back to below |
389 | break; |
390 | default: |
391 | KJ_FAIL_SYSCALL("fallocate(FALLOC_FL_PUNCH_HOLE)" , error) { return; } |
392 | } else { |
393 | return; |
394 | } |
395 | #endif |
396 | |
397 | static const byte ZEROS[4096] = { 0 }; |
398 | |
399 | #if __APPLE__ || __CYGWIN__ |
400 | // Mac & Cygwin doesn't have pwritev(). |
401 | while (size > sizeof(ZEROS)) { |
402 | write(offset, ZEROS); |
403 | size -= sizeof(ZEROS); |
404 | offset += sizeof(ZEROS); |
405 | } |
406 | write(offset, kj::arrayPtr(ZEROS, size)); |
407 | #else |
408 | // Use a 4k buffer of zeros amplified by iov to write zeros with as few syscalls as possible. |
409 | size_t count = (size + sizeof(ZEROS) - 1) / sizeof(ZEROS); |
410 | const size_t iovmax = miniposix::iovMax(count); |
411 | KJ_STACK_ARRAY(struct iovec, iov, kj::min(iovmax, count), 16, 256); |
412 | |
413 | for (auto& item: iov) { |
414 | item.iov_base = const_cast<byte*>(ZEROS); |
415 | item.iov_len = sizeof(ZEROS); |
416 | } |
417 | |
418 | while (size > 0) { |
419 | size_t iovCount; |
420 | if (size >= iov.size() * sizeof(ZEROS)) { |
421 | iovCount = iov.size(); |
422 | } else { |
423 | iovCount = size / sizeof(ZEROS); |
424 | size_t rem = size % sizeof(ZEROS); |
425 | if (rem > 0) { |
426 | iov[iovCount++].iov_len = rem; |
427 | } |
428 | } |
429 | |
430 | ssize_t n; |
431 | KJ_SYSCALL(n = pwritev(fd, iov.begin(), count, offset)); |
432 | KJ_ASSERT(n > 0, "pwrite() returned zero?" ); |
433 | |
434 | offset += n; |
435 | size -= n; |
436 | } |
437 | #endif |
438 | } |
439 | |
void truncate(uint64_t size) const {
  // Sets the file's length to `size` bytes using ftruncate().
  KJ_SYSCALL(ftruncate(fd, size));
}
443 | |
444 | class WritableFileMappingImpl final: public WritableFileMapping { |
445 | public: |
446 | WritableFileMappingImpl(Array<byte> bytes): bytes(kj::mv(bytes)) {} |
447 | |
448 | ArrayPtr<byte> get() const override { |
449 | // const_cast OK because WritableFileMapping does indeed provide a writable view despite |
450 | // being const itself. |
451 | return arrayPtr(const_cast<byte*>(bytes.begin()), bytes.size()); |
452 | } |
453 | |
454 | void changed(ArrayPtr<byte> slice) const override { |
455 | KJ_REQUIRE(slice.begin() >= bytes.begin() && slice.end() <= bytes.end(), |
456 | "byte range is not part of this mapping" ); |
457 | |
458 | // msync() requires page-alignment, apparently, so use getMmapRange() to accomplish that. |
459 | auto range = getMmapRange(reinterpret_cast<uintptr_t>(slice.begin()), slice.size()); |
460 | KJ_SYSCALL(msync(reinterpret_cast<void*>(range.offset), range.size, MS_ASYNC)); |
461 | } |
462 | |
463 | void sync(ArrayPtr<byte> slice) const override { |
464 | KJ_REQUIRE(slice.begin() >= bytes.begin() && slice.end() <= bytes.end(), |
465 | "byte range is not part of this mapping" ); |
466 | |
467 | // msync() requires page-alignment, apparently, so use getMmapRange() to accomplish that. |
468 | auto range = getMmapRange(reinterpret_cast<uintptr_t>(slice.begin()), slice.size()); |
469 | KJ_SYSCALL(msync(reinterpret_cast<void*>(range.offset), range.size, MS_SYNC)); |
470 | } |
471 | |
472 | private: |
473 | Array<byte> bytes; |
474 | }; |
475 | |
476 | Own<const WritableFileMapping> mmapWritable(uint64_t offset, uint64_t size) const { |
477 | auto range = getMmapRange(offset, size); |
478 | void* mapping = ::mmap(NULL, range.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, range.offset); |
479 | if (mapping == MAP_FAILED) { |
480 | KJ_FAIL_SYSCALL("mmap" , errno); |
481 | } |
482 | auto array = Array<byte>(reinterpret_cast<byte*>(mapping) + (offset - range.offset), |
483 | size, mmapDisposer); |
484 | return heap<WritableFileMappingImpl>(kj::mv(array)); |
485 | } |
486 | |
487 | size_t copyChunk(uint64_t offset, int fromFd, uint64_t fromOffset, uint64_t size) const { |
488 | // Copies a range of bytes from `fromFd` to this file in the most efficient way possible for |
489 | // the OS. Only returns less than `size` if EOF. Does not account for holes. |
490 | |
491 | #if __linux__ |
492 | { |
493 | KJ_SYSCALL(lseek(fd, offset, SEEK_SET)); |
494 | off_t fromPos = fromOffset; |
495 | off_t end = fromOffset + size; |
496 | while (fromPos < end) { |
497 | ssize_t n; |
498 | KJ_SYSCALL_HANDLE_ERRORS(n = sendfile(fd, fromFd, &fromPos, end - fromPos)) { |
499 | case EINVAL: |
500 | case ENOSYS: |
501 | goto sendfileNotAvailable; |
502 | default: |
503 | KJ_FAIL_SYSCALL("sendfile" , error) { return fromPos - fromOffset; } |
504 | } |
505 | } |
506 | return fromPos - fromOffset; |
507 | } |
508 | |
509 | sendfileNotAvailable: |
510 | #endif |
511 | uint64_t total = 0; |
512 | while (size > 0) { |
513 | byte buffer[4096]; |
514 | ssize_t n; |
515 | KJ_SYSCALL(n = pread(fromFd, buffer, kj::min(sizeof(buffer), size), fromOffset)); |
516 | if (n == 0) break; |
517 | write(offset, arrayPtr(buffer, n)); |
518 | fromOffset += n; |
519 | offset += n; |
520 | total += n; |
521 | size -= n; |
522 | } |
523 | return total; |
524 | } |
525 | |
526 | kj::Maybe<size_t> copy(uint64_t offset, const ReadableFile& from, |
527 | uint64_t fromOffset, uint64_t size) const { |
528 | KJ_IF_MAYBE(otherFd, from.getFd()) { |
529 | #ifdef FICLONE |
530 | if (offset == 0 && fromOffset == 0 && size == kj::maxValue && stat().size == 0) { |
531 | if (ioctl(fd, FICLONE, *otherFd) >= 0) { |
532 | return stat().size; |
533 | } |
534 | } else if (size > 0) { // src_length = 0 has special meaning for the syscall, so avoid. |
535 | struct file_clone_range range; |
536 | memset(&range, 0, sizeof(range)); |
537 | range.src_fd = *otherFd; |
538 | range.dest_offset = offset; |
539 | range.src_offset = fromOffset; |
540 | range.src_length = size == kj::maxValue ? 0 : size; |
541 | if (ioctl(fd, FICLONERANGE, &range) >= 0) { |
542 | // TODO(someday): What does FICLONERANGE actually do if the range goes past EOF? The docs |
543 | // don't say. Maybe it only copies the parts that exist. Maybe it punches holes for the |
544 | // rest. Where does the destination file's EOF marker end up? Who knows? |
545 | return kj::min(from.stat().size - fromOffset, size); |
546 | } |
547 | } else { |
548 | // size == 0 |
549 | return size_t(0); |
550 | } |
551 | |
552 | // ioctl failed. Almost all failures documented for these are of the form "the operation is |
553 | // not supported for the filesystem(s) specified", so fall back to other approaches. |
554 | #endif |
555 | |
556 | off_t toPos = offset; |
557 | off_t fromPos = fromOffset; |
558 | off_t end = size == kj::maxValue ? off_t(kj::maxValue) : off_t(fromOffset + size); |
559 | |
560 | for (;;) { |
561 | // Handle data. |
562 | { |
563 | // Find out how much data there is before the next hole. |
564 | off_t nextHole; |
565 | #ifdef SEEK_HOLE |
566 | KJ_SYSCALL_HANDLE_ERRORS(nextHole = lseek(*otherFd, fromPos, SEEK_HOLE)) { |
567 | case EINVAL: |
568 | // SEEK_HOLE probably not supported. Assume no holes. |
569 | nextHole = end; |
570 | break; |
571 | case ENXIO: |
572 | // Past EOF. Stop here. |
573 | return fromPos - fromOffset; |
574 | default: |
575 | KJ_FAIL_SYSCALL("lseek(fd, pos, SEEK_HOLE)" , error) { return fromPos - fromOffset; } |
576 | } |
577 | #else |
578 | // SEEK_HOLE not supported. Assume no holes. |
579 | nextHole = end; |
580 | #endif |
581 | |
582 | // Copy the next chunk of data. |
583 | off_t copyTo = kj::min(end, nextHole); |
584 | size_t amount = copyTo - fromPos; |
585 | if (amount > 0) { |
586 | size_t n = copyChunk(toPos, *otherFd, fromPos, amount); |
587 | fromPos += n; |
588 | toPos += n; |
589 | |
590 | if (n < amount) { |
591 | return fromPos - fromOffset; |
592 | } |
593 | } |
594 | |
595 | if (fromPos == end) { |
596 | return fromPos - fromOffset; |
597 | } |
598 | } |
599 | |
600 | #ifdef SEEK_HOLE |
601 | // Handle hole. |
602 | { |
603 | // Find out how much hole there is before the next data. |
604 | off_t nextData; |
605 | KJ_SYSCALL_HANDLE_ERRORS(nextData = lseek(*otherFd, fromPos, SEEK_DATA)) { |
606 | case EINVAL: |
607 | // SEEK_DATA probably not supported. But we should only have gotten here if we |
608 | // were expecting a hole. |
609 | KJ_FAIL_ASSERT("can't determine hole size; SEEK_DATA not supported" ); |
610 | break; |
611 | case ENXIO: |
612 | // No more data. Set to EOF. |
613 | KJ_SYSCALL(nextData = lseek(*otherFd, 0, SEEK_END)); |
614 | if (nextData > end) { |
615 | end = nextData; |
616 | } |
617 | break; |
618 | default: |
619 | KJ_FAIL_SYSCALL("lseek(fd, pos, SEEK_HOLE)" , error) { return fromPos - fromOffset; } |
620 | } |
621 | |
622 | // Write zeros. |
623 | off_t zeroTo = kj::min(end, nextData); |
624 | off_t amount = zeroTo - fromPos; |
625 | if (amount > 0) { |
626 | zero(toPos, amount); |
627 | toPos += amount; |
628 | fromPos = zeroTo; |
629 | } |
630 | |
631 | if (fromPos == end) { |
632 | return fromPos - fromOffset; |
633 | } |
634 | } |
635 | #endif |
636 | } |
637 | } |
638 | |
639 | // Indicates caller should call File::copy() default implementation. |
640 | return nullptr; |
641 | } |
642 | |
643 | // ReadableDirectory --------------------------------------------------------- |
644 | |
645 | template <typename Func> |
646 | auto list(bool needTypes, Func&& func) const |
647 | -> Array<Decay<decltype(func(instance<StringPtr>(), instance<FsNode::Type>()))>> { |
648 | // Seek to start of directory. |
649 | KJ_SYSCALL(lseek(fd, 0, SEEK_SET)); |
650 | |
651 | // Unfortunately, fdopendir() takes ownership of the file descriptor. Therefore we need to |
652 | // make a duplicate. |
653 | int duped; |
654 | KJ_SYSCALL(duped = dup(fd)); |
655 | DIR* dir = fdopendir(duped); |
656 | if (dir == nullptr) { |
657 | close(duped); |
658 | KJ_FAIL_SYSCALL("fdopendir" , errno); |
659 | } |
660 | |
661 | KJ_DEFER(closedir(dir)); |
662 | typedef Decay<decltype(func(instance<StringPtr>(), instance<FsNode::Type>()))> Entry; |
663 | kj::Vector<Entry> entries; |
664 | |
665 | for (;;) { |
666 | errno = 0; |
667 | struct dirent* entry = readdir(dir); |
668 | if (entry == nullptr) { |
669 | int error = errno; |
670 | if (error == 0) { |
671 | break; |
672 | } else { |
673 | KJ_FAIL_SYSCALL("readdir" , error); |
674 | } |
675 | } |
676 | |
677 | kj::StringPtr name = entry->d_name; |
678 | if (name != "." && name != ".." && !name.startsWith(HIDDEN_PREFIX)) { |
679 | #ifdef DT_UNKNOWN // d_type is not available on all platforms. |
680 | if (entry->d_type != DT_UNKNOWN) { |
681 | entries.add(func(name, modeToType(DTTOIF(entry->d_type)))); |
682 | } else { |
683 | #endif |
684 | if (needTypes) { |
685 | // Unknown type. Fall back to stat. |
686 | struct stat stats; |
687 | KJ_SYSCALL(fstatat(fd, name.cStr(), &stats, AT_SYMLINK_NOFOLLOW)); |
688 | entries.add(func(name, modeToType(stats.st_mode))); |
689 | } else { |
690 | entries.add(func(name, FsNode::Type::OTHER)); |
691 | } |
692 | #ifdef DT_UNKNOWN |
693 | } |
694 | #endif |
695 | } |
696 | } |
697 | |
698 | auto result = entries.releaseAsArray(); |
699 | std::sort(result.begin(), result.end()); |
700 | return result; |
701 | } |
702 | |
703 | Array<String> listNames() const { |
704 | return list(false, [](StringPtr name, FsNode::Type type) { return heapString(name); }); |
705 | } |
706 | |
707 | Array<ReadableDirectory::Entry> listEntries() const { |
708 | return list(true, [](StringPtr name, FsNode::Type type) { |
709 | return ReadableDirectory::Entry { type, heapString(name), }; |
710 | }); |
711 | } |
712 | |
bool exists(PathPtr path) const {
  // Reports whether `path`, resolved relative to this handle's descriptor, passes an
  // faccessat(F_OK) check.
  //
  // ENOENT and ENOTDIR are treated as "does not exist". Any other error is reported through
  // KJ_FAIL_SYSCALL, and false is returned if that failure is recovered from.
  KJ_SYSCALL_HANDLE_ERRORS(faccessat(fd, path.toString().cStr(), F_OK, 0)) {
    case ENOENT:
    case ENOTDIR:
      return false;
    default:
      KJ_FAIL_SYSCALL("faccessat(fd, path)", error, path) { return false; }
  }
  return true;
}
723 | |
724 | Maybe<FsNode::Metadata> tryLstat(PathPtr path) const { |
725 | struct stat stats; |
726 | KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd, path.toString().cStr(), &stats, AT_SYMLINK_NOFOLLOW)) { |
727 | case ENOENT: |
728 | case ENOTDIR: |
729 | return nullptr; |
730 | default: |
731 | KJ_FAIL_SYSCALL("faccessat(fd, path)" , error, path) { return nullptr; } |
732 | } |
733 | return statToMetadata(stats); |
734 | } |
735 | |
Maybe<Own<const ReadableFile>> tryOpenFile(PathPtr path) const {
  // Opens `path`, resolved relative to this handle's descriptor, read-only and wraps the new
  // descriptor with newDiskReadableFile().
  //
  // Returns nullptr if the path doesn't exist (ENOENT or ENOTDIR). Any other error is reported
  // through KJ_FAIL_SYSCALL, and nullptr is returned if that failure is recovered from.
  int newFd;
  KJ_SYSCALL_HANDLE_ERRORS(newFd = openat(
      fd, path.toString().cStr(), O_RDONLY | MAYBE_O_CLOEXEC)) {
    case ENOENT:
    case ENOTDIR:
      return nullptr;
    default:
      KJ_FAIL_SYSCALL("openat(fd, path, O_RDONLY)", error, path) { return nullptr; }
  }

  kj::AutoCloseFd result(newFd);
#ifndef O_CLOEXEC
  // openat() couldn't set close-on-exec atomically on this platform, so set it afterwards.
  setCloexec(result);
#endif

  return newDiskReadableFile(kj::mv(result));
}
754 | |
Maybe<AutoCloseFd> tryOpenSubdirInternal(PathPtr path) const {
  // Opens the directory at `path`, resolved relative to this handle's descriptor, and returns
  // the raw descriptor.
  //
  // Returns nullptr if the path doesn't exist. If the path exists but is not a directory, the
  // failure is reported through KJ_FAIL_SYSCALL, as is any other error. nullptr is returned if
  // that failure is recovered from.
  int newFd;
  KJ_SYSCALL_HANDLE_ERRORS(newFd = openat(
      fd, path.toString().cStr(), O_RDONLY | MAYBE_O_CLOEXEC | MAYBE_O_DIRECTORY)) {
    case ENOENT:
      return nullptr;
    case ENOTDIR:
      // Could mean that a parent is not a directory, which we treat as "doesn't exist".
      // Could also mean that the specified file is not a directory, which should throw.
      // Check using exists().
      if (!exists(path)) {
        return nullptr;
      }
      // fallthrough
    default:
      KJ_FAIL_SYSCALL("openat(fd, path, O_DIRECTORY)", error, path) { return nullptr; }
  }

  kj::AutoCloseFd result(newFd);
#ifndef O_CLOEXEC
  // openat() couldn't set close-on-exec atomically on this platform, so set it afterwards.
  setCloexec(result);
#endif

  return kj::mv(result);
}
780 | |
Maybe<Own<const ReadableDirectory>> tryOpenSubdir(PathPtr path) const {
  // Opens the subdirectory at `path` via tryOpenSubdirInternal() and, if that produced a
  // descriptor, wraps it with newDiskReadableDirectory(). A null result is passed through.
  return tryOpenSubdirInternal(path).map(newDiskReadableDirectory);
}
784 | |
785 | Maybe<String> tryReadlink(PathPtr path) const { |
786 | size_t trySize = 256; |
787 | for (;;) { |
788 | KJ_STACK_ARRAY(char, buf, trySize, 256, 4096); |
789 | ssize_t n = readlinkat(fd, path.toString().cStr(), buf.begin(), buf.size()); |
790 | if (n < 0) { |
791 | int error = errno; |
792 | switch (error) { |
793 | case EINTR: |
794 | continue; |
795 | case ENOENT: |
796 | case ENOTDIR: |
797 | case EINVAL: // not a link |
798 | return nullptr; |
799 | default: |
800 | KJ_FAIL_SYSCALL("readlinkat(fd, path)" , error, path) { return nullptr; } |
801 | } |
802 | } |
803 | |
804 | if (n >= buf.size()) { |
805 | // Didn't give it enough space. Better retry with a bigger buffer. |
806 | trySize *= 2; |
807 | continue; |
808 | } |
809 | |
810 | return heapString(buf.begin(), n); |
811 | } |
812 | } |
813 | |
814 | // Directory ----------------------------------------------------------------- |
815 | |
bool tryMkdir(PathPtr path, WriteMode mode, bool noThrow) const {
  // Internal function to make a directory.
  //
  // Returns true if the directory was created, or if it already exists as a directory and `mode`
  // includes MODIFY. Returns false if the path exists and MODIFY was not requested, or if it
  // exists but is not a directory. On any other failure, returns false when `noThrow` is set and
  // otherwise reports the error through KJ_FAIL_SYSCALL.
  //
  // With WriteMode::CREATE_PARENT, a missing parent is created recursively and the mkdir is then
  // retried once.

  auto filename = path.toString();
  // PRIVATE requests owner-only permissions; otherwise request 0777.
  mode_t acl = has(mode, WriteMode::PRIVATE) ? 0700 : 0777;

  KJ_SYSCALL_HANDLE_ERRORS(mkdirat(fd, filename.cStr(), acl)) {
    case EEXIST: {
      // Apparently this path exists.
      if (!has(mode, WriteMode::MODIFY)) {
        // Require exclusive create.
        return false;
      }

      // MODIFY is allowed, so we just need to check whether the existing entry is a directory.
      struct stat stats;
      KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd, filename.cStr(), &stats, 0)) {
        default:
          // mkdir() says EEXIST but we can't stat it. Maybe it's a dangling link, or maybe
          // we can't access it for some reason. Assume failure.
          //
          // TODO(someday): Maybe we should be creating the directory at the target of the
          //   link?
          goto failed;
      }
      return (stats.st_mode & S_IFMT) == S_IFDIR;
    }
    case ENOENT:
      // The parent is created with noThrow=true, so a failure there lands in `failed` below and
      // is handled according to this call's own `noThrow`.
      if (has(mode, WriteMode::CREATE_PARENT) && path.size() > 0 &&
          tryMkdir(path.parent(), WriteMode::CREATE | WriteMode::MODIFY |
                                  WriteMode::CREATE_PARENT, true)) {
        // Retry, but make sure we don't try to create the parent again.
        return tryMkdir(path, mode - WriteMode::CREATE_PARENT, noThrow);
      } else {
        goto failed;
      }
    default:
    failed:
      if (noThrow) {
        // Caller requested no throwing.
        return false;
      } else {
        KJ_FAIL_SYSCALL("mkdirat(fd, path)", error, path);
      }
  }

  return true;
}
864 | |
kj::Maybe<String> createNamedTemporary(
    PathPtr finalName, WriteMode mode, Function<int(StringPtr)> tryCreate) const {
  // Create a temporary file which will eventually replace `finalName`.
  //
  // Calls `tryCreate` to actually create the temporary, passing in the desired path. tryCreate()
  // is expected to behave like a syscall, returning a negative value and setting `errno` on
  // error. tryCreate() MUST fail with EEXIST if the path exists -- this is not checked in
  // advance, since it needs to be checked atomically. In the case of EEXIST, tryCreate() will
  // be called again with a new path.
  //
  // Returns the temporary path that succeeded. Only returns nullptr if there was an exception
  // but we're compiled with -fno-exceptions.

  if (finalName.size() == 0) {
    KJ_FAIL_REQUIRE("can't replace self") { break; }
    return nullptr;
  }

  // The temporary's name is built from HIDDEN_PREFIX, the process ID, a per-process counter, and
  // the final basename, and is placed in the same directory as `finalName`.
  //
  // NOTE(review): `counter` is a plain static incremented without synchronization -- confirm
  //   whether concurrent callers are possible.
  static uint counter = 0;
  static const pid_t pid = getpid();
  String pathPrefix;
  if (finalName.size() > 1) {
    pathPrefix = kj::str(finalName.parent(), '/');
  }
  auto path = kj::str(pathPrefix, HIDDEN_PREFIX, pid, '.', counter++, '.',
                      finalName.basename()[0], ".partial");

  KJ_SYSCALL_HANDLE_ERRORS(tryCreate(path)) {
    case EEXIST:
      // Name collision: recurse, which picks the next counter value.
      return createNamedTemporary(finalName, mode, kj::mv(tryCreate));
    case ENOENT:
      if (has(mode, WriteMode::CREATE_PARENT) && finalName.size() > 1 &&
          tryMkdir(finalName.parent(), WriteMode::CREATE | WriteMode::MODIFY |
                                       WriteMode::CREATE_PARENT, true)) {
        // Retry, but make sure we don't try to create the parent again.
        mode = mode - WriteMode::CREATE_PARENT;
        return createNamedTemporary(finalName, mode, kj::mv(tryCreate));
      }
      // fallthrough
    default:
      KJ_FAIL_SYSCALL("create(path)", error, path) { break; }
      return nullptr;
  }

  return kj::mv(path);
}
911 | |
912 | bool tryReplaceNode(PathPtr path, WriteMode mode, Function<int(StringPtr)> tryCreate) const { |
913 | // Replaces the given path with an object created by calling tryCreate(). |
914 | // |
915 | // tryCreate() must behave like a syscall which creates the node at the path passed to it, |
916 | // returning a negative value on error. If the path passed to tryCreate already exists, it |
917 | // MUST fail with EEXIST. |
918 | // |
919 | // When `mode` includes MODIFY, replaceNode() reacts to EEXIST by creating the node in a |
920 | // temporary location and then rename()ing it into place. |
921 | |
922 | if (path.size() == 0) { |
923 | KJ_FAIL_REQUIRE("can't replace self" ) { return false; } |
924 | } |
925 | |
926 | auto filename = path.toString(); |
927 | |
928 | if (has(mode, WriteMode::CREATE)) { |
929 | // First try just cerating the node in-place. |
930 | KJ_SYSCALL_HANDLE_ERRORS(tryCreate(filename)) { |
931 | case EEXIST: |
932 | // Target exists. |
933 | if (has(mode, WriteMode::MODIFY)) { |
934 | // Fall back to MODIFY path, below. |
935 | break; |
936 | } else { |
937 | return false; |
938 | } |
939 | case ENOENT: |
940 | if (has(mode, WriteMode::CREATE_PARENT) && path.size() > 0 && |
941 | tryMkdir(path.parent(), WriteMode::CREATE | WriteMode::MODIFY | |
942 | WriteMode::CREATE_PARENT, true)) { |
943 | // Retry, but make sure we don't try to create the parent again. |
944 | return tryReplaceNode(path, mode - WriteMode::CREATE_PARENT, kj::mv(tryCreate)); |
945 | } |
946 | // fallthrough |
947 | default: |
948 | KJ_FAIL_SYSCALL("create(path)" , error, path) { return false; } |
949 | } else { |
950 | // Success. |
951 | return true; |
952 | } |
953 | } |
954 | |
955 | // Either we don't have CREATE mode or the target already exists. We need to perform a |
956 | // replacement instead. |
957 | |
958 | KJ_IF_MAYBE(tempPath, createNamedTemporary(path, mode, kj::mv(tryCreate))) { |
959 | if (tryCommitReplacement(filename, fd, *tempPath, mode)) { |
960 | return true; |
961 | } else { |
962 | KJ_SYSCALL_HANDLE_ERRORS(unlinkat(fd, tempPath->cStr(), 0)) { |
963 | case ENOENT: |
964 | // meh |
965 | break; |
966 | default: |
967 | KJ_FAIL_SYSCALL("unlinkat(fd, tempPath, 0)" , error, *tempPath); |
968 | } |
969 | return false; |
970 | } |
971 | } else { |
972 | // threw, but exceptions are disabled |
973 | return false; |
974 | } |
975 | } |
976 | |
977 | Maybe<AutoCloseFd> tryOpenFileInternal(PathPtr path, WriteMode mode, bool append) const { |
978 | uint flags = O_RDWR | MAYBE_O_CLOEXEC; |
979 | mode_t acl = 0666; |
980 | if (has(mode, WriteMode::CREATE)) { |
981 | flags |= O_CREAT; |
982 | } |
983 | if (!has(mode, WriteMode::MODIFY)) { |
984 | if (!has(mode, WriteMode::CREATE)) { |
985 | // Neither CREATE nor MODIFY -- impossible to satisfy preconditions. |
986 | return nullptr; |
987 | } |
988 | flags |= O_EXCL; |
989 | } |
990 | if (append) { |
991 | flags |= O_APPEND; |
992 | } |
993 | if (has(mode, WriteMode::EXECUTABLE)) { |
994 | acl = 0777; |
995 | } |
996 | if (has(mode, WriteMode::PRIVATE)) { |
997 | acl &= 0700; |
998 | } |
999 | |
1000 | auto filename = path.toString(); |
1001 | |
1002 | int newFd; |
1003 | KJ_SYSCALL_HANDLE_ERRORS(newFd = openat(fd, filename.cStr(), flags, acl)) { |
1004 | case ENOENT: |
1005 | if (has(mode, WriteMode::CREATE)) { |
1006 | // Either: |
1007 | // - The file is a broken symlink. |
1008 | // - A parent directory didn't exist. |
1009 | if (has(mode, WriteMode::CREATE_PARENT) && path.size() > 0 && |
1010 | tryMkdir(path.parent(), WriteMode::CREATE | WriteMode::MODIFY | |
1011 | WriteMode::CREATE_PARENT, true)) { |
1012 | // Retry, but make sure we don't try to create the parent again. |
1013 | return tryOpenFileInternal(path, mode - WriteMode::CREATE_PARENT, append); |
1014 | } |
1015 | |
1016 | // Check for broken link. |
1017 | if (!has(mode, WriteMode::MODIFY) && |
1018 | faccessat(fd, filename.cStr(), F_OK, AT_SYMLINK_NOFOLLOW) >= 0) { |
1019 | // Yep. We treat this as already-exists, which means in CREATE-only mode this is a |
1020 | // simple failure. |
1021 | return nullptr; |
1022 | } |
1023 | |
1024 | KJ_FAIL_REQUIRE("parent is not a directory" , path) { return nullptr; } |
1025 | } else { |
1026 | // MODIFY-only mode. ENOENT = doesn't exist = return null. |
1027 | return nullptr; |
1028 | } |
1029 | case ENOTDIR: |
1030 | if (!has(mode, WriteMode::CREATE)) { |
1031 | // MODIFY-only mode. ENOTDIR = parent not a directory = doesn't exist = return null. |
1032 | return nullptr; |
1033 | } |
1034 | goto failed; |
1035 | case EEXIST: |
1036 | if (!has(mode, WriteMode::MODIFY)) { |
1037 | // CREATE-only mode. EEXIST = already exists = return null. |
1038 | return nullptr; |
1039 | } |
1040 | goto failed; |
1041 | default: |
1042 | failed: |
1043 | KJ_FAIL_SYSCALL("openat(fd, path, O_RDWR | ...)" , error, path) { return nullptr; } |
1044 | } |
1045 | |
1046 | kj::AutoCloseFd result(newFd); |
1047 | #ifndef O_CLOEXEC |
1048 | setCloexec(result); |
1049 | #endif |
1050 | |
1051 | return kj::mv(result); |
1052 | } |
1053 | |
1054 | bool tryCommitReplacement(StringPtr toPath, int fromDirFd, StringPtr fromPath, WriteMode mode, |
1055 | int* errorReason = nullptr) const { |
1056 | if (has(mode, WriteMode::CREATE) && has(mode, WriteMode::MODIFY)) { |
1057 | // Always clobber. Try it. |
1058 | KJ_SYSCALL_HANDLE_ERRORS(renameat(fromDirFd, fromPath.cStr(), fd.get(), toPath.cStr())) { |
1059 | case EISDIR: |
1060 | case ENOTDIR: |
1061 | case ENOTEMPTY: |
1062 | case EEXIST: |
1063 | // Failed because target exists and due to the various weird quirks of rename(), it |
1064 | // can't remove it for us. On Linux we can try an exchange instead. On others we have |
1065 | // to move the target out of the way. |
1066 | break; |
1067 | default: |
1068 | if (errorReason == nullptr) { |
1069 | KJ_FAIL_SYSCALL("rename(fromPath, toPath)" , error, fromPath, toPath) { return false; } |
1070 | } else { |
1071 | *errorReason = error; |
1072 | return false; |
1073 | } |
1074 | } else { |
1075 | return true; |
1076 | } |
1077 | } |
1078 | |
1079 | #if __linux__ && defined(RENAME_EXCHANGE) |
1080 | // Try to use Linux's renameat2() to atomically check preconditions and apply. |
1081 | |
1082 | if (has(mode, WriteMode::MODIFY)) { |
1083 | // Use an exchange to implement modification. |
1084 | // |
1085 | // We reach this branch when performing a MODIFY-only, or when performing a CREATE | MODIFY |
1086 | // in which we determined above that there's a node of a different type blocking the |
1087 | // exchange. |
1088 | |
1089 | KJ_SYSCALL_HANDLE_ERRORS(syscall(SYS_renameat2, |
1090 | fromDirFd, fromPath.cStr(), fd.get(), toPath.cStr(), RENAME_EXCHANGE)) { |
1091 | case ENOSYS: |
1092 | break; // fall back to traditional means |
1093 | case ENOENT: |
1094 | // Presumably because the target path doesn't exist. |
1095 | if (has(mode, WriteMode::CREATE)) { |
1096 | KJ_FAIL_ASSERT("rename(tmp, path) claimed path exists but " |
1097 | "renameat2(fromPath, toPath, EXCAHNGE) said it doest; concurrent modification?" , |
1098 | fromPath, toPath) { return false; } |
1099 | } else { |
1100 | // Assume target doesn't exist. |
1101 | return false; |
1102 | } |
1103 | default: |
1104 | if (errorReason == nullptr) { |
1105 | KJ_FAIL_SYSCALL("renameat2(fromPath, toPath, EXCHANGE)" , error, fromPath, toPath) { |
1106 | return false; |
1107 | } |
1108 | } else { |
1109 | *errorReason = error; |
1110 | return false; |
1111 | } |
1112 | } else { |
1113 | // Successful swap! Delete swapped-out content. |
1114 | rmrf(fromDirFd, fromPath); |
1115 | return true; |
1116 | } |
1117 | } else if (has(mode, WriteMode::CREATE)) { |
1118 | KJ_SYSCALL_HANDLE_ERRORS(syscall(SYS_renameat2, |
1119 | fromDirFd, fromPath.cStr(), fd.get(), toPath.cStr(), RENAME_NOREPLACE)) { |
1120 | case ENOSYS: |
1121 | break; // fall back to traditional means |
1122 | case EEXIST: |
1123 | return false; |
1124 | default: |
1125 | if (errorReason == nullptr) { |
1126 | KJ_FAIL_SYSCALL("renameat2(fromPath, toPath, NOREPLACE)" , error, fromPath, toPath) { |
1127 | return false; |
1128 | } |
1129 | } else { |
1130 | *errorReason = error; |
1131 | return false; |
1132 | } |
1133 | } else { |
1134 | return true; |
1135 | } |
1136 | } |
1137 | #endif |
1138 | |
1139 | // We're unable to do what we wanted atomically. :( |
1140 | |
1141 | if (has(mode, WriteMode::CREATE) && has(mode, WriteMode::MODIFY)) { |
1142 | // We failed to atomically delete the target previously. So now we need to do two calls in |
1143 | // rapid succession to move the old file away then move the new one into place. |
1144 | |
1145 | // Find out what kind of file exists at the target path. |
1146 | struct stat stats; |
1147 | KJ_SYSCALL(fstatat(fd, toPath.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) { return false; } |
1148 | |
1149 | // Create a temporary location to move the existing object to. Note that rename() allows a |
1150 | // non-directory to replace a non-directory, and allows a directory to replace an empty |
1151 | // directory. So we have to create the right type. |
1152 | Path toPathParsed = Path::parse(toPath); |
1153 | String away; |
1154 | KJ_IF_MAYBE(awayPath, createNamedTemporary(toPathParsed, WriteMode::CREATE, |
1155 | [&](StringPtr candidatePath) { |
1156 | if (S_ISDIR(stats.st_mode)) { |
1157 | return mkdirat(fd, candidatePath.cStr(), 0700); |
1158 | } else { |
1159 | #if __APPLE__ |
1160 | // No mknodat() on OSX, gotta open() a file, ugh. |
1161 | int newFd = openat(fd, candidatePath.cStr(), |
1162 | O_RDWR | O_CREAT | O_EXCL | MAYBE_O_CLOEXEC, 0700); |
1163 | if (newFd >= 0) close(newFd); |
1164 | return newFd; |
1165 | #else |
1166 | return mknodat(fd, candidatePath.cStr(), S_IFREG | 0600, dev_t()); |
1167 | #endif |
1168 | } |
1169 | })) { |
1170 | away = kj::mv(*awayPath); |
1171 | } else { |
1172 | // Already threw. |
1173 | return false; |
1174 | } |
1175 | |
1176 | // OK, now move the target object to replace the thing we just created. |
1177 | KJ_SYSCALL(renameat(fd, toPath.cStr(), fd, away.cStr())) { |
1178 | // Something went wrong. Remove the thing we just created. |
1179 | unlinkat(fd, away.cStr(), S_ISDIR(stats.st_mode) ? AT_REMOVEDIR : 0); |
1180 | return false; |
1181 | } |
1182 | |
1183 | // Now move the source object to the target location. |
1184 | KJ_SYSCALL_HANDLE_ERRORS(renameat(fromDirFd, fromPath.cStr(), fd, toPath.cStr())) { |
1185 | default: |
1186 | // Try to put things back where they were. If this fails, though, then we have little |
1187 | // choice but to leave things broken. |
1188 | KJ_SYSCALL_HANDLE_ERRORS(renameat(fd, away.cStr(), fd, toPath.cStr())) { |
1189 | default: break; |
1190 | } |
1191 | |
1192 | if (errorReason == nullptr) { |
1193 | KJ_FAIL_SYSCALL("rename(fromPath, toPath)" , error, fromPath, toPath) { |
1194 | return false; |
1195 | } |
1196 | } else { |
1197 | *errorReason = error; |
1198 | return false; |
1199 | } |
1200 | } |
1201 | |
1202 | // OK, success. Delete the old content. |
1203 | rmrf(fd, away); |
1204 | return true; |
1205 | } else { |
1206 | // Only one of CREATE or MODIFY is specified, so we need to verify non-atomically that the |
1207 | // corresponding precondition (must-not-exist or must-exist, respectively) is held. |
1208 | if (has(mode, WriteMode::CREATE)) { |
1209 | struct stat stats; |
1210 | KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd.get(), toPath.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) { |
1211 | case ENOENT: |
1212 | case ENOTDIR: |
1213 | break; // doesn't exist; continue |
1214 | default: |
1215 | KJ_FAIL_SYSCALL("fstatat(fd, toPath)" , error, toPath) { return false; } |
1216 | } else { |
1217 | return false; // already exists; fail |
1218 | } |
1219 | } else if (has(mode, WriteMode::MODIFY)) { |
1220 | struct stat stats; |
1221 | KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd.get(), toPath.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) { |
1222 | case ENOENT: |
1223 | case ENOTDIR: |
1224 | return false; // doesn't exist; fail |
1225 | default: |
1226 | KJ_FAIL_SYSCALL("fstatat(fd, toPath)" , error, toPath) { return false; } |
1227 | } else { |
1228 | // already exists; continue |
1229 | } |
1230 | } else { |
1231 | // Neither CREATE nor MODIFY. |
1232 | return false; |
1233 | } |
1234 | |
1235 | // Start over in create-and-modify mode. |
1236 | return tryCommitReplacement(toPath, fromDirFd, fromPath, |
1237 | WriteMode::CREATE | WriteMode::MODIFY, |
1238 | errorReason); |
1239 | } |
1240 | } |
1241 | |
1242 | template <typename T> |
1243 | class ReplacerImpl final: public Directory::Replacer<T> { |
1244 | public: |
1245 | ReplacerImpl(Own<const T>&& object, const DiskHandle& handle, |
1246 | String&& tempPath, String&& path, WriteMode mode) |
1247 | : Directory::Replacer<T>(mode), |
1248 | object(kj::mv(object)), handle(handle), |
1249 | tempPath(kj::mv(tempPath)), path(kj::mv(path)) {} |
1250 | |
1251 | ~ReplacerImpl() noexcept(false) { |
1252 | if (!committed) { |
1253 | rmrf(handle.fd, tempPath); |
1254 | } |
1255 | } |
1256 | |
1257 | const T& get() override { |
1258 | return *object; |
1259 | } |
1260 | |
1261 | bool tryCommit() override { |
1262 | KJ_ASSERT(!committed, "already committed" ) { return false; } |
1263 | return committed = handle.tryCommitReplacement(path, handle.fd, tempPath, |
1264 | Directory::Replacer<T>::mode); |
1265 | } |
1266 | |
1267 | private: |
1268 | Own<const T> object; |
1269 | const DiskHandle& handle; |
1270 | String tempPath; |
1271 | String path; |
1272 | bool committed = false; // true if *successfully* committed (in which case tempPath is gone) |
1273 | }; |
1274 | |
1275 | template <typename T> |
1276 | class BrokenReplacer final: public Directory::Replacer<T> { |
1277 | // For recovery path when exceptions are disabled. |
1278 | |
1279 | public: |
1280 | BrokenReplacer(Own<const T> inner) |
1281 | : Directory::Replacer<T>(WriteMode::CREATE | WriteMode::MODIFY), |
1282 | inner(kj::mv(inner)) {} |
1283 | |
1284 | const T& get() override { return *inner; } |
1285 | bool tryCommit() override { return false; } |
1286 | |
1287 | private: |
1288 | Own<const T> inner; |
1289 | }; |
1290 | |
1291 | Maybe<Own<const File>> tryOpenFile(PathPtr path, WriteMode mode) const { |
1292 | return tryOpenFileInternal(path, mode, false).map(newDiskFile); |
1293 | } |
1294 | |
1295 | Own<Directory::Replacer<File>> replaceFile(PathPtr path, WriteMode mode) const { |
1296 | mode_t acl = 0666; |
1297 | if (has(mode, WriteMode::EXECUTABLE)) { |
1298 | acl = 0777; |
1299 | } |
1300 | if (has(mode, WriteMode::PRIVATE)) { |
1301 | acl &= 0700; |
1302 | } |
1303 | |
1304 | int newFd_; |
1305 | KJ_IF_MAYBE(temp, createNamedTemporary(path, mode, |
1306 | [&](StringPtr candidatePath) { |
1307 | return newFd_ = openat(fd, candidatePath.cStr(), |
1308 | O_RDWR | O_CREAT | O_EXCL | MAYBE_O_CLOEXEC, acl); |
1309 | })) { |
1310 | AutoCloseFd newFd(newFd_); |
1311 | #ifndef O_CLOEXEC |
1312 | setCloexec(newFd); |
1313 | #endif |
1314 | return heap<ReplacerImpl<File>>(newDiskFile(kj::mv(newFd)), *this, kj::mv(*temp), |
1315 | path.toString(), mode); |
1316 | } else { |
1317 | // threw, but exceptions are disabled |
1318 | return heap<BrokenReplacer<File>>(newInMemoryFile(nullClock())); |
1319 | } |
1320 | } |
1321 | |
1322 | Own<const File> createTemporary() const { |
1323 | int newFd_; |
1324 | |
1325 | #if __linux__ && defined(O_TMPFILE) |
1326 | // Use syscall() to work around glibc bug with O_TMPFILE: |
1327 | // https://sourceware.org/bugzilla/show_bug.cgi?id=17523 |
1328 | KJ_SYSCALL_HANDLE_ERRORS(newFd_ = syscall( |
1329 | SYS_openat, fd.get(), "." , O_RDWR | O_TMPFILE, 0700)) { |
1330 | case EOPNOTSUPP: |
1331 | case EINVAL: |
1332 | case EISDIR: |
1333 | // Maybe not supported by this kernel / filesystem. Fall back to below. |
1334 | break; |
1335 | default: |
1336 | KJ_FAIL_SYSCALL("open(O_TMPFILE)" , error) { break; } |
1337 | break; |
1338 | } else { |
1339 | AutoCloseFd newFd(newFd_); |
1340 | #ifndef O_CLOEXEC |
1341 | setCloexec(newFd); |
1342 | #endif |
1343 | return newDiskFile(kj::mv(newFd)); |
1344 | } |
1345 | #endif |
1346 | |
1347 | KJ_IF_MAYBE(temp, createNamedTemporary(Path("unnamed" ), WriteMode::CREATE, |
1348 | [&](StringPtr path) { |
1349 | return newFd_ = openat(fd, path.cStr(), O_RDWR | O_CREAT | O_EXCL | MAYBE_O_CLOEXEC, 0600); |
1350 | })) { |
1351 | AutoCloseFd newFd(newFd_); |
1352 | #ifndef O_CLOEXEC |
1353 | setCloexec(newFd); |
1354 | #endif |
1355 | auto result = newDiskFile(kj::mv(newFd)); |
1356 | KJ_SYSCALL(unlinkat(fd, temp->cStr(), 0)) { break; } |
1357 | return kj::mv(result); |
1358 | } else { |
1359 | // threw, but exceptions are disabled |
1360 | return newInMemoryFile(nullClock()); |
1361 | } |
1362 | } |
1363 | |
1364 | Maybe<Own<AppendableFile>> tryAppendFile(PathPtr path, WriteMode mode) const { |
1365 | return tryOpenFileInternal(path, mode, true).map(newDiskAppendableFile); |
1366 | } |
1367 | |
1368 | Maybe<Own<const Directory>> tryOpenSubdir(PathPtr path, WriteMode mode) const { |
1369 | // Must create before open. |
1370 | if (has(mode, WriteMode::CREATE)) { |
1371 | if (!tryMkdir(path, mode, false)) return nullptr; |
1372 | } |
1373 | |
1374 | return tryOpenSubdirInternal(path).map(newDiskDirectory); |
1375 | } |
1376 | |
1377 | Own<Directory::Replacer<Directory>> replaceSubdir(PathPtr path, WriteMode mode) const { |
1378 | mode_t acl = has(mode, WriteMode::PRIVATE) ? 0700 : 0777; |
1379 | |
1380 | KJ_IF_MAYBE(temp, createNamedTemporary(path, mode, |
1381 | [&](StringPtr candidatePath) { |
1382 | return mkdirat(fd, candidatePath.cStr(), acl); |
1383 | })) { |
1384 | int subdirFd_; |
1385 | KJ_SYSCALL_HANDLE_ERRORS(subdirFd_ = openat( |
1386 | fd, temp->cStr(), O_RDONLY | MAYBE_O_CLOEXEC | MAYBE_O_DIRECTORY)) { |
1387 | default: |
1388 | KJ_FAIL_SYSCALL("open(just-created-temporary)" , error); |
1389 | return heap<BrokenReplacer<Directory>>(newInMemoryDirectory(nullClock())); |
1390 | } |
1391 | |
1392 | AutoCloseFd subdirFd(subdirFd_); |
1393 | #ifndef O_CLOEXEC |
1394 | setCloexec(subdirFd); |
1395 | #endif |
1396 | return heap<ReplacerImpl<Directory>>( |
1397 | newDiskDirectory(kj::mv(subdirFd)), *this, kj::mv(*temp), path.toString(), mode); |
1398 | } else { |
1399 | // threw, but exceptions are disabled |
1400 | return heap<BrokenReplacer<Directory>>(newInMemoryDirectory(nullClock())); |
1401 | } |
1402 | } |
1403 | |
1404 | bool trySymlink(PathPtr linkpath, StringPtr content, WriteMode mode) const { |
1405 | return tryReplaceNode(linkpath, mode, [&](StringPtr candidatePath) { |
1406 | return symlinkat(content.cStr(), fd, candidatePath.cStr()); |
1407 | }); |
1408 | } |
1409 | |
1410 | bool tryTransfer(PathPtr toPath, WriteMode toMode, |
1411 | const Directory& fromDirectory, PathPtr fromPath, |
1412 | TransferMode mode, const Directory& self) const { |
1413 | KJ_REQUIRE(toPath.size() > 0, "can't replace self" ) { return false; } |
1414 | |
1415 | if (mode == TransferMode::LINK) { |
1416 | KJ_IF_MAYBE(fromFd, fromDirectory.getFd()) { |
1417 | // Other is a disk directory, so we can hopefully do an efficient move/link. |
1418 | return tryReplaceNode(toPath, toMode, [&](StringPtr candidatePath) { |
1419 | return linkat(*fromFd, fromPath.toString().cStr(), fd, candidatePath.cStr(), 0); |
1420 | }); |
1421 | }; |
1422 | } else if (mode == TransferMode::MOVE) { |
1423 | KJ_IF_MAYBE(fromFd, fromDirectory.getFd()) { |
1424 | KJ_ASSERT(mode == TransferMode::MOVE); |
1425 | |
1426 | int error = 0; |
1427 | if (tryCommitReplacement(toPath.toString(), *fromFd, fromPath.toString(), toMode, |
1428 | &error)) { |
1429 | return true; |
1430 | } else switch (error) { |
1431 | case 0: |
1432 | // Plain old WriteMode precondition failure. |
1433 | return false; |
1434 | case EXDEV: |
1435 | // Can't move between devices. Fall back to default implementation, which does |
1436 | // copy/delete. |
1437 | break; |
1438 | case ENOENT: |
1439 | // Either the destination directory doesn't exist or the source path doesn't exist. |
1440 | // Unfortunately we don't really know. If CREATE_PARENT was provided, try creating |
1441 | // the parent directory. Otherwise, we don't actually need to distinguish between |
1442 | // these two errors; just return false. |
1443 | if (has(toMode, WriteMode::CREATE) && has(toMode, WriteMode::CREATE_PARENT) && |
1444 | toPath.size() > 0 && tryMkdir(toPath.parent(), |
1445 | WriteMode::CREATE | WriteMode::MODIFY | WriteMode::CREATE_PARENT, true)) { |
1446 | // Retry, but make sure we don't try to create the parent again. |
1447 | return tryTransfer(toPath, toMode - WriteMode::CREATE_PARENT, |
1448 | fromDirectory, fromPath, mode, self); |
1449 | } |
1450 | return false; |
1451 | default: |
1452 | KJ_FAIL_SYSCALL("rename(fromPath, toPath)" , error, fromPath, toPath) { |
1453 | return false; |
1454 | } |
1455 | } |
1456 | } |
1457 | } |
1458 | |
1459 | // OK, we can't do anything efficient using the OS. Fall back to default implementation. |
1460 | return self.Directory::tryTransfer(toPath, toMode, fromDirectory, fromPath, mode); |
1461 | } |
1462 | |
1463 | bool tryRemove(PathPtr path) const { |
1464 | return rmrf(fd, path.toString()); |
1465 | } |
1466 | |
1467 | protected: |
1468 | AutoCloseFd fd; |
1469 | }; |
1470 | |
// Expands to the FsNode overrides shared by every disk-backed class below. Each override simply
// forwards to the DiskHandle implementation; `classname` is the concrete class, needed so that
// cloneFsNode() can allocate a new instance of it around the cloned handle.
#define FSNODE_METHODS(classname)                                     \
  Maybe<int> getFd() const override {                                 \
    return DiskHandle::getFd();                                       \
  }                                                                   \
                                                                      \
  Own<const FsNode> cloneFsNode() const override {                    \
    return heap<classname>(DiskHandle::clone());                      \
  }                                                                   \
                                                                      \
  Metadata stat() const override {                                    \
    return DiskHandle::stat();                                        \
  }                                                                   \
  void sync() const override {                                        \
    DiskHandle::sync();                                               \
  }                                                                   \
  void datasync() const override {                                    \
    DiskHandle::datasync();                                           \
  }
1481 | |
1482 | class DiskReadableFile final: public ReadableFile, public DiskHandle { |
1483 | public: |
1484 | DiskReadableFile(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {} |
1485 | |
1486 | FSNODE_METHODS(DiskReadableFile); |
1487 | |
1488 | size_t read(uint64_t offset, ArrayPtr<byte> buffer) const override { |
1489 | return DiskHandle::read(offset, buffer); |
1490 | } |
1491 | Array<const byte> mmap(uint64_t offset, uint64_t size) const override { |
1492 | return DiskHandle::mmap(offset, size); |
1493 | } |
1494 | Array<byte> mmapPrivate(uint64_t offset, uint64_t size) const override { |
1495 | return DiskHandle::mmapPrivate(offset, size); |
1496 | } |
1497 | }; |
1498 | |
1499 | class DiskAppendableFile final: public AppendableFile, public DiskHandle, public FdOutputStream { |
1500 | public: |
1501 | DiskAppendableFile(AutoCloseFd&& fd) |
1502 | : DiskHandle(kj::mv(fd)), |
1503 | FdOutputStream(DiskHandle::fd.get()) {} |
1504 | |
1505 | FSNODE_METHODS(DiskAppendableFile); |
1506 | |
1507 | void write(const void* buffer, size_t size) override { |
1508 | FdOutputStream::write(buffer, size); |
1509 | } |
1510 | void write(ArrayPtr<const ArrayPtr<const byte>> pieces) override { |
1511 | FdOutputStream::write(pieces); |
1512 | } |
1513 | }; |
1514 | |
1515 | class DiskFile final: public File, public DiskHandle { |
1516 | public: |
1517 | DiskFile(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {} |
1518 | |
1519 | FSNODE_METHODS(DiskFile); |
1520 | |
1521 | size_t read(uint64_t offset, ArrayPtr<byte> buffer) const override { |
1522 | return DiskHandle::read(offset, buffer); |
1523 | } |
1524 | Array<const byte> mmap(uint64_t offset, uint64_t size) const override { |
1525 | return DiskHandle::mmap(offset, size); |
1526 | } |
1527 | Array<byte> mmapPrivate(uint64_t offset, uint64_t size) const override { |
1528 | return DiskHandle::mmapPrivate(offset, size); |
1529 | } |
1530 | |
1531 | void write(uint64_t offset, ArrayPtr<const byte> data) const override { |
1532 | DiskHandle::write(offset, data); |
1533 | } |
1534 | void zero(uint64_t offset, uint64_t size) const override { |
1535 | DiskHandle::zero(offset, size); |
1536 | } |
1537 | void truncate(uint64_t size) const override { |
1538 | DiskHandle::truncate(size); |
1539 | } |
1540 | Own<const WritableFileMapping> mmapWritable(uint64_t offset, uint64_t size) const override { |
1541 | return DiskHandle::mmapWritable(offset, size); |
1542 | } |
1543 | size_t copy(uint64_t offset, const ReadableFile& from, |
1544 | uint64_t fromOffset, uint64_t size) const override { |
1545 | KJ_IF_MAYBE(result, DiskHandle::copy(offset, from, fromOffset, size)) { |
1546 | return *result; |
1547 | } else { |
1548 | return File::copy(offset, from, fromOffset, size); |
1549 | } |
1550 | } |
1551 | }; |
1552 | |
1553 | class DiskReadableDirectory final: public ReadableDirectory, public DiskHandle { |
1554 | public: |
1555 | DiskReadableDirectory(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {} |
1556 | |
1557 | FSNODE_METHODS(DiskReadableDirectory); |
1558 | |
1559 | Array<String> listNames() const override { return DiskHandle::listNames(); } |
1560 | Array<Entry> listEntries() const override { return DiskHandle::listEntries(); } |
1561 | bool exists(PathPtr path) const override { return DiskHandle::exists(path); } |
1562 | Maybe<FsNode::Metadata> tryLstat(PathPtr path) const override { |
1563 | return DiskHandle::tryLstat(path); |
1564 | } |
1565 | Maybe<Own<const ReadableFile>> tryOpenFile(PathPtr path) const override { |
1566 | return DiskHandle::tryOpenFile(path); |
1567 | } |
1568 | Maybe<Own<const ReadableDirectory>> tryOpenSubdir(PathPtr path) const override { |
1569 | return DiskHandle::tryOpenSubdir(path); |
1570 | } |
1571 | Maybe<String> tryReadlink(PathPtr path) const override { return DiskHandle::tryReadlink(path); } |
1572 | }; |
1573 | |
1574 | class DiskDirectory final: public Directory, public DiskHandle { |
1575 | public: |
1576 | DiskDirectory(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {} |
1577 | |
1578 | FSNODE_METHODS(DiskDirectory); |
1579 | |
1580 | Array<String> listNames() const override { return DiskHandle::listNames(); } |
1581 | Array<Entry> listEntries() const override { return DiskHandle::listEntries(); } |
1582 | bool exists(PathPtr path) const override { return DiskHandle::exists(path); } |
1583 | Maybe<FsNode::Metadata> tryLstat(PathPtr path) const override { |
1584 | return DiskHandle::tryLstat(path); |
1585 | } |
1586 | Maybe<Own<const ReadableFile>> tryOpenFile(PathPtr path) const override { |
1587 | return DiskHandle::tryOpenFile(path); |
1588 | } |
1589 | Maybe<Own<const ReadableDirectory>> tryOpenSubdir(PathPtr path) const override { |
1590 | return DiskHandle::tryOpenSubdir(path); |
1591 | } |
1592 | Maybe<String> tryReadlink(PathPtr path) const override { return DiskHandle::tryReadlink(path); } |
1593 | |
1594 | Maybe<Own<const File>> tryOpenFile(PathPtr path, WriteMode mode) const override { |
1595 | return DiskHandle::tryOpenFile(path, mode); |
1596 | } |
1597 | Own<Replacer<File>> replaceFile(PathPtr path, WriteMode mode) const override { |
1598 | return DiskHandle::replaceFile(path, mode); |
1599 | } |
1600 | Own<const File> createTemporary() const override { |
1601 | return DiskHandle::createTemporary(); |
1602 | } |
1603 | Maybe<Own<AppendableFile>> tryAppendFile(PathPtr path, WriteMode mode) const override { |
1604 | return DiskHandle::tryAppendFile(path, mode); |
1605 | } |
1606 | Maybe<Own<const Directory>> tryOpenSubdir(PathPtr path, WriteMode mode) const override { |
1607 | return DiskHandle::tryOpenSubdir(path, mode); |
1608 | } |
1609 | Own<Replacer<Directory>> replaceSubdir(PathPtr path, WriteMode mode) const override { |
1610 | return DiskHandle::replaceSubdir(path, mode); |
1611 | } |
1612 | bool trySymlink(PathPtr linkpath, StringPtr content, WriteMode mode) const override { |
1613 | return DiskHandle::trySymlink(linkpath, content, mode); |
1614 | } |
1615 | bool tryTransfer(PathPtr toPath, WriteMode toMode, |
1616 | const Directory& fromDirectory, PathPtr fromPath, |
1617 | TransferMode mode) const override { |
1618 | return DiskHandle::tryTransfer(toPath, toMode, fromDirectory, fromPath, mode, *this); |
1619 | } |
1620 | // tryTransferTo() not implemented because we have nothing special we can do. |
1621 | bool tryRemove(PathPtr path) const override { |
1622 | return DiskHandle::tryRemove(path); |
1623 | } |
1624 | }; |
1625 | |
1626 | class DiskFilesystem final: public Filesystem { |
1627 | public: |
1628 | DiskFilesystem() |
1629 | : root(openDir("/" )), |
1630 | current(openDir("." )), |
1631 | currentPath(computeCurrentPath()) {} |
1632 | |
1633 | const Directory& getRoot() const override { |
1634 | return root; |
1635 | } |
1636 | |
1637 | const Directory& getCurrent() const override { |
1638 | return current; |
1639 | } |
1640 | |
1641 | PathPtr getCurrentPath() const override { |
1642 | return currentPath; |
1643 | } |
1644 | |
1645 | private: |
1646 | DiskDirectory root; |
1647 | DiskDirectory current; |
1648 | Path currentPath; |
1649 | |
1650 | static AutoCloseFd openDir(const char* dir) { |
1651 | int newFd; |
1652 | KJ_SYSCALL(newFd = open(dir, O_RDONLY | MAYBE_O_CLOEXEC | MAYBE_O_DIRECTORY)); |
1653 | AutoCloseFd result(newFd); |
1654 | #ifndef O_CLOEXEC |
1655 | setCloexec(result); |
1656 | #endif |
1657 | return result; |
1658 | } |
1659 | |
1660 | static Path computeCurrentPath() { |
1661 | // If env var PWD is set and points to the current directory, use it. This captures the current |
1662 | // path according to the user's shell, which may differ from the kernel's idea in the presence |
1663 | // of symlinks. |
1664 | const char* pwd = getenv("PWD" ); |
1665 | if (pwd != nullptr) { |
1666 | Path result = nullptr; |
1667 | struct stat pwdStat, dotStat; |
1668 | KJ_IF_MAYBE(e, kj::runCatchingExceptions([&]() { |
1669 | KJ_ASSERT(pwd[0] == '/') { return; } |
1670 | result = Path::parse(pwd + 1); |
1671 | KJ_SYSCALL(lstat(result.toString(true).cStr(), &pwdStat), result) { return; } |
1672 | KJ_SYSCALL(lstat("." , &dotStat)) { return; } |
1673 | })) { |
1674 | // failed, give up on PWD |
1675 | KJ_LOG(WARNING, "PWD environment variable seems invalid" , pwd, *e); |
1676 | } else { |
1677 | if (pwdStat.st_ino == dotStat.st_ino && |
1678 | pwdStat.st_dev == dotStat.st_dev) { |
1679 | return kj::mv(result); |
1680 | } else { |
1681 | KJ_LOG(WARNING, "PWD environment variable doesn't match current directory" , pwd); |
1682 | } |
1683 | } |
1684 | } |
1685 | |
1686 | size_t size = 256; |
1687 | retry: |
1688 | KJ_STACK_ARRAY(char, buf, size, 256, 4096); |
1689 | if (getcwd(buf.begin(), size) == nullptr) { |
1690 | int error = errno; |
1691 | if (error == ENAMETOOLONG) { |
1692 | size *= 2; |
1693 | goto retry; |
1694 | } else { |
1695 | KJ_FAIL_SYSCALL("getcwd()" , error); |
1696 | } |
1697 | } |
1698 | |
1699 | StringPtr path = buf.begin(); |
1700 | |
1701 | // On Linux, the path will start with "(unreachable)" if the working directory is not a subdir |
1702 | // of the root directory, which is possible via chroot() or mount namespaces. |
1703 | KJ_ASSERT(!path.startsWith("(unreachable)" ), |
1704 | "working directory is not reachable from root" , path); |
1705 | KJ_ASSERT(path.startsWith("/" ), "current directory is not absolute" , path); |
1706 | |
1707 | return Path::parse(path.slice(1)); |
1708 | } |
1709 | }; |
1710 | |
1711 | } // namespace |
1712 | |
1713 | Own<ReadableFile> newDiskReadableFile(kj::AutoCloseFd fd) { |
1714 | return heap<DiskReadableFile>(kj::mv(fd)); |
1715 | } |
1716 | Own<AppendableFile> newDiskAppendableFile(kj::AutoCloseFd fd) { |
1717 | return heap<DiskAppendableFile>(kj::mv(fd)); |
1718 | } |
1719 | Own<File> newDiskFile(kj::AutoCloseFd fd) { |
1720 | return heap<DiskFile>(kj::mv(fd)); |
1721 | } |
1722 | Own<ReadableDirectory> newDiskReadableDirectory(kj::AutoCloseFd fd) { |
1723 | return heap<DiskReadableDirectory>(kj::mv(fd)); |
1724 | } |
1725 | Own<Directory> newDiskDirectory(kj::AutoCloseFd fd) { |
1726 | return heap<DiskDirectory>(kj::mv(fd)); |
1727 | } |
1728 | |
1729 | Own<Filesystem> newDiskFilesystem() { |
1730 | return heap<DiskFilesystem>(); |
1731 | } |
1732 | |
1733 | } // namespace kj |
1734 | |
1735 | #endif // !_WIN32 |
1736 | |