| 1 | // Copyright (c) 2015 Sandstorm Development Group, Inc. and contributors |
| 2 | // Licensed under the MIT License: |
| 3 | // |
| 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
| 5 | // of this software and associated documentation files (the "Software"), to deal |
| 6 | // in the Software without restriction, including without limitation the rights |
| 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 8 | // copies of the Software, and to permit persons to whom the Software is |
| 9 | // furnished to do so, subject to the following conditions: |
| 10 | // |
| 11 | // The above copyright notice and this permission notice shall be included in |
| 12 | // all copies or substantial portions of the Software. |
| 13 | // |
| 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 20 | // THE SOFTWARE. |
| 21 | |
| 22 | #if !_WIN32 |
| 23 | |
| 24 | #ifndef _GNU_SOURCE |
| 25 | #define _GNU_SOURCE |
| 26 | #endif |
| 27 | |
| 28 | #include "filesystem.h" |
| 29 | #include "debug.h" |
| 30 | #include <sys/types.h> |
| 31 | #include <sys/stat.h> |
| 32 | #include <sys/ioctl.h> |
| 33 | #include <fcntl.h> |
| 34 | #include <unistd.h> |
| 35 | #include <stdio.h> |
| 36 | #include <sys/mman.h> |
| 37 | #include <errno.h> |
| 38 | #include <dirent.h> |
| 39 | #include <stdlib.h> |
| 40 | #include "vector.h" |
| 41 | #include "miniposix.h" |
| 42 | #include <algorithm> |
| 43 | |
| 44 | #if __linux__ |
| 45 | #include <syscall.h> |
| 46 | #include <linux/fs.h> |
| 47 | #include <sys/sendfile.h> |
| 48 | #endif |
| 49 | |
| 50 | namespace kj { |
| 51 | namespace { |
| 52 | |
| 53 | #define HIDDEN_PREFIX ".kj-tmp." |
| 54 | // Prefix for temp files which should be hidden when listing a directory. |
| 55 | // |
| 56 | // If you change this, make sure to update the unit test. |
| 57 | |
| 58 | #ifdef O_CLOEXEC |
| 59 | #define MAYBE_O_CLOEXEC O_CLOEXEC |
| 60 | #else |
| 61 | #define MAYBE_O_CLOEXEC 0 |
| 62 | #endif |
| 63 | |
| 64 | #ifdef O_DIRECTORY |
| 65 | #define MAYBE_O_DIRECTORY O_DIRECTORY |
| 66 | #else |
| 67 | #define MAYBE_O_DIRECTORY 0 |
| 68 | #endif |
| 69 | |
| 70 | #if __APPLE__ |
| 71 | // Mac OSX defines SEEK_HOLE, but it doesn't work. ("Inappropriate ioctl for device", it says.) |
| 72 | #undef SEEK_HOLE |
| 73 | #endif |
| 74 | |
| 75 | #if __BIONIC__ |
// Bionic has no DTTOIF function, so undefine DT_UNKNOWN to disable the code paths that use d_type.
| 77 | #undef DT_UNKNOWN |
| 78 | #endif |
| 79 | |
| 80 | static void setCloexec(int fd) KJ_UNUSED; |
| 81 | static void setCloexec(int fd) { |
| 82 | // Set the O_CLOEXEC flag on the given fd. |
| 83 | // |
| 84 | // We try to avoid the need to call this by taking advantage of syscall flags that set it |
| 85 | // atomically on new file descriptors. Unfortunately some platforms do not support such syscalls. |
| 86 | |
| 87 | #ifdef FIOCLEX |
| 88 | // Yay, we can set the flag in one call. |
| 89 | KJ_SYSCALL_HANDLE_ERRORS(ioctl(fd, FIOCLEX)) { |
| 90 | case EINVAL: |
| 91 | case EOPNOTSUPP: |
| 92 | break; |
| 93 | default: |
| 94 | KJ_FAIL_SYSCALL("ioctl(fd, FIOCLEX)" , error) { break; } |
| 95 | break; |
| 96 | } else { |
| 97 | // success |
| 98 | return; |
| 99 | } |
| 100 | #endif |
| 101 | |
| 102 | // Sadness, we must resort to read/modify/write. |
| 103 | // |
| 104 | // (On many platforms, FD_CLOEXEC is the only flag modifiable via F_SETFD and therefore we could |
| 105 | // skip the read... but it seems dangerous to assume that's true of all platforms, and anyway |
| 106 | // most platforms support FIOCLEX.) |
| 107 | int flags; |
| 108 | KJ_SYSCALL(flags = fcntl(fd, F_GETFD)); |
| 109 | if (!(flags & FD_CLOEXEC)) { |
| 110 | KJ_SYSCALL(fcntl(fd, F_SETFD, flags | FD_CLOEXEC)); |
| 111 | } |
| 112 | } |
| 113 | |
| 114 | static Date toKjDate(struct timespec tv) { |
| 115 | return tv.tv_sec * SECONDS + tv.tv_nsec * NANOSECONDS + UNIX_EPOCH; |
| 116 | } |
| 117 | |
| 118 | static FsNode::Type modeToType(mode_t mode) { |
| 119 | switch (mode & S_IFMT) { |
| 120 | case S_IFREG : return FsNode::Type::FILE; |
| 121 | case S_IFDIR : return FsNode::Type::DIRECTORY; |
| 122 | case S_IFLNK : return FsNode::Type::SYMLINK; |
| 123 | case S_IFBLK : return FsNode::Type::BLOCK_DEVICE; |
| 124 | case S_IFCHR : return FsNode::Type::CHARACTER_DEVICE; |
| 125 | case S_IFIFO : return FsNode::Type::NAMED_PIPE; |
| 126 | case S_IFSOCK: return FsNode::Type::SOCKET; |
| 127 | default: return FsNode::Type::OTHER; |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | static FsNode::Metadata statToMetadata(struct stat& stats) { |
| 132 | // Probably st_ino and st_dev are usually under 32 bits, so mix by rotating st_dev left 32 bits |
| 133 | // and XOR. |
| 134 | uint64_t d = stats.st_dev; |
| 135 | uint64_t hash = ((d << 32) | (d >> 32)) ^ stats.st_ino; |
| 136 | |
| 137 | return FsNode::Metadata { |
| 138 | modeToType(stats.st_mode), |
| 139 | implicitCast<uint64_t>(stats.st_size), |
| 140 | implicitCast<uint64_t>(stats.st_blocks * 512u), |
| 141 | #if __APPLE__ |
| 142 | toKjDate(stats.st_mtimespec), |
| 143 | #else |
| 144 | toKjDate(stats.st_mtim), |
| 145 | #endif |
| 146 | implicitCast<uint>(stats.st_nlink), |
| 147 | hash |
| 148 | }; |
| 149 | } |
| 150 | |
| 151 | static bool rmrf(int fd, StringPtr path); |
| 152 | |
| 153 | static void rmrfChildrenAndClose(int fd) { |
| 154 | // Assumes fd is seeked to beginning. |
| 155 | |
| 156 | DIR* dir = fdopendir(fd); |
| 157 | if (dir == nullptr) { |
| 158 | close(fd); |
| 159 | KJ_FAIL_SYSCALL("fdopendir" , errno); |
| 160 | }; |
| 161 | KJ_DEFER(closedir(dir)); |
| 162 | |
| 163 | for (;;) { |
| 164 | errno = 0; |
| 165 | struct dirent* entry = readdir(dir); |
| 166 | if (entry == nullptr) { |
| 167 | int error = errno; |
| 168 | if (error == 0) { |
| 169 | break; |
| 170 | } else { |
| 171 | KJ_FAIL_SYSCALL("readdir" , error); |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | if (entry->d_name[0] == '.' && |
| 176 | (entry->d_name[1] == '\0' || |
| 177 | (entry->d_name[1] == '.' && |
| 178 | entry->d_name[2] == '\0'))) { |
| 179 | // ignore . and .. |
| 180 | } else { |
| 181 | #ifdef DT_UNKNOWN // d_type is not available on all platforms. |
| 182 | if (entry->d_type == DT_DIR) { |
| 183 | int subdirFd; |
| 184 | KJ_SYSCALL(subdirFd = openat( |
| 185 | fd, entry->d_name, O_RDONLY | MAYBE_O_DIRECTORY | MAYBE_O_CLOEXEC)); |
| 186 | rmrfChildrenAndClose(subdirFd); |
| 187 | KJ_SYSCALL(unlinkat(fd, entry->d_name, AT_REMOVEDIR)); |
| 188 | } else if (entry->d_type != DT_UNKNOWN) { |
| 189 | KJ_SYSCALL(unlinkat(fd, entry->d_name, 0)); |
| 190 | } else { |
| 191 | #endif |
| 192 | KJ_ASSERT(rmrf(fd, entry->d_name)); |
| 193 | #ifdef DT_UNKNOWN |
| 194 | } |
| 195 | #endif |
| 196 | } |
| 197 | } |
| 198 | } |
| 199 | |
| 200 | static bool rmrf(int fd, StringPtr path) { |
| 201 | struct stat stats; |
| 202 | KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd, path.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) { |
| 203 | case ENOENT: |
| 204 | case ENOTDIR: |
| 205 | // Doesn't exist. |
| 206 | return false; |
| 207 | default: |
| 208 | KJ_FAIL_SYSCALL("lstat(path)" , error, path) { return false; } |
| 209 | } |
| 210 | |
| 211 | if (S_ISDIR(stats.st_mode)) { |
| 212 | int subdirFd; |
| 213 | KJ_SYSCALL(subdirFd = openat( |
| 214 | fd, path.cStr(), O_RDONLY | MAYBE_O_DIRECTORY | MAYBE_O_CLOEXEC)) { return false; } |
| 215 | rmrfChildrenAndClose(subdirFd); |
| 216 | KJ_SYSCALL(unlinkat(fd, path.cStr(), AT_REMOVEDIR)) { return false; } |
| 217 | } else { |
| 218 | KJ_SYSCALL(unlinkat(fd, path.cStr(), 0)) { return false; } |
| 219 | } |
| 220 | |
| 221 | return true; |
| 222 | } |
| 223 | |
struct MmapRange {
  // A page-aligned starting offset together with the length of the range beginning there.
  uint64_t offset;
  uint64_t size;
};

static MmapRange getMmapRange(uint64_t offset, uint64_t size) {
  // Translates a caller-requested (offset, size) into values acceptable to mmap(), whose
  // mappings must begin on a page boundary.
  //
  // The start is moved down to the enclosing page boundary and the length grows by the same
  // amount. The end of the range is deliberately *not* rounded up to a page boundary: mmap() does
  // not require it, and doing so causes trouble on some systems (notably Cygwin).

#ifndef _SC_PAGESIZE
#define _SC_PAGESIZE _SC_PAGE_SIZE
#endif
  static const uint64_t pageSize = sysconf(_SC_PAGESIZE);

  // Number of bytes by which `offset` overshoots the start of its page.
  const uint64_t misalignment = offset & (pageSize - 1);

  return { offset - misalignment, size + misalignment };
}
| 247 | |
| 248 | class MmapDisposer: public ArrayDisposer { |
| 249 | protected: |
| 250 | void disposeImpl(void* firstElement, size_t elementSize, size_t elementCount, |
| 251 | size_t capacity, void (*destroyElement)(void*)) const { |
| 252 | auto range = getMmapRange(reinterpret_cast<uintptr_t>(firstElement), |
| 253 | elementSize * elementCount); |
| 254 | KJ_SYSCALL(munmap(reinterpret_cast<byte*>(range.offset), range.size)) { break; } |
| 255 | } |
| 256 | }; |
| 257 | |
| 258 | constexpr MmapDisposer mmapDisposer = MmapDisposer(); |
| 259 | |
| 260 | class DiskHandle { |
| 261 | // We need to implement each of ReadableFile, AppendableFile, File, ReadableDirectory, and |
| 262 | // Directory for disk handles. There is a lot of implementation overlap between these, especially |
| 263 | // stat(), sync(), etc. We can't have everything inherit from a common DiskFsNode that implements |
| 264 | // these because then we get diamond inheritance which means we need to make all our inheritance |
| 265 | // virtual which means downcasting requires RTTI which violates our goal of supporting compiling |
| 266 | // with no RTTI. So instead we have the DiskHandle class which implements all the methods without |
| 267 | // inheriting anything, and then we have DiskFile, DiskDirectory, etc. hold this and delegate to |
| 268 | // it. Ugly, but works. |
| 269 | |
| 270 | public: |
| 271 | DiskHandle(AutoCloseFd&& fd): fd(kj::mv(fd)) {} |
| 272 | |
| 273 | // OsHandle ------------------------------------------------------------------ |
| 274 | |
| 275 | AutoCloseFd clone() const { |
| 276 | int fd2; |
| 277 | #ifdef F_DUPFD_CLOEXEC |
| 278 | KJ_SYSCALL_HANDLE_ERRORS(fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 3)) { |
| 279 | case EINVAL: |
| 280 | case EOPNOTSUPP: |
| 281 | // fall back |
| 282 | break; |
| 283 | default: |
| 284 | KJ_FAIL_SYSCALL("fnctl(fd, F_DUPFD_CLOEXEC, 3)" , error) { break; } |
| 285 | break; |
| 286 | } else { |
| 287 | return AutoCloseFd(fd2); |
| 288 | } |
| 289 | #endif |
| 290 | |
| 291 | KJ_SYSCALL(fd2 = ::dup(fd)); |
| 292 | AutoCloseFd result(fd2); |
| 293 | setCloexec(result); |
| 294 | return result; |
| 295 | } |
| 296 | |
| 297 | int getFd() const { |
| 298 | return fd.get(); |
| 299 | } |
| 300 | |
| 301 | // FsNode -------------------------------------------------------------------- |
| 302 | |
| 303 | FsNode::Metadata stat() const { |
| 304 | struct stat stats; |
| 305 | KJ_SYSCALL(::fstat(fd, &stats)); |
| 306 | return statToMetadata(stats); |
| 307 | } |
| 308 | |
| 309 | void sync() const { |
| 310 | #if __APPLE__ |
| 311 | // For whatever reason, fsync() on OSX only flushes kernel buffers. It does not flush hardware |
| 312 | // disk buffers. This makes it not very useful. But OSX documents fcntl F_FULLFSYNC which does |
| 313 | // the right thing. Why they don't just make fsync() do the right thing, I do not know. |
| 314 | KJ_SYSCALL(fcntl(fd, F_FULLFSYNC)); |
| 315 | #else |
| 316 | KJ_SYSCALL(fsync(fd)); |
| 317 | #endif |
| 318 | } |
| 319 | |
| 320 | void datasync() const { |
| 321 | // The presence of the _POSIX_SYNCHRONIZED_IO define is supposed to tell us that fdatasync() |
| 322 | // exists. But Apple defines this yet doesn't offer fdatasync(). Thanks, Apple. |
| 323 | #if _POSIX_SYNCHRONIZED_IO && !__APPLE__ |
| 324 | KJ_SYSCALL(fdatasync(fd)); |
| 325 | #else |
| 326 | this->sync(); |
| 327 | #endif |
| 328 | } |
| 329 | |
| 330 | // ReadableFile -------------------------------------------------------------- |
| 331 | |
| 332 | size_t read(uint64_t offset, ArrayPtr<byte> buffer) const { |
| 333 | // pread() probably never returns short reads unless it hits EOF. Unfortunately, though, per |
| 334 | // spec we are not allowed to assume this. |
| 335 | |
| 336 | size_t total = 0; |
| 337 | while (buffer.size() > 0) { |
| 338 | ssize_t n; |
| 339 | KJ_SYSCALL(n = pread(fd, buffer.begin(), buffer.size(), offset)); |
| 340 | if (n == 0) break; |
| 341 | total += n; |
| 342 | offset += n; |
| 343 | buffer = buffer.slice(n, buffer.size()); |
| 344 | } |
| 345 | return total; |
| 346 | } |
| 347 | |
| 348 | Array<const byte> mmap(uint64_t offset, uint64_t size) const { |
| 349 | auto range = getMmapRange(offset, size); |
| 350 | const void* mapping = ::mmap(NULL, range.size, PROT_READ, MAP_SHARED, fd, range.offset); |
| 351 | if (mapping == MAP_FAILED) { |
| 352 | KJ_FAIL_SYSCALL("mmap" , errno); |
| 353 | } |
| 354 | return Array<const byte>(reinterpret_cast<const byte*>(mapping) + (offset - range.offset), |
| 355 | size, mmapDisposer); |
| 356 | } |
| 357 | |
| 358 | Array<byte> mmapPrivate(uint64_t offset, uint64_t size) const { |
| 359 | auto range = getMmapRange(offset, size); |
| 360 | void* mapping = ::mmap(NULL, range.size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, range.offset); |
| 361 | if (mapping == MAP_FAILED) { |
| 362 | KJ_FAIL_SYSCALL("mmap" , errno); |
| 363 | } |
| 364 | return Array<byte>(reinterpret_cast<byte*>(mapping) + (offset - range.offset), |
| 365 | size, mmapDisposer); |
| 366 | } |
| 367 | |
| 368 | // File ---------------------------------------------------------------------- |
| 369 | |
| 370 | void write(uint64_t offset, ArrayPtr<const byte> data) const { |
| 371 | // pwrite() probably never returns short writes unless there's no space left on disk. |
| 372 | // Unfortunately, though, per spec we are not allowed to assume this. |
| 373 | |
| 374 | while (data.size() > 0) { |
| 375 | ssize_t n; |
| 376 | KJ_SYSCALL(n = pwrite(fd, data.begin(), data.size(), offset)); |
| 377 | KJ_ASSERT(n > 0, "pwrite() returned zero?" ); |
| 378 | offset += n; |
| 379 | data = data.slice(n, data.size()); |
| 380 | } |
| 381 | } |
| 382 | |
| 383 | void zero(uint64_t offset, uint64_t size) const { |
| 384 | #ifdef FALLOC_FL_PUNCH_HOLE |
| 385 | KJ_SYSCALL_HANDLE_ERRORS( |
| 386 | fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, size)) { |
| 387 | case EOPNOTSUPP: |
| 388 | // fall back to below |
| 389 | break; |
| 390 | default: |
| 391 | KJ_FAIL_SYSCALL("fallocate(FALLOC_FL_PUNCH_HOLE)" , error) { return; } |
| 392 | } else { |
| 393 | return; |
| 394 | } |
| 395 | #endif |
| 396 | |
| 397 | static const byte ZEROS[4096] = { 0 }; |
| 398 | |
| 399 | #if __APPLE__ || __CYGWIN__ |
| 400 | // Mac & Cygwin doesn't have pwritev(). |
| 401 | while (size > sizeof(ZEROS)) { |
| 402 | write(offset, ZEROS); |
| 403 | size -= sizeof(ZEROS); |
| 404 | offset += sizeof(ZEROS); |
| 405 | } |
| 406 | write(offset, kj::arrayPtr(ZEROS, size)); |
| 407 | #else |
| 408 | // Use a 4k buffer of zeros amplified by iov to write zeros with as few syscalls as possible. |
| 409 | size_t count = (size + sizeof(ZEROS) - 1) / sizeof(ZEROS); |
| 410 | const size_t iovmax = miniposix::iovMax(count); |
| 411 | KJ_STACK_ARRAY(struct iovec, iov, kj::min(iovmax, count), 16, 256); |
| 412 | |
| 413 | for (auto& item: iov) { |
| 414 | item.iov_base = const_cast<byte*>(ZEROS); |
| 415 | item.iov_len = sizeof(ZEROS); |
| 416 | } |
| 417 | |
| 418 | while (size > 0) { |
| 419 | size_t iovCount; |
| 420 | if (size >= iov.size() * sizeof(ZEROS)) { |
| 421 | iovCount = iov.size(); |
| 422 | } else { |
| 423 | iovCount = size / sizeof(ZEROS); |
| 424 | size_t rem = size % sizeof(ZEROS); |
| 425 | if (rem > 0) { |
| 426 | iov[iovCount++].iov_len = rem; |
| 427 | } |
| 428 | } |
| 429 | |
| 430 | ssize_t n; |
| 431 | KJ_SYSCALL(n = pwritev(fd, iov.begin(), count, offset)); |
| 432 | KJ_ASSERT(n > 0, "pwrite() returned zero?" ); |
| 433 | |
| 434 | offset += n; |
| 435 | size -= n; |
| 436 | } |
| 437 | #endif |
| 438 | } |
| 439 | |
| 440 | void truncate(uint64_t size) const { |
| 441 | KJ_SYSCALL(ftruncate(fd, size)); |
| 442 | } |
| 443 | |
| 444 | class WritableFileMappingImpl final: public WritableFileMapping { |
| 445 | public: |
| 446 | WritableFileMappingImpl(Array<byte> bytes): bytes(kj::mv(bytes)) {} |
| 447 | |
| 448 | ArrayPtr<byte> get() const override { |
| 449 | // const_cast OK because WritableFileMapping does indeed provide a writable view despite |
| 450 | // being const itself. |
| 451 | return arrayPtr(const_cast<byte*>(bytes.begin()), bytes.size()); |
| 452 | } |
| 453 | |
| 454 | void changed(ArrayPtr<byte> slice) const override { |
| 455 | KJ_REQUIRE(slice.begin() >= bytes.begin() && slice.end() <= bytes.end(), |
| 456 | "byte range is not part of this mapping" ); |
| 457 | |
| 458 | // msync() requires page-alignment, apparently, so use getMmapRange() to accomplish that. |
| 459 | auto range = getMmapRange(reinterpret_cast<uintptr_t>(slice.begin()), slice.size()); |
| 460 | KJ_SYSCALL(msync(reinterpret_cast<void*>(range.offset), range.size, MS_ASYNC)); |
| 461 | } |
| 462 | |
| 463 | void sync(ArrayPtr<byte> slice) const override { |
| 464 | KJ_REQUIRE(slice.begin() >= bytes.begin() && slice.end() <= bytes.end(), |
| 465 | "byte range is not part of this mapping" ); |
| 466 | |
| 467 | // msync() requires page-alignment, apparently, so use getMmapRange() to accomplish that. |
| 468 | auto range = getMmapRange(reinterpret_cast<uintptr_t>(slice.begin()), slice.size()); |
| 469 | KJ_SYSCALL(msync(reinterpret_cast<void*>(range.offset), range.size, MS_SYNC)); |
| 470 | } |
| 471 | |
| 472 | private: |
| 473 | Array<byte> bytes; |
| 474 | }; |
| 475 | |
| 476 | Own<const WritableFileMapping> mmapWritable(uint64_t offset, uint64_t size) const { |
| 477 | auto range = getMmapRange(offset, size); |
| 478 | void* mapping = ::mmap(NULL, range.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, range.offset); |
| 479 | if (mapping == MAP_FAILED) { |
| 480 | KJ_FAIL_SYSCALL("mmap" , errno); |
| 481 | } |
| 482 | auto array = Array<byte>(reinterpret_cast<byte*>(mapping) + (offset - range.offset), |
| 483 | size, mmapDisposer); |
| 484 | return heap<WritableFileMappingImpl>(kj::mv(array)); |
| 485 | } |
| 486 | |
| 487 | size_t copyChunk(uint64_t offset, int fromFd, uint64_t fromOffset, uint64_t size) const { |
| 488 | // Copies a range of bytes from `fromFd` to this file in the most efficient way possible for |
| 489 | // the OS. Only returns less than `size` if EOF. Does not account for holes. |
| 490 | |
| 491 | #if __linux__ |
| 492 | { |
| 493 | KJ_SYSCALL(lseek(fd, offset, SEEK_SET)); |
| 494 | off_t fromPos = fromOffset; |
| 495 | off_t end = fromOffset + size; |
| 496 | while (fromPos < end) { |
| 497 | ssize_t n; |
| 498 | KJ_SYSCALL_HANDLE_ERRORS(n = sendfile(fd, fromFd, &fromPos, end - fromPos)) { |
| 499 | case EINVAL: |
| 500 | case ENOSYS: |
| 501 | goto sendfileNotAvailable; |
| 502 | default: |
| 503 | KJ_FAIL_SYSCALL("sendfile" , error) { return fromPos - fromOffset; } |
| 504 | } |
| 505 | } |
| 506 | return fromPos - fromOffset; |
| 507 | } |
| 508 | |
| 509 | sendfileNotAvailable: |
| 510 | #endif |
| 511 | uint64_t total = 0; |
| 512 | while (size > 0) { |
| 513 | byte buffer[4096]; |
| 514 | ssize_t n; |
| 515 | KJ_SYSCALL(n = pread(fromFd, buffer, kj::min(sizeof(buffer), size), fromOffset)); |
| 516 | if (n == 0) break; |
| 517 | write(offset, arrayPtr(buffer, n)); |
| 518 | fromOffset += n; |
| 519 | offset += n; |
| 520 | total += n; |
| 521 | size -= n; |
| 522 | } |
| 523 | return total; |
| 524 | } |
| 525 | |
| 526 | kj::Maybe<size_t> copy(uint64_t offset, const ReadableFile& from, |
| 527 | uint64_t fromOffset, uint64_t size) const { |
| 528 | KJ_IF_MAYBE(otherFd, from.getFd()) { |
| 529 | #ifdef FICLONE |
| 530 | if (offset == 0 && fromOffset == 0 && size == kj::maxValue && stat().size == 0) { |
| 531 | if (ioctl(fd, FICLONE, *otherFd) >= 0) { |
| 532 | return stat().size; |
| 533 | } |
| 534 | } else if (size > 0) { // src_length = 0 has special meaning for the syscall, so avoid. |
| 535 | struct file_clone_range range; |
| 536 | memset(&range, 0, sizeof(range)); |
| 537 | range.src_fd = *otherFd; |
| 538 | range.dest_offset = offset; |
| 539 | range.src_offset = fromOffset; |
| 540 | range.src_length = size == kj::maxValue ? 0 : size; |
| 541 | if (ioctl(fd, FICLONERANGE, &range) >= 0) { |
| 542 | // TODO(someday): What does FICLONERANGE actually do if the range goes past EOF? The docs |
| 543 | // don't say. Maybe it only copies the parts that exist. Maybe it punches holes for the |
| 544 | // rest. Where does the destination file's EOF marker end up? Who knows? |
| 545 | return kj::min(from.stat().size - fromOffset, size); |
| 546 | } |
| 547 | } else { |
| 548 | // size == 0 |
| 549 | return size_t(0); |
| 550 | } |
| 551 | |
| 552 | // ioctl failed. Almost all failures documented for these are of the form "the operation is |
| 553 | // not supported for the filesystem(s) specified", so fall back to other approaches. |
| 554 | #endif |
| 555 | |
| 556 | off_t toPos = offset; |
| 557 | off_t fromPos = fromOffset; |
| 558 | off_t end = size == kj::maxValue ? off_t(kj::maxValue) : off_t(fromOffset + size); |
| 559 | |
| 560 | for (;;) { |
| 561 | // Handle data. |
| 562 | { |
| 563 | // Find out how much data there is before the next hole. |
| 564 | off_t nextHole; |
| 565 | #ifdef SEEK_HOLE |
| 566 | KJ_SYSCALL_HANDLE_ERRORS(nextHole = lseek(*otherFd, fromPos, SEEK_HOLE)) { |
| 567 | case EINVAL: |
| 568 | // SEEK_HOLE probably not supported. Assume no holes. |
| 569 | nextHole = end; |
| 570 | break; |
| 571 | case ENXIO: |
| 572 | // Past EOF. Stop here. |
| 573 | return fromPos - fromOffset; |
| 574 | default: |
| 575 | KJ_FAIL_SYSCALL("lseek(fd, pos, SEEK_HOLE)" , error) { return fromPos - fromOffset; } |
| 576 | } |
| 577 | #else |
| 578 | // SEEK_HOLE not supported. Assume no holes. |
| 579 | nextHole = end; |
| 580 | #endif |
| 581 | |
| 582 | // Copy the next chunk of data. |
| 583 | off_t copyTo = kj::min(end, nextHole); |
| 584 | size_t amount = copyTo - fromPos; |
| 585 | if (amount > 0) { |
| 586 | size_t n = copyChunk(toPos, *otherFd, fromPos, amount); |
| 587 | fromPos += n; |
| 588 | toPos += n; |
| 589 | |
| 590 | if (n < amount) { |
| 591 | return fromPos - fromOffset; |
| 592 | } |
| 593 | } |
| 594 | |
| 595 | if (fromPos == end) { |
| 596 | return fromPos - fromOffset; |
| 597 | } |
| 598 | } |
| 599 | |
| 600 | #ifdef SEEK_HOLE |
| 601 | // Handle hole. |
| 602 | { |
| 603 | // Find out how much hole there is before the next data. |
| 604 | off_t nextData; |
| 605 | KJ_SYSCALL_HANDLE_ERRORS(nextData = lseek(*otherFd, fromPos, SEEK_DATA)) { |
| 606 | case EINVAL: |
| 607 | // SEEK_DATA probably not supported. But we should only have gotten here if we |
| 608 | // were expecting a hole. |
| 609 | KJ_FAIL_ASSERT("can't determine hole size; SEEK_DATA not supported" ); |
| 610 | break; |
| 611 | case ENXIO: |
| 612 | // No more data. Set to EOF. |
| 613 | KJ_SYSCALL(nextData = lseek(*otherFd, 0, SEEK_END)); |
| 614 | if (nextData > end) { |
| 615 | end = nextData; |
| 616 | } |
| 617 | break; |
| 618 | default: |
| 619 | KJ_FAIL_SYSCALL("lseek(fd, pos, SEEK_HOLE)" , error) { return fromPos - fromOffset; } |
| 620 | } |
| 621 | |
| 622 | // Write zeros. |
| 623 | off_t zeroTo = kj::min(end, nextData); |
| 624 | off_t amount = zeroTo - fromPos; |
| 625 | if (amount > 0) { |
| 626 | zero(toPos, amount); |
| 627 | toPos += amount; |
| 628 | fromPos = zeroTo; |
| 629 | } |
| 630 | |
| 631 | if (fromPos == end) { |
| 632 | return fromPos - fromOffset; |
| 633 | } |
| 634 | } |
| 635 | #endif |
| 636 | } |
| 637 | } |
| 638 | |
| 639 | // Indicates caller should call File::copy() default implementation. |
| 640 | return nullptr; |
| 641 | } |
| 642 | |
| 643 | // ReadableDirectory --------------------------------------------------------- |
| 644 | |
| 645 | template <typename Func> |
| 646 | auto list(bool needTypes, Func&& func) const |
| 647 | -> Array<Decay<decltype(func(instance<StringPtr>(), instance<FsNode::Type>()))>> { |
| 648 | // Seek to start of directory. |
| 649 | KJ_SYSCALL(lseek(fd, 0, SEEK_SET)); |
| 650 | |
| 651 | // Unfortunately, fdopendir() takes ownership of the file descriptor. Therefore we need to |
| 652 | // make a duplicate. |
| 653 | int duped; |
| 654 | KJ_SYSCALL(duped = dup(fd)); |
| 655 | DIR* dir = fdopendir(duped); |
| 656 | if (dir == nullptr) { |
| 657 | close(duped); |
| 658 | KJ_FAIL_SYSCALL("fdopendir" , errno); |
| 659 | } |
| 660 | |
| 661 | KJ_DEFER(closedir(dir)); |
| 662 | typedef Decay<decltype(func(instance<StringPtr>(), instance<FsNode::Type>()))> Entry; |
| 663 | kj::Vector<Entry> entries; |
| 664 | |
| 665 | for (;;) { |
| 666 | errno = 0; |
| 667 | struct dirent* entry = readdir(dir); |
| 668 | if (entry == nullptr) { |
| 669 | int error = errno; |
| 670 | if (error == 0) { |
| 671 | break; |
| 672 | } else { |
| 673 | KJ_FAIL_SYSCALL("readdir" , error); |
| 674 | } |
| 675 | } |
| 676 | |
| 677 | kj::StringPtr name = entry->d_name; |
| 678 | if (name != "." && name != ".." && !name.startsWith(HIDDEN_PREFIX)) { |
| 679 | #ifdef DT_UNKNOWN // d_type is not available on all platforms. |
| 680 | if (entry->d_type != DT_UNKNOWN) { |
| 681 | entries.add(func(name, modeToType(DTTOIF(entry->d_type)))); |
| 682 | } else { |
| 683 | #endif |
| 684 | if (needTypes) { |
| 685 | // Unknown type. Fall back to stat. |
| 686 | struct stat stats; |
| 687 | KJ_SYSCALL(fstatat(fd, name.cStr(), &stats, AT_SYMLINK_NOFOLLOW)); |
| 688 | entries.add(func(name, modeToType(stats.st_mode))); |
| 689 | } else { |
| 690 | entries.add(func(name, FsNode::Type::OTHER)); |
| 691 | } |
| 692 | #ifdef DT_UNKNOWN |
| 693 | } |
| 694 | #endif |
| 695 | } |
| 696 | } |
| 697 | |
| 698 | auto result = entries.releaseAsArray(); |
| 699 | std::sort(result.begin(), result.end()); |
| 700 | return result; |
| 701 | } |
| 702 | |
| 703 | Array<String> listNames() const { |
| 704 | return list(false, [](StringPtr name, FsNode::Type type) { return heapString(name); }); |
| 705 | } |
| 706 | |
| 707 | Array<ReadableDirectory::Entry> listEntries() const { |
| 708 | return list(true, [](StringPtr name, FsNode::Type type) { |
| 709 | return ReadableDirectory::Entry { type, heapString(name), }; |
| 710 | }); |
| 711 | } |
| 712 | |
| 713 | bool exists(PathPtr path) const { |
| 714 | KJ_SYSCALL_HANDLE_ERRORS(faccessat(fd, path.toString().cStr(), F_OK, 0)) { |
| 715 | case ENOENT: |
| 716 | case ENOTDIR: |
| 717 | return false; |
| 718 | default: |
| 719 | KJ_FAIL_SYSCALL("faccessat(fd, path)" , error, path) { return false; } |
| 720 | } |
| 721 | return true; |
| 722 | } |
| 723 | |
| 724 | Maybe<FsNode::Metadata> tryLstat(PathPtr path) const { |
| 725 | struct stat stats; |
| 726 | KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd, path.toString().cStr(), &stats, AT_SYMLINK_NOFOLLOW)) { |
| 727 | case ENOENT: |
| 728 | case ENOTDIR: |
| 729 | return nullptr; |
| 730 | default: |
| 731 | KJ_FAIL_SYSCALL("faccessat(fd, path)" , error, path) { return nullptr; } |
| 732 | } |
| 733 | return statToMetadata(stats); |
| 734 | } |
| 735 | |
| 736 | Maybe<Own<const ReadableFile>> tryOpenFile(PathPtr path) const { |
| 737 | int newFd; |
| 738 | KJ_SYSCALL_HANDLE_ERRORS(newFd = openat( |
| 739 | fd, path.toString().cStr(), O_RDONLY | MAYBE_O_CLOEXEC)) { |
| 740 | case ENOENT: |
| 741 | case ENOTDIR: |
| 742 | return nullptr; |
| 743 | default: |
| 744 | KJ_FAIL_SYSCALL("openat(fd, path, O_RDONLY)" , error, path) { return nullptr; } |
| 745 | } |
| 746 | |
| 747 | kj::AutoCloseFd result(newFd); |
| 748 | #ifndef O_CLOEXEC |
| 749 | setCloexec(result); |
| 750 | #endif |
| 751 | |
| 752 | return newDiskReadableFile(kj::mv(result)); |
| 753 | } |
| 754 | |
| 755 | Maybe<AutoCloseFd> tryOpenSubdirInternal(PathPtr path) const { |
| 756 | int newFd; |
| 757 | KJ_SYSCALL_HANDLE_ERRORS(newFd = openat( |
| 758 | fd, path.toString().cStr(), O_RDONLY | MAYBE_O_CLOEXEC | MAYBE_O_DIRECTORY)) { |
| 759 | case ENOENT: |
| 760 | return nullptr; |
| 761 | case ENOTDIR: |
| 762 | // Could mean that a parent is not a directory, which we treat as "doesn't exist". |
| 763 | // Could also mean that the specified file is not a directory, which should throw. |
| 764 | // Check using exists(). |
| 765 | if (!exists(path)) { |
| 766 | return nullptr; |
| 767 | } |
| 768 | // fallthrough |
| 769 | default: |
| 770 | KJ_FAIL_SYSCALL("openat(fd, path, O_DIRECTORY)" , error, path) { return nullptr; } |
| 771 | } |
| 772 | |
| 773 | kj::AutoCloseFd result(newFd); |
| 774 | #ifndef O_CLOEXEC |
| 775 | setCloexec(result); |
| 776 | #endif |
| 777 | |
| 778 | return kj::mv(result); |
| 779 | } |
| 780 | |
| 781 | Maybe<Own<const ReadableDirectory>> tryOpenSubdir(PathPtr path) const { |
| 782 | return tryOpenSubdirInternal(path).map(newDiskReadableDirectory); |
| 783 | } |
| 784 | |
| 785 | Maybe<String> tryReadlink(PathPtr path) const { |
| 786 | size_t trySize = 256; |
| 787 | for (;;) { |
| 788 | KJ_STACK_ARRAY(char, buf, trySize, 256, 4096); |
| 789 | ssize_t n = readlinkat(fd, path.toString().cStr(), buf.begin(), buf.size()); |
| 790 | if (n < 0) { |
| 791 | int error = errno; |
| 792 | switch (error) { |
| 793 | case EINTR: |
| 794 | continue; |
| 795 | case ENOENT: |
| 796 | case ENOTDIR: |
| 797 | case EINVAL: // not a link |
| 798 | return nullptr; |
| 799 | default: |
| 800 | KJ_FAIL_SYSCALL("readlinkat(fd, path)" , error, path) { return nullptr; } |
| 801 | } |
| 802 | } |
| 803 | |
| 804 | if (n >= buf.size()) { |
| 805 | // Didn't give it enough space. Better retry with a bigger buffer. |
| 806 | trySize *= 2; |
| 807 | continue; |
| 808 | } |
| 809 | |
| 810 | return heapString(buf.begin(), n); |
| 811 | } |
| 812 | } |
| 813 | |
| 814 | // Directory ----------------------------------------------------------------- |
| 815 | |
| 816 | bool tryMkdir(PathPtr path, WriteMode mode, bool noThrow) const { |
| 817 | // Internal function to make a directory. |
| 818 | |
| 819 | auto filename = path.toString(); |
| 820 | mode_t acl = has(mode, WriteMode::PRIVATE) ? 0700 : 0777; |
| 821 | |
| 822 | KJ_SYSCALL_HANDLE_ERRORS(mkdirat(fd, filename.cStr(), acl)) { |
| 823 | case EEXIST: { |
| 824 | // Apparently this path exists. |
| 825 | if (!has(mode, WriteMode::MODIFY)) { |
| 826 | // Require exclusive create. |
| 827 | return false; |
| 828 | } |
| 829 | |
| 830 | // MODIFY is allowed, so we just need to check whether the existing entry is a directory. |
| 831 | struct stat stats; |
| 832 | KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd, filename.cStr(), &stats, 0)) { |
| 833 | default: |
| 834 | // mkdir() says EEXIST but we can't stat it. Maybe it's a dangling link, or maybe |
| 835 | // we can't access it for some reason. Assume failure. |
| 836 | // |
| 837 | // TODO(someday): Maybe we should be creating the directory at the target of the |
| 838 | // link? |
| 839 | goto failed; |
| 840 | } |
| 841 | return (stats.st_mode & S_IFMT) == S_IFDIR; |
| 842 | } |
| 843 | case ENOENT: |
| 844 | if (has(mode, WriteMode::CREATE_PARENT) && path.size() > 0 && |
| 845 | tryMkdir(path.parent(), WriteMode::CREATE | WriteMode::MODIFY | |
| 846 | WriteMode::CREATE_PARENT, true)) { |
| 847 | // Retry, but make sure we don't try to create the parent again. |
| 848 | return tryMkdir(path, mode - WriteMode::CREATE_PARENT, noThrow); |
| 849 | } else { |
| 850 | goto failed; |
| 851 | } |
| 852 | default: |
| 853 | failed: |
| 854 | if (noThrow) { |
| 855 | // Caller requested no throwing. |
| 856 | return false; |
| 857 | } else { |
| 858 | KJ_FAIL_SYSCALL("mkdirat(fd, path)" , error, path); |
| 859 | } |
| 860 | } |
| 861 | |
| 862 | return true; |
| 863 | } |
| 864 | |
| 865 | kj::Maybe<String> createNamedTemporary( |
| 866 | PathPtr finalName, WriteMode mode, Function<int(StringPtr)> tryCreate) const { |
| 867 | // Create a temporary file which will eventually replace `finalName`. |
| 868 | // |
| 869 | // Calls `tryCreate` to actually create the temporary, passing in the desired path. tryCreate() |
| 870 | // is expected to behave like a syscall, returning a negative value and setting `errno` on |
| 871 | // error. tryCreate() MUST fail with EEXIST if the path exists -- this is not checked in |
| 872 | // advance, since it needs to be checked atomically. In the case of EEXIST, tryCreate() will |
| 873 | // be called again with a new path. |
| 874 | // |
| 875 | // Returns the temporary path that succeeded. Only returns nullptr if there was an exception |
| 876 | // but we're compiled with -fno-exceptions. |
| 877 | |
| 878 | if (finalName.size() == 0) { |
| 879 | KJ_FAIL_REQUIRE("can't replace self" ) { break; } |
| 880 | return nullptr; |
| 881 | } |
| 882 | |
| 883 | static uint counter = 0; |
| 884 | static const pid_t pid = getpid(); |
| 885 | String pathPrefix; |
| 886 | if (finalName.size() > 1) { |
| 887 | pathPrefix = kj::str(finalName.parent(), '/'); |
| 888 | } |
| 889 | auto path = kj::str(pathPrefix, HIDDEN_PREFIX, pid, '.', counter++, '.', |
| 890 | finalName.basename()[0], ".partial" ); |
| 891 | |
| 892 | KJ_SYSCALL_HANDLE_ERRORS(tryCreate(path)) { |
| 893 | case EEXIST: |
| 894 | return createNamedTemporary(finalName, mode, kj::mv(tryCreate)); |
| 895 | case ENOENT: |
| 896 | if (has(mode, WriteMode::CREATE_PARENT) && finalName.size() > 1 && |
| 897 | tryMkdir(finalName.parent(), WriteMode::CREATE | WriteMode::MODIFY | |
| 898 | WriteMode::CREATE_PARENT, true)) { |
| 899 | // Retry, but make sure we don't try to create the parent again. |
| 900 | mode = mode - WriteMode::CREATE_PARENT; |
| 901 | return createNamedTemporary(finalName, mode, kj::mv(tryCreate)); |
| 902 | } |
| 903 | // fallthrough |
| 904 | default: |
| 905 | KJ_FAIL_SYSCALL("create(path)" , error, path) { break; } |
| 906 | return nullptr; |
| 907 | } |
| 908 | |
| 909 | return kj::mv(path); |
| 910 | } |
| 911 | |
| 912 | bool tryReplaceNode(PathPtr path, WriteMode mode, Function<int(StringPtr)> tryCreate) const { |
| 913 | // Replaces the given path with an object created by calling tryCreate(). |
| 914 | // |
| 915 | // tryCreate() must behave like a syscall which creates the node at the path passed to it, |
| 916 | // returning a negative value on error. If the path passed to tryCreate already exists, it |
| 917 | // MUST fail with EEXIST. |
| 918 | // |
| 919 | // When `mode` includes MODIFY, replaceNode() reacts to EEXIST by creating the node in a |
| 920 | // temporary location and then rename()ing it into place. |
| 921 | |
| 922 | if (path.size() == 0) { |
| 923 | KJ_FAIL_REQUIRE("can't replace self" ) { return false; } |
| 924 | } |
| 925 | |
| 926 | auto filename = path.toString(); |
| 927 | |
| 928 | if (has(mode, WriteMode::CREATE)) { |
| 929 | // First try just cerating the node in-place. |
| 930 | KJ_SYSCALL_HANDLE_ERRORS(tryCreate(filename)) { |
| 931 | case EEXIST: |
| 932 | // Target exists. |
| 933 | if (has(mode, WriteMode::MODIFY)) { |
| 934 | // Fall back to MODIFY path, below. |
| 935 | break; |
| 936 | } else { |
| 937 | return false; |
| 938 | } |
| 939 | case ENOENT: |
| 940 | if (has(mode, WriteMode::CREATE_PARENT) && path.size() > 0 && |
| 941 | tryMkdir(path.parent(), WriteMode::CREATE | WriteMode::MODIFY | |
| 942 | WriteMode::CREATE_PARENT, true)) { |
| 943 | // Retry, but make sure we don't try to create the parent again. |
| 944 | return tryReplaceNode(path, mode - WriteMode::CREATE_PARENT, kj::mv(tryCreate)); |
| 945 | } |
| 946 | // fallthrough |
| 947 | default: |
| 948 | KJ_FAIL_SYSCALL("create(path)" , error, path) { return false; } |
| 949 | } else { |
| 950 | // Success. |
| 951 | return true; |
| 952 | } |
| 953 | } |
| 954 | |
| 955 | // Either we don't have CREATE mode or the target already exists. We need to perform a |
| 956 | // replacement instead. |
| 957 | |
| 958 | KJ_IF_MAYBE(tempPath, createNamedTemporary(path, mode, kj::mv(tryCreate))) { |
| 959 | if (tryCommitReplacement(filename, fd, *tempPath, mode)) { |
| 960 | return true; |
| 961 | } else { |
| 962 | KJ_SYSCALL_HANDLE_ERRORS(unlinkat(fd, tempPath->cStr(), 0)) { |
| 963 | case ENOENT: |
| 964 | // meh |
| 965 | break; |
| 966 | default: |
| 967 | KJ_FAIL_SYSCALL("unlinkat(fd, tempPath, 0)" , error, *tempPath); |
| 968 | } |
| 969 | return false; |
| 970 | } |
| 971 | } else { |
| 972 | // threw, but exceptions are disabled |
| 973 | return false; |
| 974 | } |
| 975 | } |
| 976 | |
| 977 | Maybe<AutoCloseFd> tryOpenFileInternal(PathPtr path, WriteMode mode, bool append) const { |
| 978 | uint flags = O_RDWR | MAYBE_O_CLOEXEC; |
| 979 | mode_t acl = 0666; |
| 980 | if (has(mode, WriteMode::CREATE)) { |
| 981 | flags |= O_CREAT; |
| 982 | } |
| 983 | if (!has(mode, WriteMode::MODIFY)) { |
| 984 | if (!has(mode, WriteMode::CREATE)) { |
| 985 | // Neither CREATE nor MODIFY -- impossible to satisfy preconditions. |
| 986 | return nullptr; |
| 987 | } |
| 988 | flags |= O_EXCL; |
| 989 | } |
| 990 | if (append) { |
| 991 | flags |= O_APPEND; |
| 992 | } |
| 993 | if (has(mode, WriteMode::EXECUTABLE)) { |
| 994 | acl = 0777; |
| 995 | } |
| 996 | if (has(mode, WriteMode::PRIVATE)) { |
| 997 | acl &= 0700; |
| 998 | } |
| 999 | |
| 1000 | auto filename = path.toString(); |
| 1001 | |
| 1002 | int newFd; |
| 1003 | KJ_SYSCALL_HANDLE_ERRORS(newFd = openat(fd, filename.cStr(), flags, acl)) { |
| 1004 | case ENOENT: |
| 1005 | if (has(mode, WriteMode::CREATE)) { |
| 1006 | // Either: |
| 1007 | // - The file is a broken symlink. |
| 1008 | // - A parent directory didn't exist. |
| 1009 | if (has(mode, WriteMode::CREATE_PARENT) && path.size() > 0 && |
| 1010 | tryMkdir(path.parent(), WriteMode::CREATE | WriteMode::MODIFY | |
| 1011 | WriteMode::CREATE_PARENT, true)) { |
| 1012 | // Retry, but make sure we don't try to create the parent again. |
| 1013 | return tryOpenFileInternal(path, mode - WriteMode::CREATE_PARENT, append); |
| 1014 | } |
| 1015 | |
| 1016 | // Check for broken link. |
| 1017 | if (!has(mode, WriteMode::MODIFY) && |
| 1018 | faccessat(fd, filename.cStr(), F_OK, AT_SYMLINK_NOFOLLOW) >= 0) { |
| 1019 | // Yep. We treat this as already-exists, which means in CREATE-only mode this is a |
| 1020 | // simple failure. |
| 1021 | return nullptr; |
| 1022 | } |
| 1023 | |
| 1024 | KJ_FAIL_REQUIRE("parent is not a directory" , path) { return nullptr; } |
| 1025 | } else { |
| 1026 | // MODIFY-only mode. ENOENT = doesn't exist = return null. |
| 1027 | return nullptr; |
| 1028 | } |
| 1029 | case ENOTDIR: |
| 1030 | if (!has(mode, WriteMode::CREATE)) { |
| 1031 | // MODIFY-only mode. ENOTDIR = parent not a directory = doesn't exist = return null. |
| 1032 | return nullptr; |
| 1033 | } |
| 1034 | goto failed; |
| 1035 | case EEXIST: |
| 1036 | if (!has(mode, WriteMode::MODIFY)) { |
| 1037 | // CREATE-only mode. EEXIST = already exists = return null. |
| 1038 | return nullptr; |
| 1039 | } |
| 1040 | goto failed; |
| 1041 | default: |
| 1042 | failed: |
| 1043 | KJ_FAIL_SYSCALL("openat(fd, path, O_RDWR | ...)" , error, path) { return nullptr; } |
| 1044 | } |
| 1045 | |
| 1046 | kj::AutoCloseFd result(newFd); |
| 1047 | #ifndef O_CLOEXEC |
| 1048 | setCloexec(result); |
| 1049 | #endif |
| 1050 | |
| 1051 | return kj::mv(result); |
| 1052 | } |
| 1053 | |
bool tryCommitReplacement(StringPtr toPath, int fromDirFd, StringPtr fromPath, WriteMode mode,
                          int* errorReason = nullptr) const {
  // Moves the node at `fromPath` (relative to `fromDirFd`) to `toPath` (relative to this
  // handle's fd), honoring the CREATE / MODIFY preconditions in `mode`.
  //
  // Strategy, in order:
  //   1. CREATE | MODIFY: plain renameat(), which clobbers when rename() semantics allow it.
  //   2. Linux with RENAME_EXCHANGE available: renameat2() with RENAME_EXCHANGE (MODIFY) or
  //      RENAME_NOREPLACE (CREATE-only), so the precondition is checked atomically.
  //   3. Otherwise a non-atomic fallback: move the existing target aside, move the source into
  //      place, then delete the old content.
  //
  // Returns true if the node was moved into place, false otherwise. If `errorReason` is
  // non-null, an unexpected rename error is stored in `*errorReason` instead of being reported
  // through KJ_FAIL_SYSCALL; it is left untouched for plain precondition failures.

  if (has(mode, WriteMode::CREATE) && has(mode, WriteMode::MODIFY)) {
    // Always clobber. Try it.
    KJ_SYSCALL_HANDLE_ERRORS(renameat(fromDirFd, fromPath.cStr(), fd.get(), toPath.cStr())) {
      case EISDIR:
      case ENOTDIR:
      case ENOTEMPTY:
      case EEXIST:
        // Failed because target exists and due to the various weird quirks of rename(), it
        // can't remove it for us. On Linux we can try an exchange instead. On others we have
        // to move the target out of the way.
        break;
      default:
        if (errorReason == nullptr) {
          KJ_FAIL_SYSCALL("rename(fromPath, toPath)", error, fromPath, toPath) { return false; }
        } else {
          *errorReason = error;
          return false;
        }
    } else {
      return true;
    }
  }

#if __linux__ && defined(RENAME_EXCHANGE)
  // Try to use Linux's renameat2() to atomically check preconditions and apply.

  if (has(mode, WriteMode::MODIFY)) {
    // Use an exchange to implement modification.
    //
    // We reach this branch when performing a MODIFY-only, or when performing a CREATE | MODIFY
    // in which we determined above that there's a node of a different type blocking the
    // exchange.

    KJ_SYSCALL_HANDLE_ERRORS(syscall(SYS_renameat2,
        fromDirFd, fromPath.cStr(), fd.get(), toPath.cStr(), RENAME_EXCHANGE)) {
      case ENOSYS:
        break;  // fall back to traditional means
      case ENOENT:
        // Presumably because the target path doesn't exist.
        if (has(mode, WriteMode::CREATE)) {
          // The earlier renameat() implied the target exists, so this is contradictory.
          KJ_FAIL_ASSERT("rename(tmp, path) claimed path exists but "
              "renameat2(fromPath, toPath, EXCAHNGE) said it doest; concurrent modification?",
              fromPath, toPath) { return false; }
        } else {
          // Assume target doesn't exist.
          return false;
        }
      default:
        if (errorReason == nullptr) {
          KJ_FAIL_SYSCALL("renameat2(fromPath, toPath, EXCHANGE)", error, fromPath, toPath) {
            return false;
          }
        } else {
          *errorReason = error;
          return false;
        }
    } else {
      // Successful swap! Delete swapped-out content.
      // (After the exchange, the old target now lives at fromPath.)
      rmrf(fromDirFd, fromPath);
      return true;
    }
  } else if (has(mode, WriteMode::CREATE)) {
    // CREATE-only: the rename must fail if anything already exists at the target.
    KJ_SYSCALL_HANDLE_ERRORS(syscall(SYS_renameat2,
        fromDirFd, fromPath.cStr(), fd.get(), toPath.cStr(), RENAME_NOREPLACE)) {
      case ENOSYS:
        break;  // fall back to traditional means
      case EEXIST:
        return false;
      default:
        if (errorReason == nullptr) {
          KJ_FAIL_SYSCALL("renameat2(fromPath, toPath, NOREPLACE)", error, fromPath, toPath) {
            return false;
          }
        } else {
          *errorReason = error;
          return false;
        }
    } else {
      return true;
    }
  }
#endif

  // We're unable to do what we wanted atomically. :(

  if (has(mode, WriteMode::CREATE) && has(mode, WriteMode::MODIFY)) {
    // We failed to atomically delete the target previously. So now we need to do two calls in
    // rapid succession to move the old file away then move the new one into place.

    // Find out what kind of file exists at the target path.
    struct stat stats;
    KJ_SYSCALL(fstatat(fd, toPath.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) { return false; }

    // Create a temporary location to move the existing object to. Note that rename() allows a
    // non-directory to replace a non-directory, and allows a directory to replace an empty
    // directory. So we have to create the right type.
    Path toPathParsed = Path::parse(toPath);
    String away;
    KJ_IF_MAYBE(awayPath, createNamedTemporary(toPathParsed, WriteMode::CREATE,
        [&](StringPtr candidatePath) {
      if (S_ISDIR(stats.st_mode)) {
        return mkdirat(fd, candidatePath.cStr(), 0700);
      } else {
#if __APPLE__
        // No mknodat() on OSX, gotta open() a file, ugh.
        int newFd = openat(fd, candidatePath.cStr(),
                           O_RDWR | O_CREAT | O_EXCL | MAYBE_O_CLOEXEC, 0700);
        if (newFd >= 0) close(newFd);
        return newFd;
#else
        return mknodat(fd, candidatePath.cStr(), S_IFREG | 0600, dev_t());
#endif
      }
    })) {
      away = kj::mv(*awayPath);
    } else {
      // Already threw.
      return false;
    }

    // OK, now move the target object to replace the thing we just created.
    KJ_SYSCALL(renameat(fd, toPath.cStr(), fd, away.cStr())) {
      // Something went wrong. Remove the thing we just created.
      unlinkat(fd, away.cStr(), S_ISDIR(stats.st_mode) ? AT_REMOVEDIR : 0);
      return false;
    }

    // Now move the source object to the target location.
    KJ_SYSCALL_HANDLE_ERRORS(renameat(fromDirFd, fromPath.cStr(), fd, toPath.cStr())) {
      default:
        // Try to put things back where they were. If this fails, though, then we have little
        // choice but to leave things broken.
        KJ_SYSCALL_HANDLE_ERRORS(renameat(fd, away.cStr(), fd, toPath.cStr())) {
          default: break;
        }

        // Report the error from moving the source into place, not the restore attempt.
        if (errorReason == nullptr) {
          KJ_FAIL_SYSCALL("rename(fromPath, toPath)", error, fromPath, toPath) {
            return false;
          }
        } else {
          *errorReason = error;
          return false;
        }
    }

    // OK, success. Delete the old content.
    rmrf(fd, away);
    return true;
  } else {
    // Only one of CREATE or MODIFY is specified, so we need to verify non-atomically that the
    // corresponding precondition (must-not-exist or must-exist, respectively) is held.
    if (has(mode, WriteMode::CREATE)) {
      struct stat stats;
      KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd.get(), toPath.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) {
        case ENOENT:
        case ENOTDIR:
          break;  // doesn't exist; continue
        default:
          KJ_FAIL_SYSCALL("fstatat(fd, toPath)", error, toPath) { return false; }
      } else {
        return false;  // already exists; fail
      }
    } else if (has(mode, WriteMode::MODIFY)) {
      struct stat stats;
      KJ_SYSCALL_HANDLE_ERRORS(fstatat(fd.get(), toPath.cStr(), &stats, AT_SYMLINK_NOFOLLOW)) {
        case ENOENT:
        case ENOTDIR:
          return false;  // doesn't exist; fail
        default:
          KJ_FAIL_SYSCALL("fstatat(fd, toPath)", error, toPath) { return false; }
      } else {
        // already exists; continue
      }
    } else {
      // Neither CREATE nor MODIFY.
      return false;
    }

    // Start over in create-and-modify mode.
    return tryCommitReplacement(toPath, fromDirFd, fromPath,
                                WriteMode::CREATE | WriteMode::MODIFY,
                                errorReason);
  }
}
| 1241 | |
| 1242 | template <typename T> |
| 1243 | class ReplacerImpl final: public Directory::Replacer<T> { |
| 1244 | public: |
| 1245 | ReplacerImpl(Own<const T>&& object, const DiskHandle& handle, |
| 1246 | String&& tempPath, String&& path, WriteMode mode) |
| 1247 | : Directory::Replacer<T>(mode), |
| 1248 | object(kj::mv(object)), handle(handle), |
| 1249 | tempPath(kj::mv(tempPath)), path(kj::mv(path)) {} |
| 1250 | |
| 1251 | ~ReplacerImpl() noexcept(false) { |
| 1252 | if (!committed) { |
| 1253 | rmrf(handle.fd, tempPath); |
| 1254 | } |
| 1255 | } |
| 1256 | |
| 1257 | const T& get() override { |
| 1258 | return *object; |
| 1259 | } |
| 1260 | |
| 1261 | bool tryCommit() override { |
| 1262 | KJ_ASSERT(!committed, "already committed" ) { return false; } |
| 1263 | return committed = handle.tryCommitReplacement(path, handle.fd, tempPath, |
| 1264 | Directory::Replacer<T>::mode); |
| 1265 | } |
| 1266 | |
| 1267 | private: |
| 1268 | Own<const T> object; |
| 1269 | const DiskHandle& handle; |
| 1270 | String tempPath; |
| 1271 | String path; |
| 1272 | bool committed = false; // true if *successfully* committed (in which case tempPath is gone) |
| 1273 | }; |
| 1274 | |
| 1275 | template <typename T> |
| 1276 | class BrokenReplacer final: public Directory::Replacer<T> { |
| 1277 | // For recovery path when exceptions are disabled. |
| 1278 | |
| 1279 | public: |
| 1280 | BrokenReplacer(Own<const T> inner) |
| 1281 | : Directory::Replacer<T>(WriteMode::CREATE | WriteMode::MODIFY), |
| 1282 | inner(kj::mv(inner)) {} |
| 1283 | |
| 1284 | const T& get() override { return *inner; } |
| 1285 | bool tryCommit() override { return false; } |
| 1286 | |
| 1287 | private: |
| 1288 | Own<const T> inner; |
| 1289 | }; |
| 1290 | |
| 1291 | Maybe<Own<const File>> tryOpenFile(PathPtr path, WriteMode mode) const { |
| 1292 | return tryOpenFileInternal(path, mode, false).map(newDiskFile); |
| 1293 | } |
| 1294 | |
| 1295 | Own<Directory::Replacer<File>> replaceFile(PathPtr path, WriteMode mode) const { |
| 1296 | mode_t acl = 0666; |
| 1297 | if (has(mode, WriteMode::EXECUTABLE)) { |
| 1298 | acl = 0777; |
| 1299 | } |
| 1300 | if (has(mode, WriteMode::PRIVATE)) { |
| 1301 | acl &= 0700; |
| 1302 | } |
| 1303 | |
| 1304 | int newFd_; |
| 1305 | KJ_IF_MAYBE(temp, createNamedTemporary(path, mode, |
| 1306 | [&](StringPtr candidatePath) { |
| 1307 | return newFd_ = openat(fd, candidatePath.cStr(), |
| 1308 | O_RDWR | O_CREAT | O_EXCL | MAYBE_O_CLOEXEC, acl); |
| 1309 | })) { |
| 1310 | AutoCloseFd newFd(newFd_); |
| 1311 | #ifndef O_CLOEXEC |
| 1312 | setCloexec(newFd); |
| 1313 | #endif |
| 1314 | return heap<ReplacerImpl<File>>(newDiskFile(kj::mv(newFd)), *this, kj::mv(*temp), |
| 1315 | path.toString(), mode); |
| 1316 | } else { |
| 1317 | // threw, but exceptions are disabled |
| 1318 | return heap<BrokenReplacer<File>>(newInMemoryFile(nullClock())); |
| 1319 | } |
| 1320 | } |
| 1321 | |
| 1322 | Own<const File> createTemporary() const { |
| 1323 | int newFd_; |
| 1324 | |
| 1325 | #if __linux__ && defined(O_TMPFILE) |
| 1326 | // Use syscall() to work around glibc bug with O_TMPFILE: |
| 1327 | // https://sourceware.org/bugzilla/show_bug.cgi?id=17523 |
| 1328 | KJ_SYSCALL_HANDLE_ERRORS(newFd_ = syscall( |
| 1329 | SYS_openat, fd.get(), "." , O_RDWR | O_TMPFILE, 0700)) { |
| 1330 | case EOPNOTSUPP: |
| 1331 | case EINVAL: |
| 1332 | case EISDIR: |
| 1333 | // Maybe not supported by this kernel / filesystem. Fall back to below. |
| 1334 | break; |
| 1335 | default: |
| 1336 | KJ_FAIL_SYSCALL("open(O_TMPFILE)" , error) { break; } |
| 1337 | break; |
| 1338 | } else { |
| 1339 | AutoCloseFd newFd(newFd_); |
| 1340 | #ifndef O_CLOEXEC |
| 1341 | setCloexec(newFd); |
| 1342 | #endif |
| 1343 | return newDiskFile(kj::mv(newFd)); |
| 1344 | } |
| 1345 | #endif |
| 1346 | |
| 1347 | KJ_IF_MAYBE(temp, createNamedTemporary(Path("unnamed" ), WriteMode::CREATE, |
| 1348 | [&](StringPtr path) { |
| 1349 | return newFd_ = openat(fd, path.cStr(), O_RDWR | O_CREAT | O_EXCL | MAYBE_O_CLOEXEC, 0600); |
| 1350 | })) { |
| 1351 | AutoCloseFd newFd(newFd_); |
| 1352 | #ifndef O_CLOEXEC |
| 1353 | setCloexec(newFd); |
| 1354 | #endif |
| 1355 | auto result = newDiskFile(kj::mv(newFd)); |
| 1356 | KJ_SYSCALL(unlinkat(fd, temp->cStr(), 0)) { break; } |
| 1357 | return kj::mv(result); |
| 1358 | } else { |
| 1359 | // threw, but exceptions are disabled |
| 1360 | return newInMemoryFile(nullClock()); |
| 1361 | } |
| 1362 | } |
| 1363 | |
| 1364 | Maybe<Own<AppendableFile>> tryAppendFile(PathPtr path, WriteMode mode) const { |
| 1365 | return tryOpenFileInternal(path, mode, true).map(newDiskAppendableFile); |
| 1366 | } |
| 1367 | |
| 1368 | Maybe<Own<const Directory>> tryOpenSubdir(PathPtr path, WriteMode mode) const { |
| 1369 | // Must create before open. |
| 1370 | if (has(mode, WriteMode::CREATE)) { |
| 1371 | if (!tryMkdir(path, mode, false)) return nullptr; |
| 1372 | } |
| 1373 | |
| 1374 | return tryOpenSubdirInternal(path).map(newDiskDirectory); |
| 1375 | } |
| 1376 | |
| 1377 | Own<Directory::Replacer<Directory>> replaceSubdir(PathPtr path, WriteMode mode) const { |
| 1378 | mode_t acl = has(mode, WriteMode::PRIVATE) ? 0700 : 0777; |
| 1379 | |
| 1380 | KJ_IF_MAYBE(temp, createNamedTemporary(path, mode, |
| 1381 | [&](StringPtr candidatePath) { |
| 1382 | return mkdirat(fd, candidatePath.cStr(), acl); |
| 1383 | })) { |
| 1384 | int subdirFd_; |
| 1385 | KJ_SYSCALL_HANDLE_ERRORS(subdirFd_ = openat( |
| 1386 | fd, temp->cStr(), O_RDONLY | MAYBE_O_CLOEXEC | MAYBE_O_DIRECTORY)) { |
| 1387 | default: |
| 1388 | KJ_FAIL_SYSCALL("open(just-created-temporary)" , error); |
| 1389 | return heap<BrokenReplacer<Directory>>(newInMemoryDirectory(nullClock())); |
| 1390 | } |
| 1391 | |
| 1392 | AutoCloseFd subdirFd(subdirFd_); |
| 1393 | #ifndef O_CLOEXEC |
| 1394 | setCloexec(subdirFd); |
| 1395 | #endif |
| 1396 | return heap<ReplacerImpl<Directory>>( |
| 1397 | newDiskDirectory(kj::mv(subdirFd)), *this, kj::mv(*temp), path.toString(), mode); |
| 1398 | } else { |
| 1399 | // threw, but exceptions are disabled |
| 1400 | return heap<BrokenReplacer<Directory>>(newInMemoryDirectory(nullClock())); |
| 1401 | } |
| 1402 | } |
| 1403 | |
| 1404 | bool trySymlink(PathPtr linkpath, StringPtr content, WriteMode mode) const { |
| 1405 | return tryReplaceNode(linkpath, mode, [&](StringPtr candidatePath) { |
| 1406 | return symlinkat(content.cStr(), fd, candidatePath.cStr()); |
| 1407 | }); |
| 1408 | } |
| 1409 | |
| 1410 | bool tryTransfer(PathPtr toPath, WriteMode toMode, |
| 1411 | const Directory& fromDirectory, PathPtr fromPath, |
| 1412 | TransferMode mode, const Directory& self) const { |
| 1413 | KJ_REQUIRE(toPath.size() > 0, "can't replace self" ) { return false; } |
| 1414 | |
| 1415 | if (mode == TransferMode::LINK) { |
| 1416 | KJ_IF_MAYBE(fromFd, fromDirectory.getFd()) { |
| 1417 | // Other is a disk directory, so we can hopefully do an efficient move/link. |
| 1418 | return tryReplaceNode(toPath, toMode, [&](StringPtr candidatePath) { |
| 1419 | return linkat(*fromFd, fromPath.toString().cStr(), fd, candidatePath.cStr(), 0); |
| 1420 | }); |
| 1421 | }; |
| 1422 | } else if (mode == TransferMode::MOVE) { |
| 1423 | KJ_IF_MAYBE(fromFd, fromDirectory.getFd()) { |
| 1424 | KJ_ASSERT(mode == TransferMode::MOVE); |
| 1425 | |
| 1426 | int error = 0; |
| 1427 | if (tryCommitReplacement(toPath.toString(), *fromFd, fromPath.toString(), toMode, |
| 1428 | &error)) { |
| 1429 | return true; |
| 1430 | } else switch (error) { |
| 1431 | case 0: |
| 1432 | // Plain old WriteMode precondition failure. |
| 1433 | return false; |
| 1434 | case EXDEV: |
| 1435 | // Can't move between devices. Fall back to default implementation, which does |
| 1436 | // copy/delete. |
| 1437 | break; |
| 1438 | case ENOENT: |
| 1439 | // Either the destination directory doesn't exist or the source path doesn't exist. |
| 1440 | // Unfortunately we don't really know. If CREATE_PARENT was provided, try creating |
| 1441 | // the parent directory. Otherwise, we don't actually need to distinguish between |
| 1442 | // these two errors; just return false. |
| 1443 | if (has(toMode, WriteMode::CREATE) && has(toMode, WriteMode::CREATE_PARENT) && |
| 1444 | toPath.size() > 0 && tryMkdir(toPath.parent(), |
| 1445 | WriteMode::CREATE | WriteMode::MODIFY | WriteMode::CREATE_PARENT, true)) { |
| 1446 | // Retry, but make sure we don't try to create the parent again. |
| 1447 | return tryTransfer(toPath, toMode - WriteMode::CREATE_PARENT, |
| 1448 | fromDirectory, fromPath, mode, self); |
| 1449 | } |
| 1450 | return false; |
| 1451 | default: |
| 1452 | KJ_FAIL_SYSCALL("rename(fromPath, toPath)" , error, fromPath, toPath) { |
| 1453 | return false; |
| 1454 | } |
| 1455 | } |
| 1456 | } |
| 1457 | } |
| 1458 | |
| 1459 | // OK, we can't do anything efficient using the OS. Fall back to default implementation. |
| 1460 | return self.Directory::tryTransfer(toPath, toMode, fromDirectory, fromPath, mode); |
| 1461 | } |
| 1462 | |
| 1463 | bool tryRemove(PathPtr path) const { |
| 1464 | return rmrf(fd, path.toString()); |
| 1465 | } |
| 1466 | |
| 1467 | protected: |
| 1468 | AutoCloseFd fd; |
| 1469 | }; |
| 1470 | |
// Expands to the FsNode overrides shared by every concrete disk class below: getFd(),
// cloneFsNode(), stat(), sync() and datasync(). Each one forwards to the DiskHandle member of
// the same name; cloneFsNode() wraps the result of DiskHandle::clone() in a new `classname`.
#define FSNODE_METHODS(classname)                                   \
  Maybe<int> getFd() const override { return DiskHandle::getFd(); } \
                                                                    \
  Own<const FsNode> cloneFsNode() const override {                  \
    return heap<classname>(DiskHandle::clone());                    \
  }                                                                 \
                                                                    \
  Metadata stat() const override { return DiskHandle::stat(); }     \
  void sync() const override { DiskHandle::sync(); }                \
  void datasync() const override { DiskHandle::datasync(); }
| 1481 | |
| 1482 | class DiskReadableFile final: public ReadableFile, public DiskHandle { |
| 1483 | public: |
| 1484 | DiskReadableFile(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {} |
| 1485 | |
| 1486 | FSNODE_METHODS(DiskReadableFile); |
| 1487 | |
| 1488 | size_t read(uint64_t offset, ArrayPtr<byte> buffer) const override { |
| 1489 | return DiskHandle::read(offset, buffer); |
| 1490 | } |
| 1491 | Array<const byte> mmap(uint64_t offset, uint64_t size) const override { |
| 1492 | return DiskHandle::mmap(offset, size); |
| 1493 | } |
| 1494 | Array<byte> mmapPrivate(uint64_t offset, uint64_t size) const override { |
| 1495 | return DiskHandle::mmapPrivate(offset, size); |
| 1496 | } |
| 1497 | }; |
| 1498 | |
| 1499 | class DiskAppendableFile final: public AppendableFile, public DiskHandle, public FdOutputStream { |
| 1500 | public: |
| 1501 | DiskAppendableFile(AutoCloseFd&& fd) |
| 1502 | : DiskHandle(kj::mv(fd)), |
| 1503 | FdOutputStream(DiskHandle::fd.get()) {} |
| 1504 | |
| 1505 | FSNODE_METHODS(DiskAppendableFile); |
| 1506 | |
| 1507 | void write(const void* buffer, size_t size) override { |
| 1508 | FdOutputStream::write(buffer, size); |
| 1509 | } |
| 1510 | void write(ArrayPtr<const ArrayPtr<const byte>> pieces) override { |
| 1511 | FdOutputStream::write(pieces); |
| 1512 | } |
| 1513 | }; |
| 1514 | |
| 1515 | class DiskFile final: public File, public DiskHandle { |
| 1516 | public: |
| 1517 | DiskFile(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {} |
| 1518 | |
| 1519 | FSNODE_METHODS(DiskFile); |
| 1520 | |
| 1521 | size_t read(uint64_t offset, ArrayPtr<byte> buffer) const override { |
| 1522 | return DiskHandle::read(offset, buffer); |
| 1523 | } |
| 1524 | Array<const byte> mmap(uint64_t offset, uint64_t size) const override { |
| 1525 | return DiskHandle::mmap(offset, size); |
| 1526 | } |
| 1527 | Array<byte> mmapPrivate(uint64_t offset, uint64_t size) const override { |
| 1528 | return DiskHandle::mmapPrivate(offset, size); |
| 1529 | } |
| 1530 | |
| 1531 | void write(uint64_t offset, ArrayPtr<const byte> data) const override { |
| 1532 | DiskHandle::write(offset, data); |
| 1533 | } |
| 1534 | void zero(uint64_t offset, uint64_t size) const override { |
| 1535 | DiskHandle::zero(offset, size); |
| 1536 | } |
| 1537 | void truncate(uint64_t size) const override { |
| 1538 | DiskHandle::truncate(size); |
| 1539 | } |
| 1540 | Own<const WritableFileMapping> mmapWritable(uint64_t offset, uint64_t size) const override { |
| 1541 | return DiskHandle::mmapWritable(offset, size); |
| 1542 | } |
| 1543 | size_t copy(uint64_t offset, const ReadableFile& from, |
| 1544 | uint64_t fromOffset, uint64_t size) const override { |
| 1545 | KJ_IF_MAYBE(result, DiskHandle::copy(offset, from, fromOffset, size)) { |
| 1546 | return *result; |
| 1547 | } else { |
| 1548 | return File::copy(offset, from, fromOffset, size); |
| 1549 | } |
| 1550 | } |
| 1551 | }; |
| 1552 | |
| 1553 | class DiskReadableDirectory final: public ReadableDirectory, public DiskHandle { |
| 1554 | public: |
| 1555 | DiskReadableDirectory(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {} |
| 1556 | |
| 1557 | FSNODE_METHODS(DiskReadableDirectory); |
| 1558 | |
| 1559 | Array<String> listNames() const override { return DiskHandle::listNames(); } |
| 1560 | Array<Entry> listEntries() const override { return DiskHandle::listEntries(); } |
| 1561 | bool exists(PathPtr path) const override { return DiskHandle::exists(path); } |
| 1562 | Maybe<FsNode::Metadata> tryLstat(PathPtr path) const override { |
| 1563 | return DiskHandle::tryLstat(path); |
| 1564 | } |
| 1565 | Maybe<Own<const ReadableFile>> tryOpenFile(PathPtr path) const override { |
| 1566 | return DiskHandle::tryOpenFile(path); |
| 1567 | } |
| 1568 | Maybe<Own<const ReadableDirectory>> tryOpenSubdir(PathPtr path) const override { |
| 1569 | return DiskHandle::tryOpenSubdir(path); |
| 1570 | } |
| 1571 | Maybe<String> tryReadlink(PathPtr path) const override { return DiskHandle::tryReadlink(path); } |
| 1572 | }; |
| 1573 | |
| 1574 | class DiskDirectory final: public Directory, public DiskHandle { |
| 1575 | public: |
| 1576 | DiskDirectory(AutoCloseFd&& fd): DiskHandle(kj::mv(fd)) {} |
| 1577 | |
| 1578 | FSNODE_METHODS(DiskDirectory); |
| 1579 | |
| 1580 | Array<String> listNames() const override { return DiskHandle::listNames(); } |
| 1581 | Array<Entry> listEntries() const override { return DiskHandle::listEntries(); } |
| 1582 | bool exists(PathPtr path) const override { return DiskHandle::exists(path); } |
| 1583 | Maybe<FsNode::Metadata> tryLstat(PathPtr path) const override { |
| 1584 | return DiskHandle::tryLstat(path); |
| 1585 | } |
| 1586 | Maybe<Own<const ReadableFile>> tryOpenFile(PathPtr path) const override { |
| 1587 | return DiskHandle::tryOpenFile(path); |
| 1588 | } |
| 1589 | Maybe<Own<const ReadableDirectory>> tryOpenSubdir(PathPtr path) const override { |
| 1590 | return DiskHandle::tryOpenSubdir(path); |
| 1591 | } |
| 1592 | Maybe<String> tryReadlink(PathPtr path) const override { return DiskHandle::tryReadlink(path); } |
| 1593 | |
| 1594 | Maybe<Own<const File>> tryOpenFile(PathPtr path, WriteMode mode) const override { |
| 1595 | return DiskHandle::tryOpenFile(path, mode); |
| 1596 | } |
| 1597 | Own<Replacer<File>> replaceFile(PathPtr path, WriteMode mode) const override { |
| 1598 | return DiskHandle::replaceFile(path, mode); |
| 1599 | } |
| 1600 | Own<const File> createTemporary() const override { |
| 1601 | return DiskHandle::createTemporary(); |
| 1602 | } |
| 1603 | Maybe<Own<AppendableFile>> tryAppendFile(PathPtr path, WriteMode mode) const override { |
| 1604 | return DiskHandle::tryAppendFile(path, mode); |
| 1605 | } |
| 1606 | Maybe<Own<const Directory>> tryOpenSubdir(PathPtr path, WriteMode mode) const override { |
| 1607 | return DiskHandle::tryOpenSubdir(path, mode); |
| 1608 | } |
| 1609 | Own<Replacer<Directory>> replaceSubdir(PathPtr path, WriteMode mode) const override { |
| 1610 | return DiskHandle::replaceSubdir(path, mode); |
| 1611 | } |
| 1612 | bool trySymlink(PathPtr linkpath, StringPtr content, WriteMode mode) const override { |
| 1613 | return DiskHandle::trySymlink(linkpath, content, mode); |
| 1614 | } |
| 1615 | bool tryTransfer(PathPtr toPath, WriteMode toMode, |
| 1616 | const Directory& fromDirectory, PathPtr fromPath, |
| 1617 | TransferMode mode) const override { |
| 1618 | return DiskHandle::tryTransfer(toPath, toMode, fromDirectory, fromPath, mode, *this); |
| 1619 | } |
| 1620 | // tryTransferTo() not implemented because we have nothing special we can do. |
| 1621 | bool tryRemove(PathPtr path) const override { |
| 1622 | return DiskHandle::tryRemove(path); |
| 1623 | } |
| 1624 | }; |
| 1625 | |
| 1626 | class DiskFilesystem final: public Filesystem { |
| 1627 | public: |
| 1628 | DiskFilesystem() |
| 1629 | : root(openDir("/" )), |
| 1630 | current(openDir("." )), |
| 1631 | currentPath(computeCurrentPath()) {} |
| 1632 | |
| 1633 | const Directory& getRoot() const override { |
| 1634 | return root; |
| 1635 | } |
| 1636 | |
| 1637 | const Directory& getCurrent() const override { |
| 1638 | return current; |
| 1639 | } |
| 1640 | |
| 1641 | PathPtr getCurrentPath() const override { |
| 1642 | return currentPath; |
| 1643 | } |
| 1644 | |
| 1645 | private: |
| 1646 | DiskDirectory root; |
| 1647 | DiskDirectory current; |
| 1648 | Path currentPath; |
| 1649 | |
| 1650 | static AutoCloseFd openDir(const char* dir) { |
| 1651 | int newFd; |
| 1652 | KJ_SYSCALL(newFd = open(dir, O_RDONLY | MAYBE_O_CLOEXEC | MAYBE_O_DIRECTORY)); |
| 1653 | AutoCloseFd result(newFd); |
| 1654 | #ifndef O_CLOEXEC |
| 1655 | setCloexec(result); |
| 1656 | #endif |
| 1657 | return result; |
| 1658 | } |
| 1659 | |
| 1660 | static Path computeCurrentPath() { |
| 1661 | // If env var PWD is set and points to the current directory, use it. This captures the current |
| 1662 | // path according to the user's shell, which may differ from the kernel's idea in the presence |
| 1663 | // of symlinks. |
| 1664 | const char* pwd = getenv("PWD" ); |
| 1665 | if (pwd != nullptr) { |
| 1666 | Path result = nullptr; |
| 1667 | struct stat pwdStat, dotStat; |
| 1668 | KJ_IF_MAYBE(e, kj::runCatchingExceptions([&]() { |
| 1669 | KJ_ASSERT(pwd[0] == '/') { return; } |
| 1670 | result = Path::parse(pwd + 1); |
| 1671 | KJ_SYSCALL(lstat(result.toString(true).cStr(), &pwdStat), result) { return; } |
| 1672 | KJ_SYSCALL(lstat("." , &dotStat)) { return; } |
| 1673 | })) { |
| 1674 | // failed, give up on PWD |
| 1675 | KJ_LOG(WARNING, "PWD environment variable seems invalid" , pwd, *e); |
| 1676 | } else { |
| 1677 | if (pwdStat.st_ino == dotStat.st_ino && |
| 1678 | pwdStat.st_dev == dotStat.st_dev) { |
| 1679 | return kj::mv(result); |
| 1680 | } else { |
| 1681 | KJ_LOG(WARNING, "PWD environment variable doesn't match current directory" , pwd); |
| 1682 | } |
| 1683 | } |
| 1684 | } |
| 1685 | |
| 1686 | size_t size = 256; |
| 1687 | retry: |
| 1688 | KJ_STACK_ARRAY(char, buf, size, 256, 4096); |
| 1689 | if (getcwd(buf.begin(), size) == nullptr) { |
| 1690 | int error = errno; |
| 1691 | if (error == ENAMETOOLONG) { |
| 1692 | size *= 2; |
| 1693 | goto retry; |
| 1694 | } else { |
| 1695 | KJ_FAIL_SYSCALL("getcwd()" , error); |
| 1696 | } |
| 1697 | } |
| 1698 | |
| 1699 | StringPtr path = buf.begin(); |
| 1700 | |
| 1701 | // On Linux, the path will start with "(unreachable)" if the working directory is not a subdir |
| 1702 | // of the root directory, which is possible via chroot() or mount namespaces. |
| 1703 | KJ_ASSERT(!path.startsWith("(unreachable)" ), |
| 1704 | "working directory is not reachable from root" , path); |
| 1705 | KJ_ASSERT(path.startsWith("/" ), "current directory is not absolute" , path); |
| 1706 | |
| 1707 | return Path::parse(path.slice(1)); |
| 1708 | } |
| 1709 | }; |
| 1710 | |
| 1711 | } // namespace |
| 1712 | |
| 1713 | Own<ReadableFile> newDiskReadableFile(kj::AutoCloseFd fd) { |
| 1714 | return heap<DiskReadableFile>(kj::mv(fd)); |
| 1715 | } |
| 1716 | Own<AppendableFile> newDiskAppendableFile(kj::AutoCloseFd fd) { |
| 1717 | return heap<DiskAppendableFile>(kj::mv(fd)); |
| 1718 | } |
| 1719 | Own<File> newDiskFile(kj::AutoCloseFd fd) { |
| 1720 | return heap<DiskFile>(kj::mv(fd)); |
| 1721 | } |
| 1722 | Own<ReadableDirectory> newDiskReadableDirectory(kj::AutoCloseFd fd) { |
| 1723 | return heap<DiskReadableDirectory>(kj::mv(fd)); |
| 1724 | } |
| 1725 | Own<Directory> newDiskDirectory(kj::AutoCloseFd fd) { |
| 1726 | return heap<DiskDirectory>(kj::mv(fd)); |
| 1727 | } |
| 1728 | |
| 1729 | Own<Filesystem> newDiskFilesystem() { |
| 1730 | return heap<DiskFilesystem>(); |
| 1731 | } |
| 1732 | |
| 1733 | } // namespace kj |
| 1734 | |
| 1735 | #endif // !_WIN32 |
| 1736 | |