1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
// Force a 64-bit off_t on platforms that honor _FILE_OFFSET_BITS.
// Any value set earlier (e.g. on the compiler command line) is discarded
// first; #undef on a macro that is not defined is a no-op.
#undef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
24 | |
25 | #include "arrow/util/windows_compatibility.h" // IWYU pragma: keep |
26 | |
27 | #include <algorithm> |
28 | #include <cerrno> |
29 | #include <cstdint> |
30 | #include <cstring> |
31 | #include <sstream> |
32 | #include <string> |
33 | |
34 | #include <fcntl.h> |
35 | #include <stdlib.h> |
36 | #include <sys/stat.h> |
37 | #include <sys/types.h> // IWYU pragma: keep |
38 | |
// Permission bits handed to open() when a writable file is created.
// The value is parenthesized so that the macro always expands to a single
// expression, whatever operators surround it at the point of use.
#if defined(__MINGW32__)
// MinGW: only the owner read/write bits are used
#define ARROW_WRITE_SHMODE (S_IRUSR | S_IWUSR)
#elif defined(_MSC_VER)  // Visual Studio
// Nothing to define: the MSVC code path does not use ARROW_WRITE_SHMODE
#else  // gcc / clang on POSIX platforms
#define ARROW_WRITE_SHMODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
#endif
47 | |
48 | // For filename conversion |
49 | #if defined(_MSC_VER) |
50 | #include <boost/system/system_error.hpp> // NOLINT |
51 | #include <codecvt> |
52 | #include <locale> |
53 | #endif |
54 | |
55 | // ---------------------------------------------------------------------- |
56 | // file compatibility stuff |
57 | |
58 | #if defined(__MINGW32__) // MinGW |
59 | // nothing |
60 | #elif defined(_MSC_VER) // Visual Studio |
61 | #include <io.h> |
62 | #else // POSIX / Linux |
63 | // nothing |
64 | #endif |
65 | |
66 | #ifdef _WIN32 // Windows |
67 | #include "arrow/io/mman.h" |
68 | #undef Realloc |
69 | #undef Free |
70 | #else // POSIX-like platforms |
71 | #include <sys/mman.h> |
72 | #include <unistd.h> |
73 | #endif |
74 | |
// O_BINARY is not provided by POSIX systems; define it as a no-op flag so
// that it can be OR-ed into open() flags unconditionally.
#if !defined(O_BINARY)
#define O_BINARY 0
#endif
79 | |
// Largest byte count requested from a single read/write call.
#if defined(_MSC_VER) || defined(__APPLE__)
// Visual Studio, and macOS (due to a macOS bug, the read/write max must be
// capped there)
#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
#else
// see notes on Linux read/write manpage
#define ARROW_MAX_IO_CHUNKSIZE 0x7ffff000
#endif
94 | |
95 | #include "arrow/status.h" |
96 | #include "arrow/util/io-util.h" |
97 | |
98 | namespace arrow { |
99 | namespace internal { |
100 | |
// Return Status::IOError("lseek failed") from the enclosing function when
// `retval` equals -1.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to
// exactly one statement: `CHECK_LSEEK(x);` then composes safely with an
// unbraced if/else and leaves no stray empty statement behind.
#define CHECK_LSEEK(retval)                                     \
  do {                                                          \
    if ((retval) == -1) return Status::IOError("lseek failed"); \
  } while (0)
103 | |
// Reposition the offset of `fd` using a 64-bit position on every platform.
//
// Forwards to _lseeki64() with Visual Studio and to lseek() everywhere else,
// and hands back whatever the underlying call returned (callers in this file
// treat -1 as failure).
static inline int64_t lseek64_compat(int fd, int64_t pos, int whence) {
#if !defined(_MSC_VER)
  return lseek(fd, pos, whence);
#else
  return _lseeki64(fd, pos, whence);
#endif
}
111 | |
112 | static inline Status CheckFileOpResult(int ret, int errno_actual, |
113 | const PlatformFilename& file_name, |
114 | const char* opname) { |
115 | if (ret == -1) { |
116 | return Status::IOError("Failed to " , opname, " file: " , file_name.string(), |
117 | " , error: " , std::strerror(errno_actual)); |
118 | } |
119 | return Status::OK(); |
120 | } |
121 | |
122 | // |
123 | // File name handling |
124 | // |
125 | |
126 | Status FileNameFromString(const std::string& file_name, PlatformFilename* out) { |
127 | #if defined(_MSC_VER) |
128 | try { |
129 | std::codecvt_utf8_utf16<wchar_t> utf16_converter; |
130 | out->assign(file_name, utf16_converter); |
131 | } catch (boost::system::system_error& e) { |
132 | return Status::Invalid(e.what()); |
133 | } |
134 | #else |
135 | *out = internal::PlatformFilename(file_name); |
136 | #endif |
137 | return Status::OK(); |
138 | } |
139 | |
140 | // |
141 | // Functions for creating file descriptors |
142 | // |
143 | |
144 | Status FileOpenReadable(const PlatformFilename& file_name, int* fd) { |
145 | int ret, errno_actual; |
146 | #if defined(_MSC_VER) |
147 | errno_actual = _wsopen_s(fd, file_name.wstring().c_str(), |
148 | _O_RDONLY | _O_BINARY | _O_NOINHERIT, _SH_DENYNO, _S_IREAD); |
149 | ret = *fd; |
150 | #else |
151 | ret = *fd = open(file_name.c_str(), O_RDONLY | O_BINARY); |
152 | errno_actual = errno; |
153 | #endif |
154 | |
155 | return CheckFileOpResult(ret, errno_actual, file_name, "open local" ); |
156 | } |
157 | |
158 | Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool truncate, |
159 | bool append, int* fd) { |
160 | int ret, errno_actual; |
161 | |
162 | #if defined(_MSC_VER) |
163 | int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT; |
164 | int pmode = _S_IWRITE; |
165 | if (!write_only) { |
166 | pmode |= _S_IREAD; |
167 | } |
168 | |
169 | if (truncate) { |
170 | oflag |= _O_TRUNC; |
171 | } |
172 | if (append) { |
173 | oflag |= _O_APPEND; |
174 | } |
175 | |
176 | if (write_only) { |
177 | oflag |= _O_WRONLY; |
178 | } else { |
179 | oflag |= _O_RDWR; |
180 | } |
181 | |
182 | errno_actual = _wsopen_s(fd, file_name.wstring().c_str(), oflag, _SH_DENYNO, pmode); |
183 | ret = *fd; |
184 | |
185 | #else |
186 | int oflag = O_CREAT | O_BINARY; |
187 | |
188 | if (truncate) { |
189 | oflag |= O_TRUNC; |
190 | } |
191 | if (append) { |
192 | oflag |= O_APPEND; |
193 | } |
194 | |
195 | if (write_only) { |
196 | oflag |= O_WRONLY; |
197 | } else { |
198 | oflag |= O_RDWR; |
199 | } |
200 | |
201 | ret = *fd = open(file_name.c_str(), oflag, ARROW_WRITE_SHMODE); |
202 | errno_actual = errno; |
203 | #endif |
204 | return CheckFileOpResult(ret, errno_actual, file_name, "open local" ); |
205 | } |
206 | |
207 | Status FileTell(int fd, int64_t* pos) { |
208 | int64_t current_pos; |
209 | |
210 | #if defined(_MSC_VER) |
211 | current_pos = _telli64(fd); |
212 | if (current_pos == -1) { |
213 | return Status::IOError("_telli64 failed" ); |
214 | } |
215 | #else |
216 | current_pos = lseek64_compat(fd, 0, SEEK_CUR); |
217 | CHECK_LSEEK(current_pos); |
218 | #endif |
219 | |
220 | *pos = current_pos; |
221 | return Status::OK(); |
222 | } |
223 | |
224 | Status CreatePipe(int fd[2]) { |
225 | int ret; |
226 | #if defined(_WIN32) |
227 | ret = _pipe(fd, 4096, _O_BINARY); |
228 | #else |
229 | ret = pipe(fd); |
230 | #endif |
231 | |
232 | if (ret == -1) { |
233 | return Status::IOError("Error creating pipe: " , std::strerror(errno)); |
234 | } |
235 | return Status::OK(); |
236 | } |
237 | |
238 | static Status StatusFromErrno(const char* prefix) { |
239 | #ifdef _WIN32 |
240 | errno = __map_mman_error(GetLastError(), EPERM); |
241 | #endif |
242 | return Status::IOError(prefix, std::strerror(errno)); |
243 | } |
244 | |
245 | // |
246 | // Compatible way to remap a memory map |
247 | // |
248 | |
249 | Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, |
250 | void** new_addr) { |
251 | // should only be called with writable files |
252 | *new_addr = MAP_FAILED; |
253 | #ifdef _WIN32 |
254 | // flags are ignored on windows |
255 | HANDLE fm, h; |
256 | |
257 | if (!UnmapViewOfFile(addr)) { |
258 | return StatusFromErrno("UnmapViewOfFile failed: " ); |
259 | } |
260 | |
261 | h = reinterpret_cast<HANDLE>(_get_osfhandle(fildes)); |
262 | if (h == INVALID_HANDLE_VALUE) { |
263 | return StatusFromErrno("Cannot get file handle: " ); |
264 | } |
265 | |
266 | LONG new_size_low = static_cast<LONG>(new_size & 0xFFFFFFFFL); |
267 | LONG new_size_high = static_cast<LONG>((new_size >> 32) & 0xFFFFFFFFL); |
268 | |
269 | SetFilePointer(h, new_size_low, &new_size_high, FILE_BEGIN); |
270 | SetEndOfFile(h); |
271 | fm = CreateFileMapping(h, NULL, PAGE_READWRITE, 0, 0, "" ); |
272 | if (fm == NULL) { |
273 | return StatusFromErrno("CreateFileMapping failed: " ); |
274 | } |
275 | *new_addr = MapViewOfFile(fm, FILE_MAP_WRITE, 0, 0, new_size); |
276 | CloseHandle(fm); |
277 | if (new_addr == NULL) { |
278 | return StatusFromErrno("MapViewOfFile failed: " ); |
279 | } |
280 | return Status::OK(); |
281 | #else |
282 | #ifdef __APPLE__ |
283 | // we have to close the mmap first, truncate the file to the new size |
284 | // and recreate the mmap |
285 | if (munmap(addr, old_size) == -1) { |
286 | return StatusFromErrno("munmap failed: " ); |
287 | } |
288 | if (ftruncate(fildes, new_size) == -1) { |
289 | return StatusFromErrno("ftruncate failed: " ); |
290 | } |
291 | // we set READ / WRITE flags on the new map, since we could only have |
292 | // unlarged a RW map in the first place |
293 | *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0); |
294 | if (*new_addr == MAP_FAILED) { |
295 | return StatusFromErrno("mmap failed: " ); |
296 | } |
297 | return Status::OK(); |
298 | #else |
299 | if (ftruncate(fildes, new_size) == -1) { |
300 | return StatusFromErrno("ftruncate failed: " ); |
301 | } |
302 | *new_addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE); |
303 | if (*new_addr == MAP_FAILED) { |
304 | return StatusFromErrno("mremap failed: " ); |
305 | } |
306 | return Status::OK(); |
307 | #endif |
308 | #endif |
309 | } |
310 | |
311 | // |
312 | // Closing files |
313 | // |
314 | |
315 | Status FileClose(int fd) { |
316 | int ret; |
317 | |
318 | #if defined(_MSC_VER) |
319 | ret = static_cast<int>(_close(fd)); |
320 | #else |
321 | ret = static_cast<int>(close(fd)); |
322 | #endif |
323 | |
324 | if (ret == -1) { |
325 | return Status::IOError("error closing file" ); |
326 | } |
327 | return Status::OK(); |
328 | } |
329 | |
330 | // |
331 | // Seeking and telling |
332 | // |
333 | |
334 | Status FileSeek(int fd, int64_t pos, int whence) { |
335 | int64_t ret = lseek64_compat(fd, pos, whence); |
336 | CHECK_LSEEK(ret); |
337 | return Status::OK(); |
338 | } |
339 | |
340 | Status FileSeek(int fd, int64_t pos) { return FileSeek(fd, pos, SEEK_SET); } |
341 | |
342 | Status FileGetSize(int fd, int64_t* size) { |
343 | #if defined(_MSC_VER) |
344 | struct __stat64 st; |
345 | #else |
346 | struct stat st; |
347 | #endif |
348 | st.st_size = -1; |
349 | |
350 | #if defined(_MSC_VER) |
351 | int ret = _fstat64(fd, &st); |
352 | #else |
353 | int ret = fstat(fd, &st); |
354 | #endif |
355 | |
356 | if (ret == -1) { |
357 | return Status::IOError("error stat()ing file" ); |
358 | } |
359 | if (st.st_size == 0) { |
360 | // Maybe the file doesn't support getting its size, double-check by |
361 | // trying to tell() (seekable files usually have a size, while |
362 | // non-seekable files don't) |
363 | int64_t position; |
364 | RETURN_NOT_OK(FileTell(fd, &position)); |
365 | } else if (st.st_size < 0) { |
366 | return Status::IOError("error getting file size" ); |
367 | } |
368 | *size = st.st_size; |
369 | return Status::OK(); |
370 | } |
371 | |
372 | // |
373 | // Reading data |
374 | // |
375 | |
// Read up to `nbytes` bytes from `fd` at absolute offset `pos` into `buf`.
//
// Returns the number of bytes read (0 at end of file) or -1 on error.
// POSIX platforms forward to pread(); Windows uses ReadFile() with an
// OVERLAPPED structure carrying the offset.
static inline int64_t pread_compat(int fd, void* buf, int64_t nbytes, int64_t pos) {
#if !defined(_WIN32)
  const auto count =
      pread(fd, buf, static_cast<size_t>(nbytes), static_cast<off_t>(pos));
  return static_cast<int64_t>(count);
#else
  HANDLE handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
  DWORD dwBytesRead = 0;
  OVERLAPPED overlapped = {0};
  // split the 64-bit offset into the two 32-bit halves OVERLAPPED expects
  overlapped.Offset = static_cast<uint32_t>(pos);
  overlapped.OffsetHigh = static_cast<uint32_t>(pos >> 32);

  // Note: ReadFile() will update the file position
  const BOOL succeeded =
      ReadFile(handle, buf, static_cast<uint32_t>(nbytes), &dwBytesRead, &overlapped);
  if (!succeeded && GetLastError() != ERROR_HANDLE_EOF) {
    return -1;
  }
  // hitting end of file is not an error
  return dwBytesRead;
#endif
}
397 | |
398 | Status FileRead(int fd, uint8_t* buffer, int64_t nbytes, int64_t* bytes_read) { |
399 | *bytes_read = 0; |
400 | |
401 | while (*bytes_read < nbytes) { |
402 | int64_t chunksize = |
403 | std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - *bytes_read); |
404 | #if defined(_MSC_VER) |
405 | int64_t ret = |
406 | static_cast<int64_t>(_read(fd, buffer, static_cast<uint32_t>(chunksize))); |
407 | #else |
408 | int64_t ret = static_cast<int64_t>(read(fd, buffer, static_cast<size_t>(chunksize))); |
409 | #endif |
410 | |
411 | if (ret == -1) { |
412 | *bytes_read = ret; |
413 | break; |
414 | } |
415 | if (ret == 0) { |
416 | // EOF |
417 | break; |
418 | } |
419 | buffer += ret; |
420 | *bytes_read += ret; |
421 | } |
422 | |
423 | if (*bytes_read == -1) { |
424 | return Status::IOError(std::string("Error reading bytes from file: " ) + |
425 | std::string(strerror(errno))); |
426 | } |
427 | |
428 | return Status::OK(); |
429 | } |
430 | |
431 | Status FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes, |
432 | int64_t* bytes_read) { |
433 | *bytes_read = 0; |
434 | |
435 | while (*bytes_read < nbytes) { |
436 | int64_t chunksize = |
437 | std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - *bytes_read); |
438 | int64_t ret = pread_compat(fd, buffer, chunksize, position); |
439 | |
440 | if (ret == -1) { |
441 | *bytes_read = ret; |
442 | break; |
443 | } |
444 | if (ret == 0) { |
445 | // EOF |
446 | break; |
447 | } |
448 | buffer += ret; |
449 | position += ret; |
450 | *bytes_read += ret; |
451 | } |
452 | |
453 | if (*bytes_read == -1) { |
454 | return Status::IOError(std::string("Error reading bytes from file: " ) + |
455 | std::string(strerror(errno))); |
456 | } |
457 | return Status::OK(); |
458 | } |
459 | |
460 | // |
461 | // Writing data |
462 | // |
463 | |
464 | Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes) { |
465 | int ret = 0; |
466 | int64_t bytes_written = 0; |
467 | |
468 | while (ret != -1 && bytes_written < nbytes) { |
469 | int64_t chunksize = |
470 | std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_written); |
471 | #if defined(_MSC_VER) |
472 | ret = static_cast<int>( |
473 | _write(fd, buffer + bytes_written, static_cast<uint32_t>(chunksize))); |
474 | #else |
475 | ret = static_cast<int>( |
476 | write(fd, buffer + bytes_written, static_cast<size_t>(chunksize))); |
477 | #endif |
478 | |
479 | if (ret != -1) { |
480 | bytes_written += ret; |
481 | } |
482 | } |
483 | |
484 | if (ret == -1) { |
485 | return Status::IOError(std::string("Error writing bytes from file: " ) + |
486 | std::string(strerror(errno))); |
487 | } |
488 | return Status::OK(); |
489 | } |
490 | |
491 | Status FileTruncate(int fd, const int64_t size) { |
492 | int ret, errno_actual; |
493 | |
494 | #ifdef _WIN32 |
495 | errno_actual = _chsize_s(fd, static_cast<size_t>(size)); |
496 | ret = errno_actual == 0 ? 0 : -1; |
497 | #else |
498 | ret = ftruncate(fd, static_cast<size_t>(size)); |
499 | errno_actual = errno; |
500 | #endif |
501 | |
502 | if (ret == -1) { |
503 | return Status::IOError(std::string("Error truncating file: " ) + |
504 | std::string(strerror(errno_actual))); |
505 | } |
506 | return Status::OK(); |
507 | } |
508 | |
509 | // |
510 | // Environment variables |
511 | // |
512 | |
513 | Status GetEnvVar(const char* name, std::string* out) { |
514 | #ifdef _WIN32 |
515 | // On Windows, getenv() reads an early copy of the process' environment |
516 | // which doesn't get updated when SetEnvironmentVariable() is called. |
517 | constexpr int32_t bufsize = 2000; |
518 | char c_str[bufsize]; |
519 | auto res = GetEnvironmentVariableA(name, c_str, bufsize); |
520 | if (res >= bufsize) { |
521 | return Status::CapacityError("environment variable value too long" ); |
522 | } else if (res == 0) { |
523 | return Status::KeyError("environment variable undefined" ); |
524 | } |
525 | *out = std::string(c_str); |
526 | return Status::OK(); |
527 | #else |
528 | char* c_str = getenv(name); |
529 | if (c_str == nullptr) { |
530 | return Status::KeyError("environment variable undefined" ); |
531 | } |
532 | *out = std::string(c_str); |
533 | return Status::OK(); |
534 | #endif |
535 | } |
536 | |
537 | Status GetEnvVar(const std::string& name, std::string* out) { |
538 | return GetEnvVar(name.c_str(), out); |
539 | } |
540 | |
541 | Status SetEnvVar(const char* name, const char* value) { |
542 | #ifdef _WIN32 |
543 | if (SetEnvironmentVariableA(name, value)) { |
544 | return Status::OK(); |
545 | } else { |
546 | return Status::Invalid("failed setting environment variable" ); |
547 | } |
548 | #else |
549 | if (setenv(name, value, 1) == 0) { |
550 | return Status::OK(); |
551 | } else { |
552 | return Status::Invalid("failed setting environment variable" ); |
553 | } |
554 | #endif |
555 | } |
556 | |
557 | Status SetEnvVar(const std::string& name, const std::string& value) { |
558 | return SetEnvVar(name.c_str(), value.c_str()); |
559 | } |
560 | |
561 | Status DelEnvVar(const char* name) { |
562 | #ifdef _WIN32 |
563 | if (SetEnvironmentVariableA(name, nullptr)) { |
564 | return Status::OK(); |
565 | } else { |
566 | return Status::Invalid("failed deleting environment variable" ); |
567 | } |
568 | #else |
569 | if (unsetenv(name) == 0) { |
570 | return Status::OK(); |
571 | } else { |
572 | return Status::Invalid("failed deleting environment variable" ); |
573 | } |
574 | #endif |
575 | } |
576 | |
577 | Status DelEnvVar(const std::string& name) { return DelEnvVar(name.c_str()); } |
578 | |
579 | } // namespace internal |
580 | } // namespace arrow |
581 | |