1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// Ensure 64-bit off_t for platforms where it matters
19#ifdef _FILE_OFFSET_BITS
20#undef _FILE_OFFSET_BITS
21#endif
22
23#define _FILE_OFFSET_BITS 64
24
25#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep
26
27#include <algorithm>
28#include <cerrno>
29#include <cstdint>
30#include <cstring>
31#include <sstream>
32#include <string>
33
34#include <fcntl.h>
35#include <stdlib.h>
36#include <sys/stat.h>
37#include <sys/types.h> // IWYU pragma: keep
38
// Permission bits passed to open() when creating a writable file.
// The expansion is parenthesized so that the macro always behaves as a single
// expression, whatever operators surround it at the point of use.
// Defines that don't exist in MinGW
#if defined(__MINGW32__)
#define ARROW_WRITE_SHMODE (S_IRUSR | S_IWUSR)
#elif defined(_MSC_VER)  // Visual Studio
// ARROW_WRITE_SHMODE is intentionally left undefined for Visual Studio
#else  // gcc / clang on POSIX platforms
#define ARROW_WRITE_SHMODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
#endif
47
48// For filename conversion
49#if defined(_MSC_VER)
50#include <boost/system/system_error.hpp> // NOLINT
51#include <codecvt>
52#include <locale>
53#endif
54
55// ----------------------------------------------------------------------
56// file compatibility stuff
57
58#if defined(__MINGW32__) // MinGW
59// nothing
60#elif defined(_MSC_VER) // Visual Studio
61#include <io.h>
62#else // POSIX / Linux
63// nothing
64#endif
65
66#ifdef _WIN32 // Windows
67#include "arrow/io/mman.h"
68#undef Realloc
69#undef Free
70#else // POSIX-like platforms
71#include <sys/mman.h>
72#include <unistd.h>
73#endif
74
// POSIX systems do not have O_BINARY; give it a neutral value of 0 so it can
// be OR'ed into open() flags on every platform
#if !defined(O_BINARY)
#define O_BINARY 0
#endif

// Largest byte count handed to a single read / write call
#if defined(_MSC_VER) || defined(__APPLE__)
// Visual Studio, and macOS (due to macOS bug, we need to set read/write max)
#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
#else
// see notes on Linux read/write manpage
#define ARROW_MAX_IO_CHUNKSIZE 0x7ffff000
#endif
94
95#include "arrow/status.h"
96#include "arrow/util/io-util.h"
97
98namespace arrow {
99namespace internal {
100
// Makes the enclosing function return an IOError Status when `retval` (the
// result of an lseek-style call) is -1.
// The body is wrapped in do { } while (0) so that `CHECK_LSEEK(x);` expands to
// exactly one statement and stays correct inside unbraced if / else.
#define CHECK_LSEEK(retval)                   \
  do {                                        \
    if ((retval) == -1) {                     \
      return Status::IOError("lseek failed"); \
    }                                         \
  } while (0)
103
// Repositions the offset of `fd` using a 64-bit offset and returns the value
// reported by the platform's seek call (-1 on failure).
static inline int64_t lseek64_compat(int fd, int64_t pos, int whence) {
#if !defined(_MSC_VER)
  const auto new_offset = lseek(fd, pos, whence);
  return new_offset;
#else
  // Visual Studio: use the explicit 64-bit variant
  const auto new_offset = _lseeki64(fd, pos, whence);
  return new_offset;
#endif
}
111
112static inline Status CheckFileOpResult(int ret, int errno_actual,
113 const PlatformFilename& file_name,
114 const char* opname) {
115 if (ret == -1) {
116 return Status::IOError("Failed to ", opname, " file: ", file_name.string(),
117 " , error: ", std::strerror(errno_actual));
118 }
119 return Status::OK();
120}
121
122//
123// File name handling
124//
125
126Status FileNameFromString(const std::string& file_name, PlatformFilename* out) {
127#if defined(_MSC_VER)
128 try {
129 std::codecvt_utf8_utf16<wchar_t> utf16_converter;
130 out->assign(file_name, utf16_converter);
131 } catch (boost::system::system_error& e) {
132 return Status::Invalid(e.what());
133 }
134#else
135 *out = internal::PlatformFilename(file_name);
136#endif
137 return Status::OK();
138}
139
140//
141// Functions for creating file descriptors
142//
143
144Status FileOpenReadable(const PlatformFilename& file_name, int* fd) {
145 int ret, errno_actual;
146#if defined(_MSC_VER)
147 errno_actual = _wsopen_s(fd, file_name.wstring().c_str(),
148 _O_RDONLY | _O_BINARY | _O_NOINHERIT, _SH_DENYNO, _S_IREAD);
149 ret = *fd;
150#else
151 ret = *fd = open(file_name.c_str(), O_RDONLY | O_BINARY);
152 errno_actual = errno;
153#endif
154
155 return CheckFileOpResult(ret, errno_actual, file_name, "open local");
156}
157
158Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool truncate,
159 bool append, int* fd) {
160 int ret, errno_actual;
161
162#if defined(_MSC_VER)
163 int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT;
164 int pmode = _S_IWRITE;
165 if (!write_only) {
166 pmode |= _S_IREAD;
167 }
168
169 if (truncate) {
170 oflag |= _O_TRUNC;
171 }
172 if (append) {
173 oflag |= _O_APPEND;
174 }
175
176 if (write_only) {
177 oflag |= _O_WRONLY;
178 } else {
179 oflag |= _O_RDWR;
180 }
181
182 errno_actual = _wsopen_s(fd, file_name.wstring().c_str(), oflag, _SH_DENYNO, pmode);
183 ret = *fd;
184
185#else
186 int oflag = O_CREAT | O_BINARY;
187
188 if (truncate) {
189 oflag |= O_TRUNC;
190 }
191 if (append) {
192 oflag |= O_APPEND;
193 }
194
195 if (write_only) {
196 oflag |= O_WRONLY;
197 } else {
198 oflag |= O_RDWR;
199 }
200
201 ret = *fd = open(file_name.c_str(), oflag, ARROW_WRITE_SHMODE);
202 errno_actual = errno;
203#endif
204 return CheckFileOpResult(ret, errno_actual, file_name, "open local");
205}
206
207Status FileTell(int fd, int64_t* pos) {
208 int64_t current_pos;
209
210#if defined(_MSC_VER)
211 current_pos = _telli64(fd);
212 if (current_pos == -1) {
213 return Status::IOError("_telli64 failed");
214 }
215#else
216 current_pos = lseek64_compat(fd, 0, SEEK_CUR);
217 CHECK_LSEEK(current_pos);
218#endif
219
220 *pos = current_pos;
221 return Status::OK();
222}
223
224Status CreatePipe(int fd[2]) {
225 int ret;
226#if defined(_WIN32)
227 ret = _pipe(fd, 4096, _O_BINARY);
228#else
229 ret = pipe(fd);
230#endif
231
232 if (ret == -1) {
233 return Status::IOError("Error creating pipe: ", std::strerror(errno));
234 }
235 return Status::OK();
236}
237
238static Status StatusFromErrno(const char* prefix) {
239#ifdef _WIN32
240 errno = __map_mman_error(GetLastError(), EPERM);
241#endif
242 return Status::IOError(prefix, std::strerror(errno));
243}
244
245//
246// Compatible way to remap a memory map
247//
248
249Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
250 void** new_addr) {
251 // should only be called with writable files
252 *new_addr = MAP_FAILED;
253#ifdef _WIN32
254 // flags are ignored on windows
255 HANDLE fm, h;
256
257 if (!UnmapViewOfFile(addr)) {
258 return StatusFromErrno("UnmapViewOfFile failed: ");
259 }
260
261 h = reinterpret_cast<HANDLE>(_get_osfhandle(fildes));
262 if (h == INVALID_HANDLE_VALUE) {
263 return StatusFromErrno("Cannot get file handle: ");
264 }
265
266 LONG new_size_low = static_cast<LONG>(new_size & 0xFFFFFFFFL);
267 LONG new_size_high = static_cast<LONG>((new_size >> 32) & 0xFFFFFFFFL);
268
269 SetFilePointer(h, new_size_low, &new_size_high, FILE_BEGIN);
270 SetEndOfFile(h);
271 fm = CreateFileMapping(h, NULL, PAGE_READWRITE, 0, 0, "");
272 if (fm == NULL) {
273 return StatusFromErrno("CreateFileMapping failed: ");
274 }
275 *new_addr = MapViewOfFile(fm, FILE_MAP_WRITE, 0, 0, new_size);
276 CloseHandle(fm);
277 if (new_addr == NULL) {
278 return StatusFromErrno("MapViewOfFile failed: ");
279 }
280 return Status::OK();
281#else
282#ifdef __APPLE__
283 // we have to close the mmap first, truncate the file to the new size
284 // and recreate the mmap
285 if (munmap(addr, old_size) == -1) {
286 return StatusFromErrno("munmap failed: ");
287 }
288 if (ftruncate(fildes, new_size) == -1) {
289 return StatusFromErrno("ftruncate failed: ");
290 }
291 // we set READ / WRITE flags on the new map, since we could only have
292 // unlarged a RW map in the first place
293 *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0);
294 if (*new_addr == MAP_FAILED) {
295 return StatusFromErrno("mmap failed: ");
296 }
297 return Status::OK();
298#else
299 if (ftruncate(fildes, new_size) == -1) {
300 return StatusFromErrno("ftruncate failed: ");
301 }
302 *new_addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE);
303 if (*new_addr == MAP_FAILED) {
304 return StatusFromErrno("mremap failed: ");
305 }
306 return Status::OK();
307#endif
308#endif
309}
310
311//
312// Closing files
313//
314
315Status FileClose(int fd) {
316 int ret;
317
318#if defined(_MSC_VER)
319 ret = static_cast<int>(_close(fd));
320#else
321 ret = static_cast<int>(close(fd));
322#endif
323
324 if (ret == -1) {
325 return Status::IOError("error closing file");
326 }
327 return Status::OK();
328}
329
330//
331// Seeking and telling
332//
333
334Status FileSeek(int fd, int64_t pos, int whence) {
335 int64_t ret = lseek64_compat(fd, pos, whence);
336 CHECK_LSEEK(ret);
337 return Status::OK();
338}
339
340Status FileSeek(int fd, int64_t pos) { return FileSeek(fd, pos, SEEK_SET); }
341
342Status FileGetSize(int fd, int64_t* size) {
343#if defined(_MSC_VER)
344 struct __stat64 st;
345#else
346 struct stat st;
347#endif
348 st.st_size = -1;
349
350#if defined(_MSC_VER)
351 int ret = _fstat64(fd, &st);
352#else
353 int ret = fstat(fd, &st);
354#endif
355
356 if (ret == -1) {
357 return Status::IOError("error stat()ing file");
358 }
359 if (st.st_size == 0) {
360 // Maybe the file doesn't support getting its size, double-check by
361 // trying to tell() (seekable files usually have a size, while
362 // non-seekable files don't)
363 int64_t position;
364 RETURN_NOT_OK(FileTell(fd, &position));
365 } else if (st.st_size < 0) {
366 return Status::IOError("error getting file size");
367 }
368 *size = st.st_size;
369 return Status::OK();
370}
371
372//
373// Reading data
374//
375
// Reads up to `nbytes` bytes from `fd` at absolute offset `pos` into `buf`.
// Returns the number of bytes read (0 at end of file) or -1 on error.
static inline int64_t pread_compat(int fd, void* buf, int64_t nbytes, int64_t pos) {
#if !defined(_WIN32)
  const auto count = static_cast<size_t>(nbytes);
  const auto offset = static_cast<off_t>(pos);
  return static_cast<int64_t>(pread(fd, buf, count, offset));
#else
  HANDLE handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
  DWORD dwBytesRead = 0;

  // The read offset is passed through an OVERLAPPED structure, split into its
  // low and high 32-bit halves
  OVERLAPPED overlapped = {0};
  overlapped.Offset = static_cast<uint32_t>(pos);
  overlapped.OffsetHigh = static_cast<uint32_t>(pos >> 32);

  // Note: ReadFile() will update the file position
  const BOOL ok =
      ReadFile(handle, buf, static_cast<uint32_t>(nbytes), &dwBytesRead, &overlapped);
  // Reaching end of file is not treated as an error
  if (!ok && GetLastError() != ERROR_HANDLE_EOF) {
    return -1;
  }
  return dwBytesRead;
#endif
}
397
398Status FileRead(int fd, uint8_t* buffer, int64_t nbytes, int64_t* bytes_read) {
399 *bytes_read = 0;
400
401 while (*bytes_read < nbytes) {
402 int64_t chunksize =
403 std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - *bytes_read);
404#if defined(_MSC_VER)
405 int64_t ret =
406 static_cast<int64_t>(_read(fd, buffer, static_cast<uint32_t>(chunksize)));
407#else
408 int64_t ret = static_cast<int64_t>(read(fd, buffer, static_cast<size_t>(chunksize)));
409#endif
410
411 if (ret == -1) {
412 *bytes_read = ret;
413 break;
414 }
415 if (ret == 0) {
416 // EOF
417 break;
418 }
419 buffer += ret;
420 *bytes_read += ret;
421 }
422
423 if (*bytes_read == -1) {
424 return Status::IOError(std::string("Error reading bytes from file: ") +
425 std::string(strerror(errno)));
426 }
427
428 return Status::OK();
429}
430
431Status FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes,
432 int64_t* bytes_read) {
433 *bytes_read = 0;
434
435 while (*bytes_read < nbytes) {
436 int64_t chunksize =
437 std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - *bytes_read);
438 int64_t ret = pread_compat(fd, buffer, chunksize, position);
439
440 if (ret == -1) {
441 *bytes_read = ret;
442 break;
443 }
444 if (ret == 0) {
445 // EOF
446 break;
447 }
448 buffer += ret;
449 position += ret;
450 *bytes_read += ret;
451 }
452
453 if (*bytes_read == -1) {
454 return Status::IOError(std::string("Error reading bytes from file: ") +
455 std::string(strerror(errno)));
456 }
457 return Status::OK();
458}
459
460//
461// Writing data
462//
463
464Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes) {
465 int ret = 0;
466 int64_t bytes_written = 0;
467
468 while (ret != -1 && bytes_written < nbytes) {
469 int64_t chunksize =
470 std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_written);
471#if defined(_MSC_VER)
472 ret = static_cast<int>(
473 _write(fd, buffer + bytes_written, static_cast<uint32_t>(chunksize)));
474#else
475 ret = static_cast<int>(
476 write(fd, buffer + bytes_written, static_cast<size_t>(chunksize)));
477#endif
478
479 if (ret != -1) {
480 bytes_written += ret;
481 }
482 }
483
484 if (ret == -1) {
485 return Status::IOError(std::string("Error writing bytes from file: ") +
486 std::string(strerror(errno)));
487 }
488 return Status::OK();
489}
490
491Status FileTruncate(int fd, const int64_t size) {
492 int ret, errno_actual;
493
494#ifdef _WIN32
495 errno_actual = _chsize_s(fd, static_cast<size_t>(size));
496 ret = errno_actual == 0 ? 0 : -1;
497#else
498 ret = ftruncate(fd, static_cast<size_t>(size));
499 errno_actual = errno;
500#endif
501
502 if (ret == -1) {
503 return Status::IOError(std::string("Error truncating file: ") +
504 std::string(strerror(errno_actual)));
505 }
506 return Status::OK();
507}
508
509//
510// Environment variables
511//
512
513Status GetEnvVar(const char* name, std::string* out) {
514#ifdef _WIN32
515 // On Windows, getenv() reads an early copy of the process' environment
516 // which doesn't get updated when SetEnvironmentVariable() is called.
517 constexpr int32_t bufsize = 2000;
518 char c_str[bufsize];
519 auto res = GetEnvironmentVariableA(name, c_str, bufsize);
520 if (res >= bufsize) {
521 return Status::CapacityError("environment variable value too long");
522 } else if (res == 0) {
523 return Status::KeyError("environment variable undefined");
524 }
525 *out = std::string(c_str);
526 return Status::OK();
527#else
528 char* c_str = getenv(name);
529 if (c_str == nullptr) {
530 return Status::KeyError("environment variable undefined");
531 }
532 *out = std::string(c_str);
533 return Status::OK();
534#endif
535}
536
537Status GetEnvVar(const std::string& name, std::string* out) {
538 return GetEnvVar(name.c_str(), out);
539}
540
541Status SetEnvVar(const char* name, const char* value) {
542#ifdef _WIN32
543 if (SetEnvironmentVariableA(name, value)) {
544 return Status::OK();
545 } else {
546 return Status::Invalid("failed setting environment variable");
547 }
548#else
549 if (setenv(name, value, 1) == 0) {
550 return Status::OK();
551 } else {
552 return Status::Invalid("failed setting environment variable");
553 }
554#endif
555}
556
557Status SetEnvVar(const std::string& name, const std::string& value) {
558 return SetEnvVar(name.c_str(), value.c_str());
559}
560
561Status DelEnvVar(const char* name) {
562#ifdef _WIN32
563 if (SetEnvironmentVariableA(name, nullptr)) {
564 return Status::OK();
565 } else {
566 return Status::Invalid("failed deleting environment variable");
567 }
568#else
569 if (unsetenv(name) == 0) {
570 return Status::OK();
571 } else {
572 return Status::Invalid("failed deleting environment variable");
573 }
574#endif
575}
576
577Status DelEnvVar(const std::string& name) { return DelEnvVar(name.c_str()); }
578
579} // namespace internal
580} // namespace arrow
581