1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// IO interface implementations for OS files
19
20#ifndef ARROW_IO_FILE_H
21#define ARROW_IO_FILE_H
22
23#include <cstdint>
24#include <memory>
25#include <string>
26
27#include "arrow/io/interfaces.h"
28#include "arrow/util/visibility.h"
29
30namespace arrow {
31
32class Buffer;
33class MemoryPool;
34class Status;
35
36namespace io {
37
38class ARROW_EXPORT FileOutputStream : public OutputStream {
39 public:
40 ~FileOutputStream() override;
41
42 /// \brief Open a local file for writing, truncating any existing file
43 /// \param[in] path with UTF8 encoding
44 /// \param[out] out a base interface OutputStream instance
45 ///
46 /// When opening a new file, any existing file with the indicated path is
47 /// truncated to 0 bytes, deleting any existing data
48 static Status Open(const std::string& path, std::shared_ptr<OutputStream>* out);
49
50 /// \brief Open a local file for writing
51 /// \param[in] path with UTF8 encoding
52 /// \param[in] append append to existing file, otherwise truncate to 0 bytes
53 /// \param[out] out a base interface OutputStream instance
54 static Status Open(const std::string& path, bool append,
55 std::shared_ptr<OutputStream>* out);
56
57 /// \brief Open a file descriptor for writing. The underlying file isn't
58 /// truncated.
59 /// \param[in] fd file descriptor
60 /// \param[out] out a base interface OutputStream instance
61 ///
62 /// The file descriptor becomes owned by the OutputStream, and will be closed
63 /// on Close() or destruction.
64 static Status Open(int fd, std::shared_ptr<OutputStream>* out);
65
66 /// \brief Open a local file for writing, truncating any existing file
67 /// \param[in] path with UTF8 encoding
68 /// \param[out] file a FileOutputStream instance
69 ///
70 /// When opening a new file, any existing file with the indicated path is
71 /// truncated to 0 bytes, deleting any existing data
72 static Status Open(const std::string& path, std::shared_ptr<FileOutputStream>* file);
73
74 /// \brief Open a local file for writing
75 /// \param[in] path with UTF8 encoding
76 /// \param[in] append append to existing file, otherwise truncate to 0 bytes
77 /// \param[out] file a FileOutputStream instance
78 static Status Open(const std::string& path, bool append,
79 std::shared_ptr<FileOutputStream>* file);
80
81 /// \brief Open a file descriptor for writing. The underlying file isn't
82 /// truncated.
83 /// \param[in] fd file descriptor
84 /// \param[out] out a FileOutputStream instance
85 ///
86 /// The file descriptor becomes owned by the OutputStream, and will be closed
87 /// on Close() or destruction.
88 static Status Open(int fd, std::shared_ptr<FileOutputStream>* out);
89
90 // OutputStream interface
91 Status Close() override;
92 bool closed() const override;
93 Status Tell(int64_t* position) const override;
94
95 // Write bytes to the stream. Thread-safe
96 Status Write(const void* data, int64_t nbytes) override;
97
98 using Writable::Write;
99
100 int file_descriptor() const;
101
102 private:
103 FileOutputStream();
104
105 class ARROW_NO_EXPORT FileOutputStreamImpl;
106 std::unique_ptr<FileOutputStreamImpl> impl_;
107};
108
109// Operating system file
110class ARROW_EXPORT ReadableFile : public RandomAccessFile {
111 public:
112 ~ReadableFile() override;
113
114 /// \brief Open a local file for reading
115 /// \param[in] path with UTF8 encoding
116 /// \param[out] file ReadableFile instance
117 /// Open file, allocate memory (if needed) from default memory pool
118 static Status Open(const std::string& path, std::shared_ptr<ReadableFile>* file);
119
120 /// \brief Open a local file for reading
121 /// \param[in] path with UTF8 encoding
122 /// \param[in] pool a MemoryPool for memory allocations
123 /// \param[out] file ReadableFile instance
124 /// Open file with one's own memory pool for memory allocations
125 static Status Open(const std::string& path, MemoryPool* pool,
126 std::shared_ptr<ReadableFile>* file);
127
128 /// \brief Open a local file for reading
129 /// \param[in] fd file descriptor
130 /// \param[out] file ReadableFile instance
131 /// Open file with one's own memory pool for memory allocations
132 ///
133 /// The file descriptor becomes owned by the ReadableFile, and will be closed
134 /// on Close() or destruction.
135 static Status Open(int fd, std::shared_ptr<ReadableFile>* file);
136
137 /// \brief Open a local file for reading
138 /// \param[in] fd file descriptor
139 /// \param[in] pool a MemoryPool for memory allocations
140 /// \param[out] file ReadableFile instance
141 /// Open file with one's own memory pool for memory allocations
142 ///
143 /// The file descriptor becomes owned by the ReadableFile, and will be closed
144 /// on Close() or destruction.
145 static Status Open(int fd, MemoryPool* pool, std::shared_ptr<ReadableFile>* file);
146
147 Status Close() override;
148 bool closed() const override;
149 Status Tell(int64_t* position) const override;
150
151 // Read bytes from the file. Thread-safe
152 Status Read(int64_t nbytes, int64_t* bytes_read, void* buffer) override;
153 Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override;
154
155 /// \brief Thread-safe implementation of ReadAt
156 Status ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read,
157 void* out) override;
158
159 /// \brief Thread-safe implementation of ReadAt
160 Status ReadAt(int64_t position, int64_t nbytes, std::shared_ptr<Buffer>* out) override;
161
162 Status GetSize(int64_t* size) override;
163 Status Seek(int64_t position) override;
164
165 int file_descriptor() const;
166
167 private:
168 explicit ReadableFile(MemoryPool* pool);
169
170 class ARROW_NO_EXPORT ReadableFileImpl;
171 std::unique_ptr<ReadableFileImpl> impl_;
172};
173
174// A file interface that uses memory-mapped files for memory interactions,
175// supporting zero copy reads. The same class is used for both reading and
176// writing.
177//
178// If opening a file in a writable mode, it is not truncated first as with
179// FileOutputStream
180class ARROW_EXPORT MemoryMappedFile : public ReadWriteFileInterface {
181 public:
182 ~MemoryMappedFile() override;
183
184 /// Create new file with indicated size, return in read/write mode
185 static Status Create(const std::string& path, int64_t size,
186 std::shared_ptr<MemoryMappedFile>* out);
187
188 static Status Open(const std::string& path, FileMode::type mode,
189 std::shared_ptr<MemoryMappedFile>* out);
190
191 Status Close() override;
192
193 bool closed() const override;
194
195 Status Tell(int64_t* position) const override;
196
197 Status Seek(int64_t position) override;
198
199 // Required by RandomAccessFile, copies memory into out. Not thread-safe
200 Status Read(int64_t nbytes, int64_t* bytes_read, void* out) override;
201
202 // Zero copy read, moves position pointer. Not thread-safe
203 Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override;
204
205 // Zero-copy read, leaves position unchanged. Acquires a reader lock
206 // for the duration of slice creation (typically very short). Is thread-safe.
207 Status ReadAt(int64_t position, int64_t nbytes, std::shared_ptr<Buffer>* out) override;
208
209 // Raw copy of the memory at specified position. Thread-safe, but
210 // locks out other readers for the duration of memcpy. Prefer the
211 // zero copy method
212 Status ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read,
213 void* out) override;
214
215 bool supports_zero_copy() const override;
216
217 /// Write data at the current position in the file. Thread-safe
218 Status Write(const void* data, int64_t nbytes) override;
219
220 /// Set the size of the map to new_size.
221 Status Resize(int64_t new_size);
222
223 /// Write data at a particular position in the file. Thread-safe
224 Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
225
226 // @return: the size in bytes of the memory source
227 Status GetSize(int64_t* size) const;
228
229 // @return: the size in bytes of the memory source
230 Status GetSize(int64_t* size) override;
231
232 int file_descriptor() const;
233
234 private:
235 MemoryMappedFile();
236
237 Status WriteInternal(const void* data, int64_t nbytes);
238
239 class ARROW_NO_EXPORT MemoryMap;
240 std::shared_ptr<MemoryMap> memory_map_;
241};
242
243} // namespace io
244} // namespace arrow
245
246#endif // ARROW_IO_FILE_H
247