1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | // IO interface implementations for OS files |
19 | |
20 | #ifndef ARROW_IO_FILE_H |
21 | #define ARROW_IO_FILE_H |
22 | |
23 | #include <cstdint> |
24 | #include <memory> |
25 | #include <string> |
26 | |
27 | #include "arrow/io/interfaces.h" |
28 | #include "arrow/util/visibility.h" |
29 | |
30 | namespace arrow { |
31 | |
// Forward declarations: this header only names these types in function
// signatures (as return types, raw pointers or shared_ptr targets), so their
// full definitions are not needed here.
class Buffer;
class MemoryPool;
class Status;
35 | |
36 | namespace io { |
37 | |
38 | class ARROW_EXPORT FileOutputStream : public OutputStream { |
39 | public: |
40 | ~FileOutputStream() override; |
41 | |
42 | /// \brief Open a local file for writing, truncating any existing file |
43 | /// \param[in] path with UTF8 encoding |
44 | /// \param[out] out a base interface OutputStream instance |
45 | /// |
46 | /// When opening a new file, any existing file with the indicated path is |
47 | /// truncated to 0 bytes, deleting any existing data |
48 | static Status Open(const std::string& path, std::shared_ptr<OutputStream>* out); |
49 | |
50 | /// \brief Open a local file for writing |
51 | /// \param[in] path with UTF8 encoding |
52 | /// \param[in] append append to existing file, otherwise truncate to 0 bytes |
53 | /// \param[out] out a base interface OutputStream instance |
54 | static Status Open(const std::string& path, bool append, |
55 | std::shared_ptr<OutputStream>* out); |
56 | |
57 | /// \brief Open a file descriptor for writing. The underlying file isn't |
58 | /// truncated. |
59 | /// \param[in] fd file descriptor |
60 | /// \param[out] out a base interface OutputStream instance |
61 | /// |
62 | /// The file descriptor becomes owned by the OutputStream, and will be closed |
63 | /// on Close() or destruction. |
64 | static Status Open(int fd, std::shared_ptr<OutputStream>* out); |
65 | |
66 | /// \brief Open a local file for writing, truncating any existing file |
67 | /// \param[in] path with UTF8 encoding |
68 | /// \param[out] file a FileOutputStream instance |
69 | /// |
70 | /// When opening a new file, any existing file with the indicated path is |
71 | /// truncated to 0 bytes, deleting any existing data |
72 | static Status Open(const std::string& path, std::shared_ptr<FileOutputStream>* file); |
73 | |
74 | /// \brief Open a local file for writing |
75 | /// \param[in] path with UTF8 encoding |
76 | /// \param[in] append append to existing file, otherwise truncate to 0 bytes |
77 | /// \param[out] file a FileOutputStream instance |
78 | static Status Open(const std::string& path, bool append, |
79 | std::shared_ptr<FileOutputStream>* file); |
80 | |
81 | /// \brief Open a file descriptor for writing. The underlying file isn't |
82 | /// truncated. |
83 | /// \param[in] fd file descriptor |
84 | /// \param[out] out a FileOutputStream instance |
85 | /// |
86 | /// The file descriptor becomes owned by the OutputStream, and will be closed |
87 | /// on Close() or destruction. |
88 | static Status Open(int fd, std::shared_ptr<FileOutputStream>* out); |
89 | |
90 | // OutputStream interface |
91 | Status Close() override; |
92 | bool closed() const override; |
93 | Status Tell(int64_t* position) const override; |
94 | |
95 | // Write bytes to the stream. Thread-safe |
96 | Status Write(const void* data, int64_t nbytes) override; |
97 | |
98 | using Writable::Write; |
99 | |
100 | int file_descriptor() const; |
101 | |
102 | private: |
103 | FileOutputStream(); |
104 | |
105 | class ARROW_NO_EXPORT FileOutputStreamImpl; |
106 | std::unique_ptr<FileOutputStreamImpl> impl_; |
107 | }; |
108 | |
109 | // Operating system file |
110 | class ARROW_EXPORT ReadableFile : public RandomAccessFile { |
111 | public: |
112 | ~ReadableFile() override; |
113 | |
114 | /// \brief Open a local file for reading |
115 | /// \param[in] path with UTF8 encoding |
116 | /// \param[out] file ReadableFile instance |
117 | /// Open file, allocate memory (if needed) from default memory pool |
118 | static Status Open(const std::string& path, std::shared_ptr<ReadableFile>* file); |
119 | |
120 | /// \brief Open a local file for reading |
121 | /// \param[in] path with UTF8 encoding |
122 | /// \param[in] pool a MemoryPool for memory allocations |
123 | /// \param[out] file ReadableFile instance |
124 | /// Open file with one's own memory pool for memory allocations |
125 | static Status Open(const std::string& path, MemoryPool* pool, |
126 | std::shared_ptr<ReadableFile>* file); |
127 | |
128 | /// \brief Open a local file for reading |
129 | /// \param[in] fd file descriptor |
130 | /// \param[out] file ReadableFile instance |
131 | /// Open file with one's own memory pool for memory allocations |
132 | /// |
133 | /// The file descriptor becomes owned by the ReadableFile, and will be closed |
134 | /// on Close() or destruction. |
135 | static Status Open(int fd, std::shared_ptr<ReadableFile>* file); |
136 | |
137 | /// \brief Open a local file for reading |
138 | /// \param[in] fd file descriptor |
139 | /// \param[in] pool a MemoryPool for memory allocations |
140 | /// \param[out] file ReadableFile instance |
141 | /// Open file with one's own memory pool for memory allocations |
142 | /// |
143 | /// The file descriptor becomes owned by the ReadableFile, and will be closed |
144 | /// on Close() or destruction. |
145 | static Status Open(int fd, MemoryPool* pool, std::shared_ptr<ReadableFile>* file); |
146 | |
147 | Status Close() override; |
148 | bool closed() const override; |
149 | Status Tell(int64_t* position) const override; |
150 | |
151 | // Read bytes from the file. Thread-safe |
152 | Status Read(int64_t nbytes, int64_t* bytes_read, void* buffer) override; |
153 | Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override; |
154 | |
155 | /// \brief Thread-safe implementation of ReadAt |
156 | Status ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read, |
157 | void* out) override; |
158 | |
159 | /// \brief Thread-safe implementation of ReadAt |
160 | Status ReadAt(int64_t position, int64_t nbytes, std::shared_ptr<Buffer>* out) override; |
161 | |
162 | Status GetSize(int64_t* size) override; |
163 | Status Seek(int64_t position) override; |
164 | |
165 | int file_descriptor() const; |
166 | |
167 | private: |
168 | explicit ReadableFile(MemoryPool* pool); |
169 | |
170 | class ARROW_NO_EXPORT ReadableFileImpl; |
171 | std::unique_ptr<ReadableFileImpl> impl_; |
172 | }; |
173 | |
174 | // A file interface that uses memory-mapped files for memory interactions, |
175 | // supporting zero copy reads. The same class is used for both reading and |
176 | // writing. |
177 | // |
178 | // If opening a file in a writable mode, it is not truncated first as with |
179 | // FileOutputStream |
180 | class ARROW_EXPORT MemoryMappedFile : public ReadWriteFileInterface { |
181 | public: |
182 | ~MemoryMappedFile() override; |
183 | |
184 | /// Create new file with indicated size, return in read/write mode |
185 | static Status Create(const std::string& path, int64_t size, |
186 | std::shared_ptr<MemoryMappedFile>* out); |
187 | |
188 | static Status Open(const std::string& path, FileMode::type mode, |
189 | std::shared_ptr<MemoryMappedFile>* out); |
190 | |
191 | Status Close() override; |
192 | |
193 | bool closed() const override; |
194 | |
195 | Status Tell(int64_t* position) const override; |
196 | |
197 | Status Seek(int64_t position) override; |
198 | |
199 | // Required by RandomAccessFile, copies memory into out. Not thread-safe |
200 | Status Read(int64_t nbytes, int64_t* bytes_read, void* out) override; |
201 | |
202 | // Zero copy read, moves position pointer. Not thread-safe |
203 | Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override; |
204 | |
205 | // Zero-copy read, leaves position unchanged. Acquires a reader lock |
206 | // for the duration of slice creation (typically very short). Is thread-safe. |
207 | Status ReadAt(int64_t position, int64_t nbytes, std::shared_ptr<Buffer>* out) override; |
208 | |
209 | // Raw copy of the memory at specified position. Thread-safe, but |
210 | // locks out other readers for the duration of memcpy. Prefer the |
211 | // zero copy method |
212 | Status ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read, |
213 | void* out) override; |
214 | |
215 | bool supports_zero_copy() const override; |
216 | |
217 | /// Write data at the current position in the file. Thread-safe |
218 | Status Write(const void* data, int64_t nbytes) override; |
219 | |
220 | /// Set the size of the map to new_size. |
221 | Status Resize(int64_t new_size); |
222 | |
223 | /// Write data at a particular position in the file. Thread-safe |
224 | Status WriteAt(int64_t position, const void* data, int64_t nbytes) override; |
225 | |
226 | // @return: the size in bytes of the memory source |
227 | Status GetSize(int64_t* size) const; |
228 | |
229 | // @return: the size in bytes of the memory source |
230 | Status GetSize(int64_t* size) override; |
231 | |
232 | int file_descriptor() const; |
233 | |
234 | private: |
235 | MemoryMappedFile(); |
236 | |
237 | Status WriteInternal(const void* data, int64_t nbytes); |
238 | |
239 | class ARROW_NO_EXPORT MemoryMap; |
240 | std::shared_ptr<MemoryMap> memory_map_; |
241 | }; |
242 | |
243 | } // namespace io |
244 | } // namespace arrow |
245 | |
246 | #endif // ARROW_IO_FILE_H |
247 | |