1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #ifndef ARROW_BUFFER_H |
19 | #define ARROW_BUFFER_H |
20 | |
21 | #include <algorithm> |
22 | #include <cstdint> |
23 | #include <cstring> |
24 | #include <memory> |
25 | #include <string> |
26 | #include <type_traits> |
27 | #include <vector> |
28 | |
29 | #include "arrow/memory_pool.h" |
30 | #include "arrow/status.h" |
31 | #include "arrow/util/macros.h" |
32 | #include "arrow/util/visibility.h" |
33 | |
34 | namespace arrow { |
35 | |
36 | // ---------------------------------------------------------------------- |
37 | // Buffer classes |
38 | |
39 | /// \class Buffer |
40 | /// \brief Object containing a pointer to a piece of contiguous memory with a |
41 | /// particular size. |
42 | /// |
43 | /// Buffers have two related notions of length: size and capacity. Size is |
44 | /// the number of bytes that might have valid data. Capacity is the number |
45 | /// of bytes that were allocated for the buffer in total. |
46 | /// |
47 | /// The Buffer base class does not own its memory, but subclasses often do. |
48 | /// |
49 | /// The following invariant is always true: Size <= Capacity |
50 | class ARROW_EXPORT Buffer { |
51 | public: |
52 | /// \brief Construct from buffer and size without copying memory |
53 | /// |
54 | /// \param[in] data a memory buffer |
55 | /// \param[in] size buffer size |
56 | /// |
57 | /// \note The passed memory must be kept alive through some other means |
58 | Buffer(const uint8_t* data, int64_t size) |
59 | : is_mutable_(false), |
60 | data_(data), |
61 | mutable_data_(NULLPTR), |
62 | size_(size), |
63 | capacity_(size) {} |
64 | |
65 | /// \brief Construct from std::string without copying memory |
66 | /// |
67 | /// \param[in] data a std::string object |
68 | /// |
69 | /// \note The std::string must stay alive for the lifetime of the Buffer, so |
70 | /// temporary rvalue strings must be stored in an lvalue somewhere |
71 | explicit Buffer(const std::string& data) |
72 | : Buffer(reinterpret_cast<const uint8_t*>(data.c_str()), |
73 | static_cast<int64_t>(data.size())) {} |
74 | |
75 | virtual ~Buffer() = default; |
76 | |
77 | /// An offset into data that is owned by another buffer, but we want to be |
78 | /// able to retain a valid pointer to it even after other shared_ptr's to the |
79 | /// parent buffer have been destroyed |
80 | /// |
81 | /// This method makes no assertions about alignment or padding of the buffer but |
82 | /// in general we expected buffers to be aligned and padded to 64 bytes. In the future |
83 | /// we might add utility methods to help determine if a buffer satisfies this contract. |
84 | Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size) |
85 | : Buffer(parent->data() + offset, size) { |
86 | parent_ = parent; |
87 | } |
88 | |
89 | bool is_mutable() const { return is_mutable_; } |
90 | |
91 | /// Return true if both buffers are the same size and contain the same bytes |
92 | /// up to the number of compared bytes |
93 | bool Equals(const Buffer& other, int64_t nbytes) const; |
94 | |
95 | /// Return true if both buffers are the same size and contain the same bytes |
96 | bool Equals(const Buffer& other) const; |
97 | |
98 | /// Copy a section of the buffer into a new Buffer. |
99 | Status Copy(const int64_t start, const int64_t nbytes, MemoryPool* pool, |
100 | std::shared_ptr<Buffer>* out) const; |
101 | |
102 | /// Copy a section of the buffer using the default memory pool into a new Buffer. |
103 | Status Copy(const int64_t start, const int64_t nbytes, |
104 | std::shared_ptr<Buffer>* out) const; |
105 | |
106 | /// Zero bytes in padding, i.e. bytes between size_ and capacity_. |
107 | void ZeroPadding() { |
108 | #ifndef NDEBUG |
109 | CheckMutable(); |
110 | #endif |
111 | // A zero-capacity buffer can have a null data pointer |
112 | if (capacity_ != 0) { |
113 | memset(mutable_data_ + size_, 0, static_cast<size_t>(capacity_ - size_)); |
114 | } |
115 | } |
116 | |
117 | /// \brief Construct a new buffer that owns its memory from a std::string |
118 | /// |
119 | /// \param[in] data a std::string object |
120 | /// \param[in] pool a memory pool |
121 | /// \param[out] out the created buffer |
122 | /// |
123 | /// \return Status message |
124 | static Status FromString(const std::string& data, MemoryPool* pool, |
125 | std::shared_ptr<Buffer>* out); |
126 | |
127 | /// \brief Construct a new buffer that owns its memory from a std::string |
128 | /// using the default memory pool |
129 | static Status FromString(const std::string& data, std::shared_ptr<Buffer>* out); |
130 | |
131 | /// \brief Construct an immutable buffer that takes ownership of the contents |
132 | /// of an std::string |
133 | /// \param[in] data an rvalue-reference of a string |
134 | /// \return a new Buffer instance |
135 | static std::shared_ptr<Buffer> FromString(std::string&& data); |
136 | |
137 | /// \brief Create buffer referencing typed memory with some length without |
138 | /// copying |
139 | /// \param[in] data the typed memory as C array |
140 | /// \param[in] length the number of values in the array |
141 | /// \return a new shared_ptr<Buffer> |
142 | template <typename T, typename SizeType = int64_t> |
143 | static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) { |
144 | return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data), |
145 | static_cast<int64_t>(sizeof(T) * length)); |
146 | } |
147 | |
148 | /// \brief Create buffer referencing std::vector with some length without |
149 | /// copying |
150 | /// \param[in] data the vector to be referenced. If this vector is changed, |
151 | /// the buffer may become invalid |
152 | /// \return a new shared_ptr<Buffer> |
153 | template <typename T> |
154 | static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) { |
155 | return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data.data()), |
156 | static_cast<int64_t>(sizeof(T) * data.size())); |
157 | } |
158 | |
159 | /// \brief Copy buffer contents into a new std::string |
160 | /// \return std::string |
161 | /// \note Can throw std::bad_alloc if buffer is large |
162 | std::string ToString() const; |
163 | |
164 | /// \brief Return a pointer to the buffer's data |
165 | const uint8_t* data() const { return data_; } |
166 | /// \brief Return a writable pointer to the buffer's data |
167 | /// |
168 | /// The buffer has to be mutable. Otherwise, an assertion may be thrown |
169 | /// or a null pointer may be returned. |
170 | uint8_t* mutable_data() { |
171 | #ifndef NDEBUG |
172 | CheckMutable(); |
173 | #endif |
174 | return mutable_data_; |
175 | } |
176 | |
177 | /// \brief Return the buffer's size in bytes |
178 | int64_t size() const { return size_; } |
179 | |
180 | /// \brief Return the buffer's capacity (number of allocated bytes) |
181 | int64_t capacity() const { return capacity_; } |
182 | |
183 | std::shared_ptr<Buffer> parent() const { return parent_; } |
184 | |
185 | protected: |
186 | bool is_mutable_; |
187 | const uint8_t* data_; |
188 | uint8_t* mutable_data_; |
189 | int64_t size_; |
190 | int64_t capacity_; |
191 | |
192 | // null by default, but may be set |
193 | std::shared_ptr<Buffer> parent_; |
194 | |
195 | void CheckMutable() const; |
196 | |
197 | private: |
198 | ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer); |
199 | }; |
200 | |
201 | /// \defgroup buffer-slicing-functions Functions for slicing buffers |
202 | /// |
203 | /// @{ |
204 | |
205 | /// \brief Construct a view on a buffer at the given offset and length. |
206 | /// |
207 | /// This function cannot fail and does not check for errors (except in debug builds) |
208 | static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer, |
209 | const int64_t offset, |
210 | const int64_t length) { |
211 | return std::make_shared<Buffer>(buffer, offset, length); |
212 | } |
213 | |
214 | /// \brief Construct a view on a buffer at the given offset, up to the buffer's end. |
215 | /// |
216 | /// This function cannot fail and does not check for errors (except in debug builds) |
217 | static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer, |
218 | const int64_t offset) { |
219 | int64_t length = buffer->size() - offset; |
220 | return SliceBuffer(buffer, offset, length); |
221 | } |
222 | |
223 | /// \brief Like SliceBuffer, but construct a mutable buffer slice. |
224 | /// |
225 | /// If the parent buffer is not mutable, behavior is undefined (it may abort |
226 | /// in debug builds). |
227 | ARROW_EXPORT |
228 | std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer, |
229 | const int64_t offset, const int64_t length); |
230 | |
231 | /// @} |
232 | |
233 | /// \class MutableBuffer |
234 | /// \brief A Buffer whose contents can be mutated. May or may not own its data. |
235 | class ARROW_EXPORT MutableBuffer : public Buffer { |
236 | public: |
237 | MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) { |
238 | mutable_data_ = data; |
239 | is_mutable_ = true; |
240 | } |
241 | |
242 | MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, |
243 | const int64_t size); |
244 | |
245 | /// \brief Create buffer referencing typed memory with some length |
246 | /// \param[in] data the typed memory as C array |
247 | /// \param[in] length the number of values in the array |
248 | /// \return a new shared_ptr<Buffer> |
249 | template <typename T, typename SizeType = int64_t> |
250 | static std::shared_ptr<Buffer> Wrap(T* data, SizeType length) { |
251 | return std::make_shared<MutableBuffer>(reinterpret_cast<uint8_t*>(data), |
252 | static_cast<int64_t>(sizeof(T) * length)); |
253 | } |
254 | |
255 | protected: |
256 | MutableBuffer() : Buffer(NULLPTR, 0) {} |
257 | }; |
258 | |
259 | /// \class ResizableBuffer |
260 | /// \brief A mutable buffer that can be resized |
261 | class ARROW_EXPORT ResizableBuffer : public MutableBuffer { |
262 | public: |
263 | /// Change buffer reported size to indicated size, allocating memory if |
264 | /// necessary. This will ensure that the capacity of the buffer is a multiple |
265 | /// of 64 bytes as defined in Layout.md. |
266 | /// Consider using ZeroPadding afterwards, to conform to the Arrow layout |
267 | /// specification. |
268 | /// |
269 | /// @param new_size The new size for the buffer. |
270 | /// @param shrink_to_fit Whether to shrink the capacity if new size < current size |
271 | virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0; |
272 | |
273 | /// Ensure that buffer has enough memory allocated to fit the indicated |
274 | /// capacity (and meets the 64 byte padding requirement in Layout.md). |
275 | /// It does not change buffer's reported size and doesn't zero the padding. |
276 | virtual Status Reserve(const int64_t new_capacity) = 0; |
277 | |
278 | template <class T> |
279 | Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit = true) { |
280 | return Resize(sizeof(T) * new_nb_elements, shrink_to_fit); |
281 | } |
282 | |
283 | template <class T> |
284 | Status TypedReserve(const int64_t new_nb_elements) { |
285 | return Reserve(sizeof(T) * new_nb_elements); |
286 | } |
287 | |
288 | protected: |
289 | ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {} |
290 | }; |
291 | |
292 | /// \defgroup buffer-allocation-functions Functions for allocating buffers |
293 | /// |
294 | /// @{ |
295 | |
296 | /// \brief Allocate a fixed size mutable buffer from a memory pool, zero its padding. |
297 | /// |
298 | /// \param[in] pool a memory pool |
299 | /// \param[in] size size of buffer to allocate |
300 | /// \param[out] out the allocated buffer (contains padding) |
301 | /// |
302 | /// \return Status message |
303 | ARROW_EXPORT |
304 | Status AllocateBuffer(MemoryPool* pool, const int64_t size, std::shared_ptr<Buffer>* out); |
305 | |
306 | /// \brief Allocate a fixed size mutable buffer from a memory pool, zero its padding. |
307 | /// |
308 | /// \param[in] pool a memory pool |
309 | /// \param[in] size size of buffer to allocate |
310 | /// \param[out] out the allocated buffer (contains padding) |
311 | /// |
312 | /// \return Status message |
313 | ARROW_EXPORT |
314 | Status AllocateBuffer(MemoryPool* pool, const int64_t size, std::unique_ptr<Buffer>* out); |
315 | |
316 | /// \brief Allocate a fixed-size mutable buffer from the default memory pool |
317 | /// |
318 | /// \param[in] size size of buffer to allocate |
319 | /// \param[out] out the allocated buffer (contains padding) |
320 | /// |
321 | /// \return Status message |
322 | ARROW_EXPORT |
323 | Status AllocateBuffer(const int64_t size, std::shared_ptr<Buffer>* out); |
324 | |
325 | /// \brief Allocate a fixed-size mutable buffer from the default memory pool |
326 | /// |
327 | /// \param[in] size size of buffer to allocate |
328 | /// \param[out] out the allocated buffer (contains padding) |
329 | /// |
330 | /// \return Status message |
331 | ARROW_EXPORT |
332 | Status AllocateBuffer(const int64_t size, std::unique_ptr<Buffer>* out); |
333 | |
334 | /// \brief Allocate a resizeable buffer from a memory pool, zero its padding. |
335 | /// |
336 | /// \param[in] pool a memory pool |
337 | /// \param[in] size size of buffer to allocate |
338 | /// \param[out] out the allocated buffer |
339 | /// |
340 | /// \return Status message |
341 | ARROW_EXPORT |
342 | Status AllocateResizableBuffer(MemoryPool* pool, const int64_t size, |
343 | std::shared_ptr<ResizableBuffer>* out); |
344 | |
345 | /// \brief Allocate a resizeable buffer from a memory pool, zero its padding. |
346 | /// |
347 | /// \param[in] pool a memory pool |
348 | /// \param[in] size size of buffer to allocate |
349 | /// \param[out] out the allocated buffer |
350 | /// |
351 | /// \return Status message |
352 | ARROW_EXPORT |
353 | Status AllocateResizableBuffer(MemoryPool* pool, const int64_t size, |
354 | std::unique_ptr<ResizableBuffer>* out); |
355 | |
356 | /// \brief Allocate a resizeable buffer from the default memory pool |
357 | /// |
358 | /// \param[in] size size of buffer to allocate |
359 | /// \param[out] out the allocated buffer |
360 | /// |
361 | /// \return Status message |
362 | ARROW_EXPORT |
363 | Status AllocateResizableBuffer(const int64_t size, std::shared_ptr<ResizableBuffer>* out); |
364 | |
365 | /// \brief Allocate a resizeable buffer from the default memory pool |
366 | /// |
367 | /// \param[in] size size of buffer to allocate |
368 | /// \param[out] out the allocated buffer |
369 | /// |
370 | /// \return Status message |
371 | ARROW_EXPORT |
372 | Status AllocateResizableBuffer(const int64_t size, std::unique_ptr<ResizableBuffer>* out); |
373 | |
374 | /// \brief Allocate a bitmap buffer from a memory pool |
375 | /// no guarantee on values is provided. |
376 | /// |
377 | /// \param[in] pool memory pool to allocate memory from |
378 | /// \param[in] length size in bits of bitmap to allocate |
379 | /// \param[out] out the resulting buffer |
380 | /// |
381 | /// \return Status message |
382 | ARROW_EXPORT |
383 | Status AllocateBitmap(MemoryPool* pool, int64_t length, std::shared_ptr<Buffer>* out); |
384 | |
385 | /// \brief Allocate a zero-initialized bitmap buffer from a memory pool |
386 | /// |
387 | /// \param[in] pool memory pool to allocate memory from |
388 | /// \param[in] length size in bits of bitmap to allocate |
389 | /// \param[out] out the resulting buffer (zero-initialized). |
390 | /// |
391 | /// \return Status message |
392 | ARROW_EXPORT |
393 | Status AllocateEmptyBitmap(MemoryPool* pool, int64_t length, |
394 | std::shared_ptr<Buffer>* out); |
395 | |
396 | /// \brief Allocate a zero-initialized bitmap buffer from the default memory pool |
397 | /// |
398 | /// \param[in] length size in bits of bitmap to allocate |
399 | /// \param[out] out the resulting buffer |
400 | /// |
401 | /// \return Status message |
402 | ARROW_EXPORT |
403 | Status AllocateEmptyBitmap(int64_t length, std::shared_ptr<Buffer>* out); |
404 | |
405 | /// @} |
406 | |
407 | } // namespace arrow |
408 | |
409 | #endif // ARROW_BUFFER_H |
410 | |