1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#ifndef ARROW_BUFFER_H
19#define ARROW_BUFFER_H
20
21#include <algorithm>
22#include <cstdint>
23#include <cstring>
24#include <memory>
25#include <string>
26#include <type_traits>
27#include <vector>
28
29#include "arrow/memory_pool.h"
30#include "arrow/status.h"
31#include "arrow/util/macros.h"
32#include "arrow/util/visibility.h"
33
34namespace arrow {
35
36// ----------------------------------------------------------------------
37// Buffer classes
38
39/// \class Buffer
40/// \brief Object containing a pointer to a piece of contiguous memory with a
41/// particular size.
42///
43/// Buffers have two related notions of length: size and capacity. Size is
44/// the number of bytes that might have valid data. Capacity is the number
45/// of bytes that were allocated for the buffer in total.
46///
47/// The Buffer base class does not own its memory, but subclasses often do.
48///
49/// The following invariant is always true: Size <= Capacity
50class ARROW_EXPORT Buffer {
51 public:
52 /// \brief Construct from buffer and size without copying memory
53 ///
54 /// \param[in] data a memory buffer
55 /// \param[in] size buffer size
56 ///
57 /// \note The passed memory must be kept alive through some other means
58 Buffer(const uint8_t* data, int64_t size)
59 : is_mutable_(false),
60 data_(data),
61 mutable_data_(NULLPTR),
62 size_(size),
63 capacity_(size) {}
64
65 /// \brief Construct from std::string without copying memory
66 ///
67 /// \param[in] data a std::string object
68 ///
69 /// \note The std::string must stay alive for the lifetime of the Buffer, so
70 /// temporary rvalue strings must be stored in an lvalue somewhere
71 explicit Buffer(const std::string& data)
72 : Buffer(reinterpret_cast<const uint8_t*>(data.c_str()),
73 static_cast<int64_t>(data.size())) {}
74
75 virtual ~Buffer() = default;
76
77 /// An offset into data that is owned by another buffer, but we want to be
78 /// able to retain a valid pointer to it even after other shared_ptr's to the
79 /// parent buffer have been destroyed
80 ///
81 /// This method makes no assertions about alignment or padding of the buffer but
82 /// in general we expected buffers to be aligned and padded to 64 bytes. In the future
83 /// we might add utility methods to help determine if a buffer satisfies this contract.
84 Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size)
85 : Buffer(parent->data() + offset, size) {
86 parent_ = parent;
87 }
88
89 bool is_mutable() const { return is_mutable_; }
90
91 /// Return true if both buffers are the same size and contain the same bytes
92 /// up to the number of compared bytes
93 bool Equals(const Buffer& other, int64_t nbytes) const;
94
95 /// Return true if both buffers are the same size and contain the same bytes
96 bool Equals(const Buffer& other) const;
97
98 /// Copy a section of the buffer into a new Buffer.
99 Status Copy(const int64_t start, const int64_t nbytes, MemoryPool* pool,
100 std::shared_ptr<Buffer>* out) const;
101
102 /// Copy a section of the buffer using the default memory pool into a new Buffer.
103 Status Copy(const int64_t start, const int64_t nbytes,
104 std::shared_ptr<Buffer>* out) const;
105
106 /// Zero bytes in padding, i.e. bytes between size_ and capacity_.
107 void ZeroPadding() {
108#ifndef NDEBUG
109 CheckMutable();
110#endif
111 // A zero-capacity buffer can have a null data pointer
112 if (capacity_ != 0) {
113 memset(mutable_data_ + size_, 0, static_cast<size_t>(capacity_ - size_));
114 }
115 }
116
117 /// \brief Construct a new buffer that owns its memory from a std::string
118 ///
119 /// \param[in] data a std::string object
120 /// \param[in] pool a memory pool
121 /// \param[out] out the created buffer
122 ///
123 /// \return Status message
124 static Status FromString(const std::string& data, MemoryPool* pool,
125 std::shared_ptr<Buffer>* out);
126
127 /// \brief Construct a new buffer that owns its memory from a std::string
128 /// using the default memory pool
129 static Status FromString(const std::string& data, std::shared_ptr<Buffer>* out);
130
131 /// \brief Construct an immutable buffer that takes ownership of the contents
132 /// of an std::string
133 /// \param[in] data an rvalue-reference of a string
134 /// \return a new Buffer instance
135 static std::shared_ptr<Buffer> FromString(std::string&& data);
136
137 /// \brief Create buffer referencing typed memory with some length without
138 /// copying
139 /// \param[in] data the typed memory as C array
140 /// \param[in] length the number of values in the array
141 /// \return a new shared_ptr<Buffer>
142 template <typename T, typename SizeType = int64_t>
143 static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) {
144 return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data),
145 static_cast<int64_t>(sizeof(T) * length));
146 }
147
148 /// \brief Create buffer referencing std::vector with some length without
149 /// copying
150 /// \param[in] data the vector to be referenced. If this vector is changed,
151 /// the buffer may become invalid
152 /// \return a new shared_ptr<Buffer>
153 template <typename T>
154 static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) {
155 return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data.data()),
156 static_cast<int64_t>(sizeof(T) * data.size()));
157 }
158
159 /// \brief Copy buffer contents into a new std::string
160 /// \return std::string
161 /// \note Can throw std::bad_alloc if buffer is large
162 std::string ToString() const;
163
164 /// \brief Return a pointer to the buffer's data
165 const uint8_t* data() const { return data_; }
166 /// \brief Return a writable pointer to the buffer's data
167 ///
168 /// The buffer has to be mutable. Otherwise, an assertion may be thrown
169 /// or a null pointer may be returned.
170 uint8_t* mutable_data() {
171#ifndef NDEBUG
172 CheckMutable();
173#endif
174 return mutable_data_;
175 }
176
177 /// \brief Return the buffer's size in bytes
178 int64_t size() const { return size_; }
179
180 /// \brief Return the buffer's capacity (number of allocated bytes)
181 int64_t capacity() const { return capacity_; }
182
183 std::shared_ptr<Buffer> parent() const { return parent_; }
184
185 protected:
186 bool is_mutable_;
187 const uint8_t* data_;
188 uint8_t* mutable_data_;
189 int64_t size_;
190 int64_t capacity_;
191
192 // null by default, but may be set
193 std::shared_ptr<Buffer> parent_;
194
195 void CheckMutable() const;
196
197 private:
198 ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer);
199};
200
201/// \defgroup buffer-slicing-functions Functions for slicing buffers
202///
203/// @{
204
205/// \brief Construct a view on a buffer at the given offset and length.
206///
207/// This function cannot fail and does not check for errors (except in debug builds)
208static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
209 const int64_t offset,
210 const int64_t length) {
211 return std::make_shared<Buffer>(buffer, offset, length);
212}
213
214/// \brief Construct a view on a buffer at the given offset, up to the buffer's end.
215///
216/// This function cannot fail and does not check for errors (except in debug builds)
217static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
218 const int64_t offset) {
219 int64_t length = buffer->size() - offset;
220 return SliceBuffer(buffer, offset, length);
221}
222
/// \brief Like SliceBuffer, but construct a mutable buffer slice.
///
/// If the parent buffer is not mutable, behavior is undefined (it may abort
/// in debug builds).
///
/// \param[in] buffer the buffer to slice
/// \param[in] offset start of the slice in bytes
/// \param[in] length size of the slice in bytes
/// \return the slice, returned as a shared_ptr<Buffer>
ARROW_EXPORT
std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
                                           const int64_t offset, const int64_t length);
230
231/// @}
232
233/// \class MutableBuffer
234/// \brief A Buffer whose contents can be mutated. May or may not own its data.
235class ARROW_EXPORT MutableBuffer : public Buffer {
236 public:
237 MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
238 mutable_data_ = data;
239 is_mutable_ = true;
240 }
241
242 MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
243 const int64_t size);
244
245 /// \brief Create buffer referencing typed memory with some length
246 /// \param[in] data the typed memory as C array
247 /// \param[in] length the number of values in the array
248 /// \return a new shared_ptr<Buffer>
249 template <typename T, typename SizeType = int64_t>
250 static std::shared_ptr<Buffer> Wrap(T* data, SizeType length) {
251 return std::make_shared<MutableBuffer>(reinterpret_cast<uint8_t*>(data),
252 static_cast<int64_t>(sizeof(T) * length));
253 }
254
255 protected:
256 MutableBuffer() : Buffer(NULLPTR, 0) {}
257};
258
259/// \class ResizableBuffer
260/// \brief A mutable buffer that can be resized
261class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
262 public:
263 /// Change buffer reported size to indicated size, allocating memory if
264 /// necessary. This will ensure that the capacity of the buffer is a multiple
265 /// of 64 bytes as defined in Layout.md.
266 /// Consider using ZeroPadding afterwards, to conform to the Arrow layout
267 /// specification.
268 ///
269 /// @param new_size The new size for the buffer.
270 /// @param shrink_to_fit Whether to shrink the capacity if new size < current size
271 virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0;
272
273 /// Ensure that buffer has enough memory allocated to fit the indicated
274 /// capacity (and meets the 64 byte padding requirement in Layout.md).
275 /// It does not change buffer's reported size and doesn't zero the padding.
276 virtual Status Reserve(const int64_t new_capacity) = 0;
277
278 template <class T>
279 Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit = true) {
280 return Resize(sizeof(T) * new_nb_elements, shrink_to_fit);
281 }
282
283 template <class T>
284 Status TypedReserve(const int64_t new_nb_elements) {
285 return Reserve(sizeof(T) * new_nb_elements);
286 }
287
288 protected:
289 ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {}
290};
291
292/// \defgroup buffer-allocation-functions Functions for allocating buffers
293///
294/// @{
295
/// \brief Allocate a fixed-size mutable buffer from a memory pool, zero its padding.
///
/// The buffer is returned through a std::shared_ptr.
///
/// \param[in] pool a memory pool
/// \param[in] size size of buffer to allocate, in bytes
/// \param[out] out the allocated buffer (contains padding)
///
/// \return Status message
ARROW_EXPORT
Status AllocateBuffer(MemoryPool* pool, const int64_t size, std::shared_ptr<Buffer>* out);

/// \brief Allocate a fixed-size mutable buffer from a memory pool, zero its padding.
///
/// The buffer is returned through a std::unique_ptr.
///
/// \param[in] pool a memory pool
/// \param[in] size size of buffer to allocate, in bytes
/// \param[out] out the allocated buffer (contains padding)
///
/// \return Status message
ARROW_EXPORT
Status AllocateBuffer(MemoryPool* pool, const int64_t size, std::unique_ptr<Buffer>* out);

/// \brief Allocate a fixed-size mutable buffer from the default memory pool
///
/// The buffer is returned through a std::shared_ptr.
///
/// \param[in] size size of buffer to allocate, in bytes
/// \param[out] out the allocated buffer (contains padding)
///
/// \return Status message
ARROW_EXPORT
Status AllocateBuffer(const int64_t size, std::shared_ptr<Buffer>* out);

/// \brief Allocate a fixed-size mutable buffer from the default memory pool
///
/// The buffer is returned through a std::unique_ptr.
///
/// \param[in] size size of buffer to allocate, in bytes
/// \param[out] out the allocated buffer (contains padding)
///
/// \return Status message
ARROW_EXPORT
Status AllocateBuffer(const int64_t size, std::unique_ptr<Buffer>* out);
333
/// \brief Allocate a resizable buffer from a memory pool, zero its padding.
///
/// The buffer is returned through a std::shared_ptr.
///
/// \param[in] pool a memory pool
/// \param[in] size size of buffer to allocate, in bytes
/// \param[out] out the allocated buffer
///
/// \return Status message
ARROW_EXPORT
Status AllocateResizableBuffer(MemoryPool* pool, const int64_t size,
                               std::shared_ptr<ResizableBuffer>* out);

/// \brief Allocate a resizable buffer from a memory pool, zero its padding.
///
/// The buffer is returned through a std::unique_ptr.
///
/// \param[in] pool a memory pool
/// \param[in] size size of buffer to allocate, in bytes
/// \param[out] out the allocated buffer
///
/// \return Status message
ARROW_EXPORT
Status AllocateResizableBuffer(MemoryPool* pool, const int64_t size,
                               std::unique_ptr<ResizableBuffer>* out);

/// \brief Allocate a resizable buffer from the default memory pool
///
/// The buffer is returned through a std::shared_ptr.
///
/// \param[in] size size of buffer to allocate, in bytes
/// \param[out] out the allocated buffer
///
/// \return Status message
ARROW_EXPORT
Status AllocateResizableBuffer(const int64_t size, std::shared_ptr<ResizableBuffer>* out);

/// \brief Allocate a resizable buffer from the default memory pool
///
/// The buffer is returned through a std::unique_ptr.
///
/// \param[in] size size of buffer to allocate, in bytes
/// \param[out] out the allocated buffer
///
/// \return Status message
ARROW_EXPORT
Status AllocateResizableBuffer(const int64_t size, std::unique_ptr<ResizableBuffer>* out);
373
/// \brief Allocate a bitmap buffer from a memory pool
///
/// No guarantee is provided on the initial values of the bits.
///
/// \param[in] pool memory pool to allocate memory from
/// \param[in] length size in bits of bitmap to allocate
/// \param[out] out the resulting buffer
///
/// \return Status message
ARROW_EXPORT
Status AllocateBitmap(MemoryPool* pool, int64_t length, std::shared_ptr<Buffer>* out);
384
/// \brief Allocate a zero-initialized bitmap buffer from a memory pool
///
/// \param[in] pool memory pool to allocate memory from
/// \param[in] length size in bits of bitmap to allocate
/// \param[out] out the resulting buffer (zero-initialized).
///
/// \return Status message
ARROW_EXPORT
Status AllocateEmptyBitmap(MemoryPool* pool, int64_t length,
                           std::shared_ptr<Buffer>* out);

/// \brief Allocate a zero-initialized bitmap buffer from the default memory pool
///
/// \param[in] length size in bits of bitmap to allocate
/// \param[out] out the resulting buffer (zero-initialized).
///
/// \return Status message
ARROW_EXPORT
Status AllocateEmptyBitmap(int64_t length, std::shared_ptr<Buffer>* out);
404
405/// @}
406
407} // namespace arrow
408
409#endif // ARROW_BUFFER_H
410