1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#pragma once
19
20#include <algorithm> // IWYU pragma: keep
21#include <array>
22#include <cstddef>
23#include <cstdint>
24#include <cstring>
25#include <iterator>
26#include <limits>
27#include <memory>
28#include <string>
29#include <type_traits>
30#include <vector>
31
32#include "arrow/buffer-builder.h"
33#include "arrow/memory_pool.h"
34#include "arrow/status.h"
35#include "arrow/type.h"
36#include "arrow/type_traits.h"
37#include "arrow/util/bit-util.h"
38#include "arrow/util/macros.h"
39#include "arrow/util/string_view.h"
40#include "arrow/util/type_traits.h"
41#include "arrow/util/visibility.h"
42
43namespace arrow {
44
45class Array;
46struct ArrayData;
47
48constexpr int64_t kMinBuilderCapacity = 1 << 5;
49constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
50
51/// Base class for all data array builders.
52///
53/// This class provides a facilities for incrementally building the null bitmap
54/// (see Append methods) and as a side effect the current number of slots and
55/// the null count.
56///
57/// \note Users are expected to use builders as one of the concrete types below.
58/// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use.
59class ARROW_EXPORT ArrayBuilder {
60 public:
61 explicit ArrayBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
62 : type_(type), pool_(pool), null_bitmap_builder_(pool) {}
63
64 virtual ~ArrayBuilder() = default;
65
66 /// For nested types. Since the objects are owned by this class instance, we
67 /// skip shared pointers and just return a raw pointer
68 ArrayBuilder* child(int i) { return children_[i].get(); }
69
70 int num_children() const { return static_cast<int>(children_.size()); }
71
72 int64_t length() const { return length_; }
73 int64_t null_count() const { return null_count_; }
74 int64_t capacity() const { return capacity_; }
75
76 /// \brief Ensure that enough memory has been allocated to fit the indicated
77 /// number of total elements in the builder, including any that have already
78 /// been appended. Does not account for reallocations that may be due to
79 /// variable size data, like binary values. To make space for incremental
80 /// appends, use Reserve instead.
81 ///
82 /// \param[in] capacity the minimum number of total array values to
83 /// accommodate. Must be greater than the current capacity.
84 /// \return Status
85 virtual Status Resize(int64_t capacity);
86
87 /// \brief Ensure that there is enough space allocated to add the indicated
88 /// number of elements without any further calls to Resize. The memory
89 /// allocated is rounded up to the next highest power of 2 similar to memory
90 /// allocations in STL containers like std::vector
91 /// \param[in] additional_capacity the number of additional array values
92 /// \return Status
93 Status Reserve(int64_t additional_capacity) {
94 auto min_capacity = length() + additional_capacity;
95 if (min_capacity <= capacity()) return Status::OK();
96
97 // leave growth factor up to BufferBuilder
98 auto new_capacity = BufferBuilder::GrowByFactor(min_capacity);
99 return Resize(new_capacity);
100 }
101
102 /// Reset the builder.
103 virtual void Reset();
104
105 /// For cases where raw data was memcpy'd into the internal buffers, allows us
106 /// to advance the length of the builder. It is your responsibility to use
107 /// this function responsibly.
108 Status Advance(int64_t elements);
109
110 /// \brief Return result of builder as an internal generic ArrayData
111 /// object. Resets builder except for dictionary builder
112 ///
113 /// \param[out] out the finalized ArrayData object
114 /// \return Status
115 virtual Status FinishInternal(std::shared_ptr<ArrayData>* out) = 0;
116
117 /// \brief Return result of builder as an Array object.
118 ///
119 /// The builder is reset except for DictionaryBuilder.
120 ///
121 /// \param[out] out the finalized Array object
122 /// \return Status
123 Status Finish(std::shared_ptr<Array>* out);
124
125 std::shared_ptr<DataType> type() const { return type_; }
126
127 protected:
128 /// Append to null bitmap
129 Status AppendToBitmap(bool is_valid);
130
131 /// Vector append. Treat each zero byte as a null. If valid_bytes is null
132 /// assume all of length bits are valid.
133 Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
134
135 /// Set the next length bits to not null (i.e. valid).
136 Status SetNotNull(int64_t length);
137
138 // Unsafe operations (don't check capacity/don't resize)
139
140 void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
141
142 // Append to null bitmap, update the length
143 void UnsafeAppendToBitmap(bool is_valid) {
144 null_bitmap_builder_.UnsafeAppend(is_valid);
145 ++length_;
146 if (!is_valid) ++null_count_;
147 }
148
149 // Vector append. Treat each zero byte as a nullzero. If valid_bytes is null
150 // assume all of length bits are valid.
151 void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
152 if (valid_bytes == NULLPTR) {
153 return UnsafeSetNotNull(length);
154 }
155 null_bitmap_builder_.UnsafeAppend(valid_bytes, length);
156 length_ += length;
157 null_count_ = null_bitmap_builder_.false_count();
158 }
159
160 void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
161
162 // Set the next length bits to not null (i.e. valid).
163 void UnsafeSetNotNull(int64_t length);
164
165 static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
166
167 static Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {
168 if (new_capacity < 0) {
169 return Status::Invalid("Resize capacity must be positive");
170 }
171 if (new_capacity < old_capacity) {
172 return Status::Invalid("Resize cannot downsize");
173 }
174 return Status::OK();
175 }
176
177 std::shared_ptr<DataType> type_;
178 MemoryPool* pool_;
179
180 TypedBufferBuilder<bool> null_bitmap_builder_;
181 int64_t null_count_ = 0;
182
183 // Array length, so far. Also, the index of the next element to be added
184 int64_t length_ = 0;
185 int64_t capacity_ = 0;
186
187 // Child value array builders. These are owned by this class
188 std::vector<std::shared_ptr<ArrayBuilder>> children_;
189
190 private:
191 ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
192};
193
194} // namespace arrow
195