1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include "arrow/array/builder_primitive.h"
19
20#include <algorithm>
21#include <cstddef>
22#include <cstdint>
23#include <cstring>
24#include <sstream>
25#include <utility>
26#include <vector>
27
28#include "arrow/array.h"
29#include "arrow/buffer.h"
30#include "arrow/status.h"
31#include "arrow/type.h"
32#include "arrow/type_traits.h"
33#include "arrow/util/bit-util.h"
34#include "arrow/util/int-util.h"
35#include "arrow/util/logging.h"
36
37namespace arrow {
38
39// ----------------------------------------------------------------------
40// Null builder
41
42Status NullBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
43 *out = ArrayData::Make(null(), length_, {nullptr}, length_);
44 length_ = null_count_ = 0;
45 return Status::OK();
46}
47
48// ----------------------------------------------------------------------
49
50template <typename T>
51void PrimitiveBuilder<T>::Reset() {
52 data_.reset();
53 raw_data_ = nullptr;
54}
55
56template <typename T>
57Status PrimitiveBuilder<T>::Resize(int64_t capacity) {
58 RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
59 capacity = std::max(capacity, kMinBuilderCapacity);
60
61 int64_t nbytes = TypeTraits<T>::bytes_required(capacity);
62 if (capacity_ == 0) {
63 RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_));
64 } else {
65 RETURN_NOT_OK(data_->Resize(nbytes));
66 }
67
68 raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
69 return ArrayBuilder::Resize(capacity);
70}
71
72template <typename T>
73Status PrimitiveBuilder<T>::AppendValues(const value_type* values, int64_t length,
74 const uint8_t* valid_bytes) {
75 RETURN_NOT_OK(Reserve(length));
76
77 if (length > 0) {
78 std::memcpy(raw_data_ + length_, values,
79 static_cast<std::size_t>(TypeTraits<T>::bytes_required(length)));
80 }
81
82 // length_ is update by these
83 ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
84 return Status::OK();
85}
86
87template <typename T>
88Status PrimitiveBuilder<T>::AppendValues(const value_type* values, int64_t length,
89 const std::vector<bool>& is_valid) {
90 RETURN_NOT_OK(Reserve(length));
91 DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
92
93 if (length > 0) {
94 std::memcpy(raw_data_ + length_, values,
95 static_cast<std::size_t>(TypeTraits<T>::bytes_required(length)));
96 }
97
98 // length_ is update by these
99 ArrayBuilder::UnsafeAppendToBitmap(is_valid);
100 return Status::OK();
101}
102
103template <typename T>
104Status PrimitiveBuilder<T>::AppendValues(const std::vector<value_type>& values,
105 const std::vector<bool>& is_valid) {
106 return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
107}
108
109template <typename T>
110Status PrimitiveBuilder<T>::AppendValues(const std::vector<value_type>& values) {
111 return AppendValues(values.data(), static_cast<int64_t>(values.size()));
112}
113
114template <typename T>
115Status PrimitiveBuilder<T>::FinishInternal(std::shared_ptr<ArrayData>* out) {
116 RETURN_NOT_OK(TrimBuffer(TypeTraits<T>::bytes_required(length_), data_.get()));
117 std::shared_ptr<Buffer> null_bitmap;
118 RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
119 *out = ArrayData::Make(type_, length_, {null_bitmap, data_}, null_count_);
120
121 data_ = nullptr;
122 capacity_ = length_ = null_count_ = 0;
123
124 return Status::OK();
125}
126
127template class PrimitiveBuilder<UInt8Type>;
128template class PrimitiveBuilder<UInt16Type>;
129template class PrimitiveBuilder<UInt32Type>;
130template class PrimitiveBuilder<UInt64Type>;
131template class PrimitiveBuilder<Int8Type>;
132template class PrimitiveBuilder<Int16Type>;
133template class PrimitiveBuilder<Int32Type>;
134template class PrimitiveBuilder<Int64Type>;
135template class PrimitiveBuilder<Date32Type>;
136template class PrimitiveBuilder<Date64Type>;
137template class PrimitiveBuilder<Time32Type>;
138template class PrimitiveBuilder<Time64Type>;
139template class PrimitiveBuilder<TimestampType>;
140template class PrimitiveBuilder<HalfFloatType>;
141template class PrimitiveBuilder<FloatType>;
142template class PrimitiveBuilder<DoubleType>;
143
144BooleanBuilder::BooleanBuilder(MemoryPool* pool)
145 : ArrayBuilder(boolean(), pool), data_(nullptr), raw_data_(nullptr) {}
146
147BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
148 : BooleanBuilder(pool) {
149 DCHECK_EQ(Type::BOOL, type->id());
150}
151
152void BooleanBuilder::Reset() {
153 ArrayBuilder::Reset();
154 data_.reset();
155 raw_data_ = nullptr;
156}
157
158Status BooleanBuilder::Resize(int64_t capacity) {
159 RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
160 capacity = std::max(capacity, kMinBuilderCapacity);
161
162 const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity);
163 if (capacity_ == 0) {
164 RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_bitmap_size, &data_));
165 raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
166
167 // We zero the memory for booleans to keep things simple; for some reason if
168 // we do not, even though we may write every bit (through in-place | or &),
169 // valgrind will still show a warning. If we do not zero the bytes here, we
170 // will have to be careful to zero them in AppendNull and AppendNulls. Also,
171 // zeroing the bits results in deterministic bits when each byte may have a
172 // mix of nulls and not nulls.
173 //
174 // We only zero up to new_bitmap_size because the padding was zeroed by
175 // AllocateResizableBuffer
176 memset(raw_data_, 0, static_cast<size_t>(new_bitmap_size));
177 } else {
178 const int64_t old_bitmap_capacity = data_->capacity();
179 RETURN_NOT_OK(data_->Resize(new_bitmap_size));
180 const int64_t new_bitmap_capacity = data_->capacity();
181 raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
182
183 // See comment above about why we zero memory for booleans
184 memset(raw_data_ + old_bitmap_capacity, 0,
185 static_cast<size_t>(new_bitmap_capacity - old_bitmap_capacity));
186 }
187
188 return ArrayBuilder::Resize(capacity);
189}
190
191Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
192 int64_t bit_offset = length_ % 8;
193 if (bit_offset > 0) {
194 // Adjust last byte
195 data_->mutable_data()[length_ / 8] &= BitUtil::kPrecedingBitmask[bit_offset];
196 }
197
198 std::shared_ptr<Buffer> null_bitmap;
199 RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
200 RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), data_.get()));
201
202 *out = ArrayData::Make(boolean(), length_, {null_bitmap, data_}, null_count_);
203
204 data_ = nullptr;
205 capacity_ = length_ = null_count_ = 0;
206 return Status::OK();
207}
208
209Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
210 const uint8_t* valid_bytes) {
211 RETURN_NOT_OK(Reserve(length));
212
213 int64_t i = 0;
214 internal::GenerateBitsUnrolled(raw_data_, length_, length,
215 [values, &i]() -> bool { return values[i++] != 0; });
216
217 // this updates length_
218 ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
219 return Status::OK();
220}
221
222Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
223 const std::vector<bool>& is_valid) {
224 RETURN_NOT_OK(Reserve(length));
225 DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
226
227 int64_t i = 0;
228 internal::GenerateBitsUnrolled(raw_data_, length_, length,
229 [values, &i]() -> bool { return values[i++]; });
230
231 // this updates length_
232 ArrayBuilder::UnsafeAppendToBitmap(is_valid);
233 return Status::OK();
234}
235
236Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values,
237 const std::vector<bool>& is_valid) {
238 return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
239}
240
241Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values) {
242 return AppendValues(values.data(), static_cast<int64_t>(values.size()));
243}
244
245Status BooleanBuilder::AppendValues(const std::vector<bool>& values,
246 const std::vector<bool>& is_valid) {
247 const int64_t length = static_cast<int64_t>(values.size());
248 RETURN_NOT_OK(Reserve(length));
249 DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
250
251 int64_t i = 0;
252 internal::GenerateBitsUnrolled(raw_data_, length_, length,
253 [&values, &i]() -> bool { return values[i++]; });
254
255 // this updates length_
256 ArrayBuilder::UnsafeAppendToBitmap(is_valid);
257 return Status::OK();
258}
259
260Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
261 const int64_t length = static_cast<int64_t>(values.size());
262 RETURN_NOT_OK(Reserve(length));
263
264 int64_t i = 0;
265 internal::GenerateBitsUnrolled(raw_data_, length_, length,
266 [&values, &i]() -> bool { return values[i++]; });
267
268 // this updates length_
269 ArrayBuilder::UnsafeSetNotNull(length);
270 return Status::OK();
271}
272
273} // namespace arrow
274