1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#pragma once
19
20#include <algorithm>
21#include <memory>
22#include <vector>
23
24#include "arrow/array/builder_base.h"
25#include "arrow/type.h"
26
27namespace arrow {
28
29class ARROW_EXPORT NullBuilder : public ArrayBuilder {
30 public:
31 explicit NullBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
32 : ArrayBuilder(null(), pool) {}
33
34 Status AppendNull() {
35 ++null_count_;
36 ++length_;
37 return Status::OK();
38 }
39
40 Status Append(std::nullptr_t value) { return AppendNull(); }
41
42 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
43};
44
45template <typename Type>
46class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {
47 public:
48 using value_type = typename Type::c_type;
49
50 explicit PrimitiveBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
51 : ArrayBuilder(type, pool), data_(NULLPTR), raw_data_(NULLPTR) {}
52
53 using ArrayBuilder::Advance;
54
55 /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
56 /// The memory at the corresponding data slot is set to 0 to prevent
57 /// uninitialized memory access
58 Status AppendNulls(const uint8_t* valid_bytes, int64_t length) {
59 ARROW_RETURN_NOT_OK(Reserve(length));
60 memset(raw_data_ + length_, 0,
61 static_cast<size_t>(TypeTraits<Type>::bytes_required(length)));
62 UnsafeAppendToBitmap(valid_bytes, length);
63 return Status::OK();
64 }
65
66 /// \brief Append a single null element
67 Status AppendNull() {
68 ARROW_RETURN_NOT_OK(Reserve(1));
69 memset(raw_data_ + length_, 0, sizeof(value_type));
70 UnsafeAppendToBitmap(false);
71 return Status::OK();
72 }
73
74 value_type GetValue(int64_t index) const {
75 return reinterpret_cast<const value_type*>(data_->data())[index];
76 }
77
78 /// \brief Append a sequence of elements in one shot
79 /// \param[in] values a contiguous C array of values
80 /// \param[in] length the number of values to append
81 /// \param[in] valid_bytes an optional sequence of bytes where non-zero
82 /// indicates a valid (non-null) value
83 /// \return Status
84 Status AppendValues(const value_type* values, int64_t length,
85 const uint8_t* valid_bytes = NULLPTR);
86
87 /// \brief Append a sequence of elements in one shot
88 /// \param[in] values a contiguous C array of values
89 /// \param[in] length the number of values to append
90 /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
91 /// (0). Equal in length to values
92 /// \return Status
93 Status AppendValues(const value_type* values, int64_t length,
94 const std::vector<bool>& is_valid);
95
96 /// \brief Append a sequence of elements in one shot
97 /// \param[in] values a std::vector of values
98 /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
99 /// (0). Equal in length to values
100 /// \return Status
101 Status AppendValues(const std::vector<value_type>& values,
102 const std::vector<bool>& is_valid);
103
104 /// \brief Append a sequence of elements in one shot
105 /// \param[in] values a std::vector of values
106 /// \return Status
107 Status AppendValues(const std::vector<value_type>& values);
108
109 /// \brief Append a sequence of elements in one shot
110 /// \param[in] values_begin InputIterator to the beginning of the values
111 /// \param[in] values_end InputIterator pointing to the end of the values
112 /// \return Status
113
114 template <typename ValuesIter>
115 Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
116 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
117 ARROW_RETURN_NOT_OK(Reserve(length));
118
119 std::copy(values_begin, values_end, raw_data_ + length_);
120
121 // this updates the length_
122 UnsafeSetNotNull(length);
123 return Status::OK();
124 }
125
126 /// \brief Append a sequence of elements in one shot, with a specified nullmap
127 /// \param[in] values_begin InputIterator to the beginning of the values
128 /// \param[in] values_end InputIterator pointing to the end of the values
129 /// \param[in] valid_begin InputIterator with elements indication valid(1)
130 /// or null(0) values.
131 /// \return Status
132 template <typename ValuesIter, typename ValidIter>
133 typename std::enable_if<!std::is_pointer<ValidIter>::value, Status>::type AppendValues(
134 ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
135 static_assert(!internal::is_null_pointer<ValidIter>::value,
136 "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
137 "version instead");
138 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
139 ARROW_RETURN_NOT_OK(Reserve(length));
140
141 std::copy(values_begin, values_end, raw_data_ + length_);
142
143 // this updates the length_
144 for (int64_t i = 0; i != length; ++i) {
145 UnsafeAppendToBitmap(*valid_begin);
146 ++valid_begin;
147 }
148 return Status::OK();
149 }
150
151 // Same as above, with a pointer type ValidIter
152 template <typename ValuesIter, typename ValidIter>
153 typename std::enable_if<std::is_pointer<ValidIter>::value, Status>::type AppendValues(
154 ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
155 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
156 ARROW_RETURN_NOT_OK(Reserve(length));
157
158 std::copy(values_begin, values_end, raw_data_ + length_);
159
160 // this updates the length_
161 if (valid_begin == NULLPTR) {
162 UnsafeSetNotNull(length);
163 } else {
164 for (int64_t i = 0; i != length; ++i) {
165 UnsafeAppendToBitmap(*valid_begin);
166 ++valid_begin;
167 }
168 }
169
170 return Status::OK();
171 }
172
173 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
174 void Reset() override;
175
176 Status Resize(int64_t capacity) override;
177
178 protected:
179 std::shared_ptr<ResizableBuffer> data_;
180 value_type* raw_data_;
181};
182
183/// Base class for all Builders that emit an Array of a scalar numerical type.
184template <typename T>
185class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {
186 public:
187 using typename PrimitiveBuilder<T>::value_type;
188 using PrimitiveBuilder<T>::PrimitiveBuilder;
189
190 template <typename T1 = T>
191 explicit NumericBuilder(
192 typename std::enable_if<TypeTraits<T1>::is_parameter_free, MemoryPool*>::type pool
193 ARROW_MEMORY_POOL_DEFAULT)
194 : PrimitiveBuilder<T1>(TypeTraits<T1>::type_singleton(), pool) {}
195
196 using ArrayBuilder::UnsafeAppendNull;
197 using ArrayBuilder::UnsafeAppendToBitmap;
198 using PrimitiveBuilder<T>::AppendValues;
199 using PrimitiveBuilder<T>::Resize;
200 using PrimitiveBuilder<T>::Reserve;
201
202 /// Append a single scalar and increase the size if necessary.
203 Status Append(const value_type val) {
204 ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1));
205 UnsafeAppend(val);
206 return Status::OK();
207 }
208
209 /// Append a single scalar under the assumption that the underlying Buffer is
210 /// large enough.
211 ///
212 /// This method does not capacity-check; make sure to call Reserve
213 /// beforehand.
214 void UnsafeAppend(const value_type val) {
215 raw_data_[length_] = val;
216 UnsafeAppendToBitmap(true);
217 }
218
219 protected:
220 using PrimitiveBuilder<T>::length_;
221 using PrimitiveBuilder<T>::raw_data_;
222};
223
224// Builders
225
226using UInt8Builder = NumericBuilder<UInt8Type>;
227using UInt16Builder = NumericBuilder<UInt16Type>;
228using UInt32Builder = NumericBuilder<UInt32Type>;
229using UInt64Builder = NumericBuilder<UInt64Type>;
230
231using Int8Builder = NumericBuilder<Int8Type>;
232using Int16Builder = NumericBuilder<Int16Type>;
233using Int32Builder = NumericBuilder<Int32Type>;
234using Int64Builder = NumericBuilder<Int64Type>;
235using TimestampBuilder = NumericBuilder<TimestampType>;
236using Time32Builder = NumericBuilder<Time32Type>;
237using Time64Builder = NumericBuilder<Time64Type>;
238using Date32Builder = NumericBuilder<Date32Type>;
239using Date64Builder = NumericBuilder<Date64Type>;
240
241using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
242using FloatBuilder = NumericBuilder<FloatType>;
243using DoubleBuilder = NumericBuilder<DoubleType>;
244
245class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
246 public:
247 using value_type = bool;
248 explicit BooleanBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
249
250 explicit BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
251
252 using ArrayBuilder::Advance;
253 using ArrayBuilder::UnsafeAppendNull;
254
255 /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
256 Status AppendNulls(const uint8_t* valid_bytes, int64_t length) {
257 ARROW_RETURN_NOT_OK(Reserve(length));
258 UnsafeAppendToBitmap(valid_bytes, length);
259
260 return Status::OK();
261 }
262
263 Status AppendNull() {
264 ARROW_RETURN_NOT_OK(Reserve(1));
265 UnsafeAppendToBitmap(false);
266
267 return Status::OK();
268 }
269
270 /// Scalar append
271 Status Append(const bool val) {
272 ARROW_RETURN_NOT_OK(Reserve(1));
273 UnsafeAppend(val);
274 return Status::OK();
275 }
276
277 Status Append(const uint8_t val) { return Append(val != 0); }
278
279 /// Scalar append, without checking for capacity
280 void UnsafeAppend(const bool val) {
281 if (val) {
282 BitUtil::SetBit(raw_data_, length_);
283 } else {
284 BitUtil::ClearBit(raw_data_, length_);
285 }
286 UnsafeAppendToBitmap(true);
287 }
288
289 void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); }
290
291 /// \brief Append a sequence of elements in one shot
292 /// \param[in] values a contiguous array of bytes (non-zero is 1)
293 /// \param[in] length the number of values to append
294 /// \param[in] valid_bytes an optional sequence of bytes where non-zero
295 /// indicates a valid (non-null) value
296 /// \return Status
297 Status AppendValues(const uint8_t* values, int64_t length,
298 const uint8_t* valid_bytes = NULLPTR);
299
300 /// \brief Append a sequence of elements in one shot
301 /// \param[in] values a contiguous C array of values
302 /// \param[in] length the number of values to append
303 /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
304 /// (0). Equal in length to values
305 /// \return Status
306 Status AppendValues(const uint8_t* values, int64_t length,
307 const std::vector<bool>& is_valid);
308
309 /// \brief Append a sequence of elements in one shot
310 /// \param[in] values a std::vector of bytes
311 /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
312 /// (0). Equal in length to values
313 /// \return Status
314 Status AppendValues(const std::vector<uint8_t>& values,
315 const std::vector<bool>& is_valid);
316
317 /// \brief Append a sequence of elements in one shot
318 /// \param[in] values a std::vector of bytes
319 /// \return Status
320 Status AppendValues(const std::vector<uint8_t>& values);
321
322 /// \brief Append a sequence of elements in one shot
323 /// \param[in] values an std::vector<bool> indicating true (1) or false
324 /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null
325 /// (0). Equal in length to values
326 /// \return Status
327 Status AppendValues(const std::vector<bool>& values, const std::vector<bool>& is_valid);
328
329 /// \brief Append a sequence of elements in one shot
330 /// \param[in] values an std::vector<bool> indicating true (1) or false
331 /// \return Status
332 Status AppendValues(const std::vector<bool>& values);
333
334 /// \brief Append a sequence of elements in one shot
335 /// \param[in] values_begin InputIterator to the beginning of the values
336 /// \param[in] values_end InputIterator pointing to the end of the values
337 /// or null(0) values
338 /// \return Status
339 template <typename ValuesIter>
340 Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
341 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
342 ARROW_RETURN_NOT_OK(Reserve(length));
343 auto iter = values_begin;
344 internal::GenerateBitsUnrolled(raw_data_, length_, length,
345 [&iter]() -> bool { return *(iter++); });
346
347 // this updates length_
348 UnsafeSetNotNull(length);
349 return Status::OK();
350 }
351
352 /// \brief Append a sequence of elements in one shot, with a specified nullmap
353 /// \param[in] values_begin InputIterator to the beginning of the values
354 /// \param[in] values_end InputIterator pointing to the end of the values
355 /// \param[in] valid_begin InputIterator with elements indication valid(1)
356 /// or null(0) values
357 /// \return Status
358 template <typename ValuesIter, typename ValidIter>
359 typename std::enable_if<!std::is_pointer<ValidIter>::value, Status>::type AppendValues(
360 ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
361 static_assert(!internal::is_null_pointer<ValidIter>::value,
362 "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
363 "version instead");
364 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
365 ARROW_RETURN_NOT_OK(Reserve(length));
366
367 auto iter = values_begin;
368 internal::GenerateBitsUnrolled(raw_data_, length_, length,
369 [&iter]() -> bool { return *(iter++); });
370
371 // this updates length_
372 for (int64_t i = 0; i != length; ++i) {
373 ArrayBuilder::UnsafeAppendToBitmap(*valid_begin);
374 ++valid_begin;
375 }
376 return Status::OK();
377 }
378
379 // Same as above, for a pointer type ValidIter
380 template <typename ValuesIter, typename ValidIter>
381 typename std::enable_if<std::is_pointer<ValidIter>::value, Status>::type AppendValues(
382 ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
383 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
384 ARROW_RETURN_NOT_OK(Reserve(length));
385
386 auto iter = values_begin;
387 internal::GenerateBitsUnrolled(raw_data_, length_, length,
388 [&iter]() -> bool { return *(iter++); });
389
390 // this updates the length_
391 if (valid_begin == NULLPTR) {
392 UnsafeSetNotNull(length);
393 } else {
394 for (int64_t i = 0; i != length; ++i) {
395 ArrayBuilder::UnsafeAppendToBitmap(*valid_begin);
396 ++valid_begin;
397 }
398 }
399
400 return Status::OK();
401 }
402
403 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
404 void Reset() override;
405 Status Resize(int64_t capacity) override;
406
407 protected:
408 std::shared_ptr<ResizableBuffer> data_;
409 uint8_t* raw_data_;
410};
411
412} // namespace arrow
413