1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #pragma once |
19 | |
#include <algorithm>
#include <cstring>
#include <memory>
#include <vector>

#include "arrow/array/builder_base.h"
#include "arrow/type.h"
26 | |
27 | namespace arrow { |
28 | |
29 | class ARROW_EXPORT NullBuilder : public ArrayBuilder { |
30 | public: |
31 | explicit NullBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) |
32 | : ArrayBuilder(null(), pool) {} |
33 | |
34 | Status AppendNull() { |
35 | ++null_count_; |
36 | ++length_; |
37 | return Status::OK(); |
38 | } |
39 | |
40 | Status Append(std::nullptr_t value) { return AppendNull(); } |
41 | |
42 | Status FinishInternal(std::shared_ptr<ArrayData>* out) override; |
43 | }; |
44 | |
45 | template <typename Type> |
46 | class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { |
47 | public: |
48 | using value_type = typename Type::c_type; |
49 | |
50 | explicit PrimitiveBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool) |
51 | : ArrayBuilder(type, pool), data_(NULLPTR), raw_data_(NULLPTR) {} |
52 | |
53 | using ArrayBuilder::Advance; |
54 | |
55 | /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory |
56 | /// The memory at the corresponding data slot is set to 0 to prevent |
57 | /// uninitialized memory access |
58 | Status AppendNulls(const uint8_t* valid_bytes, int64_t length) { |
59 | ARROW_RETURN_NOT_OK(Reserve(length)); |
60 | memset(raw_data_ + length_, 0, |
61 | static_cast<size_t>(TypeTraits<Type>::bytes_required(length))); |
62 | UnsafeAppendToBitmap(valid_bytes, length); |
63 | return Status::OK(); |
64 | } |
65 | |
66 | /// \brief Append a single null element |
67 | Status AppendNull() { |
68 | ARROW_RETURN_NOT_OK(Reserve(1)); |
69 | memset(raw_data_ + length_, 0, sizeof(value_type)); |
70 | UnsafeAppendToBitmap(false); |
71 | return Status::OK(); |
72 | } |
73 | |
74 | value_type GetValue(int64_t index) const { |
75 | return reinterpret_cast<const value_type*>(data_->data())[index]; |
76 | } |
77 | |
78 | /// \brief Append a sequence of elements in one shot |
79 | /// \param[in] values a contiguous C array of values |
80 | /// \param[in] length the number of values to append |
81 | /// \param[in] valid_bytes an optional sequence of bytes where non-zero |
82 | /// indicates a valid (non-null) value |
83 | /// \return Status |
84 | Status AppendValues(const value_type* values, int64_t length, |
85 | const uint8_t* valid_bytes = NULLPTR); |
86 | |
87 | /// \brief Append a sequence of elements in one shot |
88 | /// \param[in] values a contiguous C array of values |
89 | /// \param[in] length the number of values to append |
90 | /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null |
91 | /// (0). Equal in length to values |
92 | /// \return Status |
93 | Status AppendValues(const value_type* values, int64_t length, |
94 | const std::vector<bool>& is_valid); |
95 | |
96 | /// \brief Append a sequence of elements in one shot |
97 | /// \param[in] values a std::vector of values |
98 | /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null |
99 | /// (0). Equal in length to values |
100 | /// \return Status |
101 | Status AppendValues(const std::vector<value_type>& values, |
102 | const std::vector<bool>& is_valid); |
103 | |
104 | /// \brief Append a sequence of elements in one shot |
105 | /// \param[in] values a std::vector of values |
106 | /// \return Status |
107 | Status AppendValues(const std::vector<value_type>& values); |
108 | |
109 | /// \brief Append a sequence of elements in one shot |
110 | /// \param[in] values_begin InputIterator to the beginning of the values |
111 | /// \param[in] values_end InputIterator pointing to the end of the values |
112 | /// \return Status |
113 | |
114 | template <typename ValuesIter> |
115 | Status AppendValues(ValuesIter values_begin, ValuesIter values_end) { |
116 | int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end)); |
117 | ARROW_RETURN_NOT_OK(Reserve(length)); |
118 | |
119 | std::copy(values_begin, values_end, raw_data_ + length_); |
120 | |
121 | // this updates the length_ |
122 | UnsafeSetNotNull(length); |
123 | return Status::OK(); |
124 | } |
125 | |
126 | /// \brief Append a sequence of elements in one shot, with a specified nullmap |
127 | /// \param[in] values_begin InputIterator to the beginning of the values |
128 | /// \param[in] values_end InputIterator pointing to the end of the values |
129 | /// \param[in] valid_begin InputIterator with elements indication valid(1) |
130 | /// or null(0) values. |
131 | /// \return Status |
132 | template <typename ValuesIter, typename ValidIter> |
133 | typename std::enable_if<!std::is_pointer<ValidIter>::value, Status>::type AppendValues( |
134 | ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) { |
135 | static_assert(!internal::is_null_pointer<ValidIter>::value, |
136 | "Don't pass a NULLPTR directly as valid_begin, use the 2-argument " |
137 | "version instead" ); |
138 | int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end)); |
139 | ARROW_RETURN_NOT_OK(Reserve(length)); |
140 | |
141 | std::copy(values_begin, values_end, raw_data_ + length_); |
142 | |
143 | // this updates the length_ |
144 | for (int64_t i = 0; i != length; ++i) { |
145 | UnsafeAppendToBitmap(*valid_begin); |
146 | ++valid_begin; |
147 | } |
148 | return Status::OK(); |
149 | } |
150 | |
151 | // Same as above, with a pointer type ValidIter |
152 | template <typename ValuesIter, typename ValidIter> |
153 | typename std::enable_if<std::is_pointer<ValidIter>::value, Status>::type AppendValues( |
154 | ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) { |
155 | int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end)); |
156 | ARROW_RETURN_NOT_OK(Reserve(length)); |
157 | |
158 | std::copy(values_begin, values_end, raw_data_ + length_); |
159 | |
160 | // this updates the length_ |
161 | if (valid_begin == NULLPTR) { |
162 | UnsafeSetNotNull(length); |
163 | } else { |
164 | for (int64_t i = 0; i != length; ++i) { |
165 | UnsafeAppendToBitmap(*valid_begin); |
166 | ++valid_begin; |
167 | } |
168 | } |
169 | |
170 | return Status::OK(); |
171 | } |
172 | |
173 | Status FinishInternal(std::shared_ptr<ArrayData>* out) override; |
174 | void Reset() override; |
175 | |
176 | Status Resize(int64_t capacity) override; |
177 | |
178 | protected: |
179 | std::shared_ptr<ResizableBuffer> data_; |
180 | value_type* raw_data_; |
181 | }; |
182 | |
183 | /// Base class for all Builders that emit an Array of a scalar numerical type. |
184 | template <typename T> |
185 | class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> { |
186 | public: |
187 | using typename PrimitiveBuilder<T>::value_type; |
188 | using PrimitiveBuilder<T>::PrimitiveBuilder; |
189 | |
190 | template <typename T1 = T> |
191 | explicit NumericBuilder( |
192 | typename std::enable_if<TypeTraits<T1>::is_parameter_free, MemoryPool*>::type pool |
193 | ARROW_MEMORY_POOL_DEFAULT) |
194 | : PrimitiveBuilder<T1>(TypeTraits<T1>::type_singleton(), pool) {} |
195 | |
196 | using ArrayBuilder::UnsafeAppendNull; |
197 | using ArrayBuilder::UnsafeAppendToBitmap; |
198 | using PrimitiveBuilder<T>::AppendValues; |
199 | using PrimitiveBuilder<T>::Resize; |
200 | using PrimitiveBuilder<T>::Reserve; |
201 | |
202 | /// Append a single scalar and increase the size if necessary. |
203 | Status Append(const value_type val) { |
204 | ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1)); |
205 | UnsafeAppend(val); |
206 | return Status::OK(); |
207 | } |
208 | |
209 | /// Append a single scalar under the assumption that the underlying Buffer is |
210 | /// large enough. |
211 | /// |
212 | /// This method does not capacity-check; make sure to call Reserve |
213 | /// beforehand. |
214 | void UnsafeAppend(const value_type val) { |
215 | raw_data_[length_] = val; |
216 | UnsafeAppendToBitmap(true); |
217 | } |
218 | |
219 | protected: |
220 | using PrimitiveBuilder<T>::length_; |
221 | using PrimitiveBuilder<T>::raw_data_; |
222 | }; |
223 | |
224 | // Builders |
225 | |
226 | using UInt8Builder = NumericBuilder<UInt8Type>; |
227 | using UInt16Builder = NumericBuilder<UInt16Type>; |
228 | using UInt32Builder = NumericBuilder<UInt32Type>; |
229 | using UInt64Builder = NumericBuilder<UInt64Type>; |
230 | |
231 | using Int8Builder = NumericBuilder<Int8Type>; |
232 | using Int16Builder = NumericBuilder<Int16Type>; |
233 | using Int32Builder = NumericBuilder<Int32Type>; |
234 | using Int64Builder = NumericBuilder<Int64Type>; |
235 | using TimestampBuilder = NumericBuilder<TimestampType>; |
236 | using Time32Builder = NumericBuilder<Time32Type>; |
237 | using Time64Builder = NumericBuilder<Time64Type>; |
238 | using Date32Builder = NumericBuilder<Date32Type>; |
239 | using Date64Builder = NumericBuilder<Date64Type>; |
240 | |
241 | using HalfFloatBuilder = NumericBuilder<HalfFloatType>; |
242 | using FloatBuilder = NumericBuilder<FloatType>; |
243 | using DoubleBuilder = NumericBuilder<DoubleType>; |
244 | |
245 | class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { |
246 | public: |
247 | using value_type = bool; |
248 | explicit BooleanBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT); |
249 | |
250 | explicit BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool); |
251 | |
252 | using ArrayBuilder::Advance; |
253 | using ArrayBuilder::UnsafeAppendNull; |
254 | |
255 | /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory |
256 | Status AppendNulls(const uint8_t* valid_bytes, int64_t length) { |
257 | ARROW_RETURN_NOT_OK(Reserve(length)); |
258 | UnsafeAppendToBitmap(valid_bytes, length); |
259 | |
260 | return Status::OK(); |
261 | } |
262 | |
263 | Status AppendNull() { |
264 | ARROW_RETURN_NOT_OK(Reserve(1)); |
265 | UnsafeAppendToBitmap(false); |
266 | |
267 | return Status::OK(); |
268 | } |
269 | |
270 | /// Scalar append |
271 | Status Append(const bool val) { |
272 | ARROW_RETURN_NOT_OK(Reserve(1)); |
273 | UnsafeAppend(val); |
274 | return Status::OK(); |
275 | } |
276 | |
277 | Status Append(const uint8_t val) { return Append(val != 0); } |
278 | |
279 | /// Scalar append, without checking for capacity |
280 | void UnsafeAppend(const bool val) { |
281 | if (val) { |
282 | BitUtil::SetBit(raw_data_, length_); |
283 | } else { |
284 | BitUtil::ClearBit(raw_data_, length_); |
285 | } |
286 | UnsafeAppendToBitmap(true); |
287 | } |
288 | |
289 | void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); } |
290 | |
291 | /// \brief Append a sequence of elements in one shot |
292 | /// \param[in] values a contiguous array of bytes (non-zero is 1) |
293 | /// \param[in] length the number of values to append |
294 | /// \param[in] valid_bytes an optional sequence of bytes where non-zero |
295 | /// indicates a valid (non-null) value |
296 | /// \return Status |
297 | Status AppendValues(const uint8_t* values, int64_t length, |
298 | const uint8_t* valid_bytes = NULLPTR); |
299 | |
300 | /// \brief Append a sequence of elements in one shot |
301 | /// \param[in] values a contiguous C array of values |
302 | /// \param[in] length the number of values to append |
303 | /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null |
304 | /// (0). Equal in length to values |
305 | /// \return Status |
306 | Status AppendValues(const uint8_t* values, int64_t length, |
307 | const std::vector<bool>& is_valid); |
308 | |
309 | /// \brief Append a sequence of elements in one shot |
310 | /// \param[in] values a std::vector of bytes |
311 | /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null |
312 | /// (0). Equal in length to values |
313 | /// \return Status |
314 | Status AppendValues(const std::vector<uint8_t>& values, |
315 | const std::vector<bool>& is_valid); |
316 | |
317 | /// \brief Append a sequence of elements in one shot |
318 | /// \param[in] values a std::vector of bytes |
319 | /// \return Status |
320 | Status AppendValues(const std::vector<uint8_t>& values); |
321 | |
322 | /// \brief Append a sequence of elements in one shot |
323 | /// \param[in] values an std::vector<bool> indicating true (1) or false |
324 | /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null |
325 | /// (0). Equal in length to values |
326 | /// \return Status |
327 | Status AppendValues(const std::vector<bool>& values, const std::vector<bool>& is_valid); |
328 | |
329 | /// \brief Append a sequence of elements in one shot |
330 | /// \param[in] values an std::vector<bool> indicating true (1) or false |
331 | /// \return Status |
332 | Status AppendValues(const std::vector<bool>& values); |
333 | |
334 | /// \brief Append a sequence of elements in one shot |
335 | /// \param[in] values_begin InputIterator to the beginning of the values |
336 | /// \param[in] values_end InputIterator pointing to the end of the values |
337 | /// or null(0) values |
338 | /// \return Status |
339 | template <typename ValuesIter> |
340 | Status AppendValues(ValuesIter values_begin, ValuesIter values_end) { |
341 | int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end)); |
342 | ARROW_RETURN_NOT_OK(Reserve(length)); |
343 | auto iter = values_begin; |
344 | internal::GenerateBitsUnrolled(raw_data_, length_, length, |
345 | [&iter]() -> bool { return *(iter++); }); |
346 | |
347 | // this updates length_ |
348 | UnsafeSetNotNull(length); |
349 | return Status::OK(); |
350 | } |
351 | |
352 | /// \brief Append a sequence of elements in one shot, with a specified nullmap |
353 | /// \param[in] values_begin InputIterator to the beginning of the values |
354 | /// \param[in] values_end InputIterator pointing to the end of the values |
355 | /// \param[in] valid_begin InputIterator with elements indication valid(1) |
356 | /// or null(0) values |
357 | /// \return Status |
358 | template <typename ValuesIter, typename ValidIter> |
359 | typename std::enable_if<!std::is_pointer<ValidIter>::value, Status>::type AppendValues( |
360 | ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) { |
361 | static_assert(!internal::is_null_pointer<ValidIter>::value, |
362 | "Don't pass a NULLPTR directly as valid_begin, use the 2-argument " |
363 | "version instead" ); |
364 | int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end)); |
365 | ARROW_RETURN_NOT_OK(Reserve(length)); |
366 | |
367 | auto iter = values_begin; |
368 | internal::GenerateBitsUnrolled(raw_data_, length_, length, |
369 | [&iter]() -> bool { return *(iter++); }); |
370 | |
371 | // this updates length_ |
372 | for (int64_t i = 0; i != length; ++i) { |
373 | ArrayBuilder::UnsafeAppendToBitmap(*valid_begin); |
374 | ++valid_begin; |
375 | } |
376 | return Status::OK(); |
377 | } |
378 | |
379 | // Same as above, for a pointer type ValidIter |
380 | template <typename ValuesIter, typename ValidIter> |
381 | typename std::enable_if<std::is_pointer<ValidIter>::value, Status>::type AppendValues( |
382 | ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) { |
383 | int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end)); |
384 | ARROW_RETURN_NOT_OK(Reserve(length)); |
385 | |
386 | auto iter = values_begin; |
387 | internal::GenerateBitsUnrolled(raw_data_, length_, length, |
388 | [&iter]() -> bool { return *(iter++); }); |
389 | |
390 | // this updates the length_ |
391 | if (valid_begin == NULLPTR) { |
392 | UnsafeSetNotNull(length); |
393 | } else { |
394 | for (int64_t i = 0; i != length; ++i) { |
395 | ArrayBuilder::UnsafeAppendToBitmap(*valid_begin); |
396 | ++valid_begin; |
397 | } |
398 | } |
399 | |
400 | return Status::OK(); |
401 | } |
402 | |
403 | Status FinishInternal(std::shared_ptr<ArrayData>* out) override; |
404 | void Reset() override; |
405 | Status Resize(int64_t capacity) override; |
406 | |
407 | protected: |
408 | std::shared_ptr<ResizableBuffer> data_; |
409 | uint8_t* raw_data_; |
410 | }; |
411 | |
412 | } // namespace arrow |
413 | |