1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include "arrow/array/builder_primitive.h" |
19 | |
20 | #include <algorithm> |
21 | #include <cstddef> |
22 | #include <cstdint> |
23 | #include <cstring> |
24 | #include <sstream> |
25 | #include <utility> |
26 | #include <vector> |
27 | |
28 | #include "arrow/array.h" |
29 | #include "arrow/buffer.h" |
30 | #include "arrow/status.h" |
31 | #include "arrow/type.h" |
32 | #include "arrow/type_traits.h" |
33 | #include "arrow/util/bit-util.h" |
34 | #include "arrow/util/int-util.h" |
35 | #include "arrow/util/logging.h" |
36 | |
37 | namespace arrow { |
38 | |
39 | // ---------------------------------------------------------------------- |
40 | // Null builder |
41 | |
42 | Status NullBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) { |
43 | *out = ArrayData::Make(null(), length_, {nullptr}, length_); |
44 | length_ = null_count_ = 0; |
45 | return Status::OK(); |
46 | } |
47 | |
48 | // ---------------------------------------------------------------------- |
49 | |
50 | template <typename T> |
51 | void PrimitiveBuilder<T>::Reset() { |
52 | data_.reset(); |
53 | raw_data_ = nullptr; |
54 | } |
55 | |
56 | template <typename T> |
57 | Status PrimitiveBuilder<T>::Resize(int64_t capacity) { |
58 | RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); |
59 | capacity = std::max(capacity, kMinBuilderCapacity); |
60 | |
61 | int64_t nbytes = TypeTraits<T>::bytes_required(capacity); |
62 | if (capacity_ == 0) { |
63 | RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_)); |
64 | } else { |
65 | RETURN_NOT_OK(data_->Resize(nbytes)); |
66 | } |
67 | |
68 | raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data()); |
69 | return ArrayBuilder::Resize(capacity); |
70 | } |
71 | |
72 | template <typename T> |
73 | Status PrimitiveBuilder<T>::AppendValues(const value_type* values, int64_t length, |
74 | const uint8_t* valid_bytes) { |
75 | RETURN_NOT_OK(Reserve(length)); |
76 | |
77 | if (length > 0) { |
78 | std::memcpy(raw_data_ + length_, values, |
79 | static_cast<std::size_t>(TypeTraits<T>::bytes_required(length))); |
80 | } |
81 | |
82 | // length_ is update by these |
83 | ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length); |
84 | return Status::OK(); |
85 | } |
86 | |
87 | template <typename T> |
88 | Status PrimitiveBuilder<T>::AppendValues(const value_type* values, int64_t length, |
89 | const std::vector<bool>& is_valid) { |
90 | RETURN_NOT_OK(Reserve(length)); |
91 | DCHECK_EQ(length, static_cast<int64_t>(is_valid.size())); |
92 | |
93 | if (length > 0) { |
94 | std::memcpy(raw_data_ + length_, values, |
95 | static_cast<std::size_t>(TypeTraits<T>::bytes_required(length))); |
96 | } |
97 | |
98 | // length_ is update by these |
99 | ArrayBuilder::UnsafeAppendToBitmap(is_valid); |
100 | return Status::OK(); |
101 | } |
102 | |
103 | template <typename T> |
104 | Status PrimitiveBuilder<T>::AppendValues(const std::vector<value_type>& values, |
105 | const std::vector<bool>& is_valid) { |
106 | return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid); |
107 | } |
108 | |
109 | template <typename T> |
110 | Status PrimitiveBuilder<T>::AppendValues(const std::vector<value_type>& values) { |
111 | return AppendValues(values.data(), static_cast<int64_t>(values.size())); |
112 | } |
113 | |
114 | template <typename T> |
115 | Status PrimitiveBuilder<T>::FinishInternal(std::shared_ptr<ArrayData>* out) { |
116 | RETURN_NOT_OK(TrimBuffer(TypeTraits<T>::bytes_required(length_), data_.get())); |
117 | std::shared_ptr<Buffer> null_bitmap; |
118 | RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); |
119 | *out = ArrayData::Make(type_, length_, {null_bitmap, data_}, null_count_); |
120 | |
121 | data_ = nullptr; |
122 | capacity_ = length_ = null_count_ = 0; |
123 | |
124 | return Status::OK(); |
125 | } |
126 | |
// Explicit instantiations of PrimitiveBuilder for each of the listed types.
// The member function templates are defined in this file, so their code is
// generated here.
template class PrimitiveBuilder<UInt8Type>;
template class PrimitiveBuilder<UInt16Type>;
template class PrimitiveBuilder<UInt32Type>;
template class PrimitiveBuilder<UInt64Type>;
template class PrimitiveBuilder<Int8Type>;
template class PrimitiveBuilder<Int16Type>;
template class PrimitiveBuilder<Int32Type>;
template class PrimitiveBuilder<Int64Type>;
template class PrimitiveBuilder<Date32Type>;
template class PrimitiveBuilder<Date64Type>;
template class PrimitiveBuilder<Time32Type>;
template class PrimitiveBuilder<Time64Type>;
template class PrimitiveBuilder<TimestampType>;
template class PrimitiveBuilder<HalfFloatType>;
template class PrimitiveBuilder<FloatType>;
template class PrimitiveBuilder<DoubleType>;
143 | |
// Creates an empty boolean builder that uses `pool`. The builder's type is
// boolean(); no value buffer exists yet (data_ and raw_data_ start out null).
BooleanBuilder::BooleanBuilder(MemoryPool* pool)
    : ArrayBuilder(boolean(), pool), data_(nullptr), raw_data_(nullptr) {}
146 | |
// Creates an empty boolean builder from an explicit type. `type` is only
// checked (via DCHECK) to have id Type::BOOL; construction is otherwise
// delegated to the pool-only constructor.
BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
    : BooleanBuilder(pool) {
  DCHECK_EQ(Type::BOOL, type->id());
}
151 | |
152 | void BooleanBuilder::Reset() { |
153 | ArrayBuilder::Reset(); |
154 | data_.reset(); |
155 | raw_data_ = nullptr; |
156 | } |
157 | |
158 | Status BooleanBuilder::Resize(int64_t capacity) { |
159 | RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); |
160 | capacity = std::max(capacity, kMinBuilderCapacity); |
161 | |
162 | const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity); |
163 | if (capacity_ == 0) { |
164 | RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_bitmap_size, &data_)); |
165 | raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data()); |
166 | |
167 | // We zero the memory for booleans to keep things simple; for some reason if |
168 | // we do not, even though we may write every bit (through in-place | or &), |
169 | // valgrind will still show a warning. If we do not zero the bytes here, we |
170 | // will have to be careful to zero them in AppendNull and AppendNulls. Also, |
171 | // zeroing the bits results in deterministic bits when each byte may have a |
172 | // mix of nulls and not nulls. |
173 | // |
174 | // We only zero up to new_bitmap_size because the padding was zeroed by |
175 | // AllocateResizableBuffer |
176 | memset(raw_data_, 0, static_cast<size_t>(new_bitmap_size)); |
177 | } else { |
178 | const int64_t old_bitmap_capacity = data_->capacity(); |
179 | RETURN_NOT_OK(data_->Resize(new_bitmap_size)); |
180 | const int64_t new_bitmap_capacity = data_->capacity(); |
181 | raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data()); |
182 | |
183 | // See comment above about why we zero memory for booleans |
184 | memset(raw_data_ + old_bitmap_capacity, 0, |
185 | static_cast<size_t>(new_bitmap_capacity - old_bitmap_capacity)); |
186 | } |
187 | |
188 | return ArrayBuilder::Resize(capacity); |
189 | } |
190 | |
191 | Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) { |
192 | int64_t bit_offset = length_ % 8; |
193 | if (bit_offset > 0) { |
194 | // Adjust last byte |
195 | data_->mutable_data()[length_ / 8] &= BitUtil::kPrecedingBitmask[bit_offset]; |
196 | } |
197 | |
198 | std::shared_ptr<Buffer> null_bitmap; |
199 | RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); |
200 | RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), data_.get())); |
201 | |
202 | *out = ArrayData::Make(boolean(), length_, {null_bitmap, data_}, null_count_); |
203 | |
204 | data_ = nullptr; |
205 | capacity_ = length_ = null_count_ = 0; |
206 | return Status::OK(); |
207 | } |
208 | |
209 | Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length, |
210 | const uint8_t* valid_bytes) { |
211 | RETURN_NOT_OK(Reserve(length)); |
212 | |
213 | int64_t i = 0; |
214 | internal::GenerateBitsUnrolled(raw_data_, length_, length, |
215 | [values, &i]() -> bool { return values[i++] != 0; }); |
216 | |
217 | // this updates length_ |
218 | ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length); |
219 | return Status::OK(); |
220 | } |
221 | |
222 | Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length, |
223 | const std::vector<bool>& is_valid) { |
224 | RETURN_NOT_OK(Reserve(length)); |
225 | DCHECK_EQ(length, static_cast<int64_t>(is_valid.size())); |
226 | |
227 | int64_t i = 0; |
228 | internal::GenerateBitsUnrolled(raw_data_, length_, length, |
229 | [values, &i]() -> bool { return values[i++]; }); |
230 | |
231 | // this updates length_ |
232 | ArrayBuilder::UnsafeAppendToBitmap(is_valid); |
233 | return Status::OK(); |
234 | } |
235 | |
236 | Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values, |
237 | const std::vector<bool>& is_valid) { |
238 | return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid); |
239 | } |
240 | |
241 | Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values) { |
242 | return AppendValues(values.data(), static_cast<int64_t>(values.size())); |
243 | } |
244 | |
245 | Status BooleanBuilder::AppendValues(const std::vector<bool>& values, |
246 | const std::vector<bool>& is_valid) { |
247 | const int64_t length = static_cast<int64_t>(values.size()); |
248 | RETURN_NOT_OK(Reserve(length)); |
249 | DCHECK_EQ(length, static_cast<int64_t>(is_valid.size())); |
250 | |
251 | int64_t i = 0; |
252 | internal::GenerateBitsUnrolled(raw_data_, length_, length, |
253 | [&values, &i]() -> bool { return values[i++]; }); |
254 | |
255 | // this updates length_ |
256 | ArrayBuilder::UnsafeAppendToBitmap(is_valid); |
257 | return Status::OK(); |
258 | } |
259 | |
260 | Status BooleanBuilder::AppendValues(const std::vector<bool>& values) { |
261 | const int64_t length = static_cast<int64_t>(values.size()); |
262 | RETURN_NOT_OK(Reserve(length)); |
263 | |
264 | int64_t i = 0; |
265 | internal::GenerateBitsUnrolled(raw_data_, length_, length, |
266 | [&values, &i]() -> bool { return values[i++]; }); |
267 | |
268 | // this updates length_ |
269 | ArrayBuilder::UnsafeSetNotNull(length); |
270 | return Status::OK(); |
271 | } |
272 | |
273 | } // namespace arrow |
274 | |