1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <algorithm> |
19 | #include <array> |
20 | #include <cstdint> |
21 | #include <cstring> |
22 | #include <iterator> |
23 | #include <limits> |
24 | #include <memory> |
25 | #include <numeric> |
26 | #include <string> |
27 | #include <type_traits> |
28 | #include <vector> |
29 | |
30 | #include <gtest/gtest.h> |
31 | |
32 | #include "arrow/array.h" |
33 | #include "arrow/buffer-builder.h" |
34 | #include "arrow/buffer.h" |
35 | #include "arrow/builder.h" |
36 | #include "arrow/ipc/test-common.h" |
37 | #include "arrow/memory_pool.h" |
38 | #include "arrow/record_batch.h" |
39 | #include "arrow/status.h" |
40 | #include "arrow/test-common.h" |
41 | #include "arrow/test-util.h" |
42 | #include "arrow/type.h" |
43 | #include "arrow/util/bit-util.h" |
44 | #include "arrow/util/checked_cast.h" |
45 | #include "arrow/util/decimal.h" |
46 | #include "arrow/util/lazy.h" |
47 | |
48 | // This file is compiled together with array-*-test.cc into a single |
49 | // executable array-test. |
50 | |
51 | namespace arrow { |
52 | |
53 | using std::string; |
54 | using std::vector; |
55 | |
56 | using internal::checked_cast; |
57 | |
58 | class TestArray : public ::testing::Test { |
59 | public: |
60 | void SetUp() { pool_ = default_memory_pool(); } |
61 | |
62 | protected: |
63 | MemoryPool* pool_; |
64 | }; |
65 | |
66 | TEST_F(TestArray, TestNullCount) { |
67 | // These are placeholders |
68 | auto data = std::make_shared<Buffer>(nullptr, 0); |
69 | auto null_bitmap = std::make_shared<Buffer>(nullptr, 0); |
70 | |
71 | std::unique_ptr<Int32Array> arr(new Int32Array(100, data, null_bitmap, 10)); |
72 | ASSERT_EQ(10, arr->null_count()); |
73 | |
74 | std::unique_ptr<Int32Array> arr_no_nulls(new Int32Array(100, data)); |
75 | ASSERT_EQ(0, arr_no_nulls->null_count()); |
76 | } |
77 | |
78 | TEST_F(TestArray, TestLength) { |
79 | // Placeholder buffer |
80 | auto data = std::make_shared<Buffer>(nullptr, 0); |
81 | |
82 | std::unique_ptr<Int32Array> arr(new Int32Array(100, data)); |
83 | ASSERT_EQ(arr->length(), 100); |
84 | } |
85 | |
86 | Status MakeArrayFromValidBytes(const vector<uint8_t>& v, MemoryPool* pool, |
87 | std::shared_ptr<Array>* out) { |
88 | int64_t null_count = v.size() - std::accumulate(v.begin(), v.end(), 0); |
89 | |
90 | std::shared_ptr<Buffer> null_buf; |
91 | RETURN_NOT_OK(BitUtil::BytesToBits(v, default_memory_pool(), &null_buf)); |
92 | |
93 | TypedBufferBuilder<int32_t> value_builder(pool); |
94 | for (size_t i = 0; i < v.size(); ++i) { |
95 | RETURN_NOT_OK(value_builder.Append(0)); |
96 | } |
97 | |
98 | std::shared_ptr<Buffer> values; |
99 | RETURN_NOT_OK(value_builder.Finish(&values)); |
100 | *out = std::make_shared<Int32Array>(v.size(), values, null_buf, null_count); |
101 | return Status::OK(); |
102 | } |
103 | |
104 | TEST_F(TestArray, TestEquality) { |
105 | std::shared_ptr<Array> array, equal_array, unequal_array; |
106 | |
107 | ASSERT_OK(MakeArrayFromValidBytes({1, 0, 1, 1, 0, 1, 0, 0}, pool_, &array)); |
108 | ASSERT_OK(MakeArrayFromValidBytes({1, 0, 1, 1, 0, 1, 0, 0}, pool_, &equal_array)); |
109 | ASSERT_OK(MakeArrayFromValidBytes({1, 1, 1, 1, 0, 1, 0, 0}, pool_, &unequal_array)); |
110 | |
111 | EXPECT_TRUE(array->Equals(array)); |
112 | EXPECT_TRUE(array->Equals(equal_array)); |
113 | EXPECT_TRUE(equal_array->Equals(array)); |
114 | EXPECT_FALSE(equal_array->Equals(unequal_array)); |
115 | EXPECT_FALSE(unequal_array->Equals(equal_array)); |
116 | EXPECT_TRUE(array->RangeEquals(4, 8, 4, unequal_array)); |
117 | EXPECT_FALSE(array->RangeEquals(0, 4, 0, unequal_array)); |
118 | EXPECT_FALSE(array->RangeEquals(0, 8, 0, unequal_array)); |
119 | EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array)); |
120 | |
121 | auto timestamp_ns_array = std::make_shared<NumericArray<TimestampType>>( |
122 | timestamp(TimeUnit::NANO), array->length(), array->data()->buffers[1], |
123 | array->data()->buffers[0], array->null_count()); |
124 | auto timestamp_us_array = std::make_shared<NumericArray<TimestampType>>( |
125 | timestamp(TimeUnit::MICRO), array->length(), array->data()->buffers[1], |
126 | array->data()->buffers[0], array->null_count()); |
127 | ASSERT_FALSE(array->Equals(timestamp_ns_array)); |
128 | // ARROW-2567: Ensure that not only the type id but also the type equality |
129 | // itself is checked. |
130 | ASSERT_FALSE(timestamp_us_array->Equals(timestamp_ns_array)); |
131 | } |
132 | |
133 | TEST_F(TestArray, TestNullArrayEquality) { |
134 | auto array_1 = std::make_shared<NullArray>(10); |
135 | auto array_2 = std::make_shared<NullArray>(10); |
136 | auto array_3 = std::make_shared<NullArray>(20); |
137 | |
138 | EXPECT_TRUE(array_1->Equals(array_1)); |
139 | EXPECT_TRUE(array_1->Equals(array_2)); |
140 | EXPECT_FALSE(array_1->Equals(array_3)); |
141 | } |
142 | |
143 | TEST_F(TestArray, SliceRecomputeNullCount) { |
144 | vector<uint8_t> valid_bytes = {1, 0, 1, 1, 0, 1, 0, 0, 0}; |
145 | |
146 | std::shared_ptr<Array> array; |
147 | ASSERT_OK(MakeArrayFromValidBytes(valid_bytes, pool_, &array)); |
148 | |
149 | ASSERT_EQ(5, array->null_count()); |
150 | |
151 | auto slice = array->Slice(1, 4); |
152 | ASSERT_EQ(2, slice->null_count()); |
153 | |
154 | slice = array->Slice(4); |
155 | ASSERT_EQ(4, slice->null_count()); |
156 | |
157 | slice = array->Slice(0); |
158 | ASSERT_EQ(5, slice->null_count()); |
159 | |
160 | // No bitmap, compute 0 |
161 | std::shared_ptr<Buffer> data; |
162 | const int kBufferSize = 64; |
163 | ASSERT_OK(AllocateBuffer(pool_, kBufferSize, &data)); |
164 | memset(data->mutable_data(), 0, kBufferSize); |
165 | |
166 | auto arr = std::make_shared<Int32Array>(16, data, nullptr, -1); |
167 | ASSERT_EQ(0, arr->null_count()); |
168 | } |
169 | |
170 | TEST_F(TestArray, NullArraySliceNullCount) { |
171 | auto null_arr = std::make_shared<NullArray>(10); |
172 | auto null_arr_sliced = null_arr->Slice(3, 6); |
173 | |
174 | // The internal null count is 6, does not require recomputation |
175 | ASSERT_EQ(6, null_arr_sliced->data()->null_count); |
176 | |
177 | ASSERT_EQ(6, null_arr_sliced->null_count()); |
178 | } |
179 | |
180 | TEST_F(TestArray, TestIsNullIsValid) { |
181 | // clang-format off |
182 | vector<uint8_t> null_bitmap = {1, 0, 1, 1, 0, 1, 0, 0, |
183 | 1, 0, 1, 1, 0, 1, 0, 0, |
184 | 1, 0, 1, 1, 0, 1, 0, 0, |
185 | 1, 0, 1, 1, 0, 1, 0, 0, |
186 | 1, 0, 0, 1}; |
187 | // clang-format on |
188 | int64_t null_count = 0; |
189 | for (uint8_t x : null_bitmap) { |
190 | if (x == 0) { |
191 | ++null_count; |
192 | } |
193 | } |
194 | |
195 | std::shared_ptr<Buffer> null_buf; |
196 | ASSERT_OK(BitUtil::BytesToBits(null_bitmap, default_memory_pool(), &null_buf)); |
197 | |
198 | std::unique_ptr<Array> arr; |
199 | arr.reset(new Int32Array(null_bitmap.size(), nullptr, null_buf, null_count)); |
200 | |
201 | ASSERT_EQ(null_count, arr->null_count()); |
202 | ASSERT_EQ(5, null_buf->size()); |
203 | |
204 | ASSERT_TRUE(arr->null_bitmap()->Equals(*null_buf.get())); |
205 | |
206 | for (size_t i = 0; i < null_bitmap.size(); ++i) { |
207 | EXPECT_EQ(null_bitmap[i] != 0, !arr->IsNull(i)) << i; |
208 | EXPECT_EQ(null_bitmap[i] != 0, arr->IsValid(i)) << i; |
209 | } |
210 | } |
211 | |
212 | TEST_F(TestArray, TestIsNullIsValidNoNulls) { |
213 | const int64_t size = 10; |
214 | |
215 | std::unique_ptr<Array> arr; |
216 | arr.reset(new Int32Array(size, nullptr, nullptr, 0)); |
217 | |
218 | for (size_t i = 0; i < size; ++i) { |
219 | EXPECT_TRUE(arr->IsValid(i)); |
220 | EXPECT_FALSE(arr->IsNull(i)); |
221 | } |
222 | } |
223 | |
224 | TEST_F(TestArray, BuildLargeInMemoryArray) { |
225 | #ifdef NDEBUG |
226 | const int64_t length = static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1; |
227 | #elif !defined(ARROW_VALGRIND) |
228 | // use a smaller size since the insert function isn't optimized properly on debug and |
229 | // the test takes a long time to complete |
230 | const int64_t length = 2 << 24; |
231 | #else |
232 | // use an even smaller size with valgrind |
233 | const int64_t length = 2 << 20; |
234 | #endif |
235 | |
236 | BooleanBuilder builder; |
237 | std::vector<bool> zeros(length); |
238 | ASSERT_OK(builder.AppendValues(zeros)); |
239 | |
240 | std::shared_ptr<Array> result; |
241 | FinishAndCheckPadding(&builder, &result); |
242 | |
243 | ASSERT_EQ(length, result->length()); |
244 | } |
245 | |
// Placeholder: the body is empty, so this test checks nothing and always
// passes.
TEST_F(TestArray, TestCopy) {}
247 | |
248 | // ---------------------------------------------------------------------- |
249 | // Null type tests |
250 | |
251 | TEST(TestNullBuilder, Basics) { |
252 | NullBuilder builder; |
253 | std::shared_ptr<Array> array; |
254 | |
255 | ASSERT_OK(builder.AppendNull()); |
256 | ASSERT_OK(builder.Append(nullptr)); |
257 | ASSERT_OK(builder.AppendNull()); |
258 | ASSERT_OK(builder.Finish(&array)); |
259 | |
260 | const auto& null_array = checked_cast<NullArray&>(*array); |
261 | ASSERT_EQ(null_array.length(), 3); |
262 | ASSERT_EQ(null_array.null_count(), 3); |
263 | } |
264 | |
265 | // ---------------------------------------------------------------------- |
266 | // Primitive type tests |
267 | |
268 | TEST_F(TestBuilder, TestReserve) { |
269 | UInt8Builder builder(pool_); |
270 | |
271 | ASSERT_OK(builder.Resize(1000)); |
272 | ASSERT_EQ(1000, builder.capacity()); |
273 | |
274 | // Builder only contains 0 elements, but calling Reserve will result in a round |
275 | // up to next power of 2 |
276 | ASSERT_OK(builder.Reserve(1030)); |
277 | ASSERT_EQ(BitUtil::NextPower2(1030), builder.capacity()); |
278 | } |
279 | |
280 | TEST_F(TestBuilder, TestResizeDownsize) { |
281 | UInt8Builder builder(pool_); |
282 | |
283 | ASSERT_OK(builder.Resize(1000)); |
284 | ASSERT_EQ(1000, builder.capacity()); |
285 | |
286 | // Can't downsize. |
287 | ASSERT_RAISES(Invalid, builder.Resize(500)); |
288 | } |
289 | |
290 | template <typename Attrs> |
291 | class TestPrimitiveBuilder : public TestBuilder { |
292 | public: |
293 | typedef typename Attrs::ArrayType ArrayType; |
294 | typedef typename Attrs::BuilderType BuilderType; |
295 | typedef typename Attrs::T T; |
296 | typedef typename Attrs::Type Type; |
297 | |
298 | virtual void SetUp() { |
299 | TestBuilder::SetUp(); |
300 | |
301 | type_ = Attrs::type(); |
302 | |
303 | std::unique_ptr<ArrayBuilder> tmp; |
304 | ASSERT_OK(MakeBuilder(pool_, type_, &tmp)); |
305 | builder_.reset(checked_cast<BuilderType*>(tmp.release())); |
306 | |
307 | ASSERT_OK(MakeBuilder(pool_, type_, &tmp)); |
308 | builder_nn_.reset(checked_cast<BuilderType*>(tmp.release())); |
309 | } |
310 | |
311 | void RandomData(int64_t N, double pct_null = 0.1) { |
312 | Attrs::draw(N, &draws_); |
313 | |
314 | valid_bytes_.resize(static_cast<size_t>(N)); |
315 | random_null_bytes(N, pct_null, valid_bytes_.data()); |
316 | } |
317 | |
318 | void Check(const std::unique_ptr<BuilderType>& builder, bool nullable) { |
319 | int64_t size = builder->length(); |
320 | auto ex_data = Buffer::Wrap(draws_.data(), size); |
321 | |
322 | std::shared_ptr<Buffer> ex_null_bitmap; |
323 | int64_t ex_null_count = 0; |
324 | |
325 | if (nullable) { |
326 | ASSERT_OK( |
327 | BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &ex_null_bitmap)); |
328 | ex_null_count = CountNulls(valid_bytes_); |
329 | } else { |
330 | ex_null_bitmap = nullptr; |
331 | } |
332 | |
333 | auto expected = |
334 | std::make_shared<ArrayType>(size, ex_data, ex_null_bitmap, ex_null_count); |
335 | |
336 | std::shared_ptr<Array> out; |
337 | FinishAndCheckPadding(builder.get(), &out); |
338 | |
339 | std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(out); |
340 | |
341 | // Builder is now reset |
342 | ASSERT_EQ(0, builder->length()); |
343 | ASSERT_EQ(0, builder->capacity()); |
344 | ASSERT_EQ(0, builder->null_count()); |
345 | |
346 | ASSERT_EQ(ex_null_count, result->null_count()); |
347 | ASSERT_TRUE(result->Equals(*expected)); |
348 | } |
349 | |
350 | void FlipValue(T* ptr) { |
351 | auto byteptr = reinterpret_cast<uint8_t*>(ptr); |
352 | *byteptr = static_cast<uint8_t>(~*byteptr); |
353 | } |
354 | |
355 | protected: |
356 | std::unique_ptr<BuilderType> builder_; |
357 | std::unique_ptr<BuilderType> builder_nn_; |
358 | |
359 | vector<T> draws_; |
360 | vector<uint8_t> valid_bytes_; |
361 | }; |
362 | |
/// \brief Maps an integral type to the integer type used when sampling it.
///
/// uint8_t isn't a valid template parameter to uniform_int_distribution, so
/// the 8-bit types are widened to their 16-bit counterparts; every other
/// integral type maps to itself.
template <typename T,
          typename Enable = typename std::enable_if<std::is_integral<T>::value, T>::type>
struct UniformIntSampleType {
  using type = T;
};

// Signed 8-bit values are sampled as int16_t.
template <>
struct UniformIntSampleType<int8_t> {
  using type = int16_t;
};

// Unsigned 8-bit values are sampled as uint16_t.
template <>
struct UniformIntSampleType<uint8_t> {
  using type = uint16_t;
};
380 | |
// Emits the members shared by every P* attribute struct: the ArrayType,
// BuilderType, Type and T aliases derived from CapType / c_type, plus a
// static type() factory returning a fresh instance of the data type.
#define PTYPE_DECL(CapType, c_type)         \
  using ArrayType = CapType##Array;         \
  using BuilderType = CapType##Builder;     \
  using Type = CapType##Type;               \
  using T = c_type;                         \
                                            \
  static std::shared_ptr<DataType> type() { \
    return std::make_shared<Type>();        \
  }
388 | |
// Declares the attribute struct P<CapType> for an integer type. Its draw()
// asks randint for N values spanning the full range of the C type, with the
// bounds converted to the type selected by UniformIntSampleType.
#define PINT_DECL(CapType, c_type)                                             \
  struct P##CapType {                                                          \
    PTYPE_DECL(CapType, c_type)                                                \
    static void draw(int64_t N, vector<T>* draws) {                            \
      using sample_type = typename UniformIntSampleType<c_type>::type;         \
      randint(N, static_cast<sample_type>(std::numeric_limits<T>::min()),      \
              static_cast<sample_type>(std::numeric_limits<T>::max()), draws); \
    }                                                                          \
  }
400 | |
// Declares the attribute struct P<CapType> for a floating-point type. Its
// draw() forwards N, a literal 0, the LOWER / UPPER bounds and the output
// vector to random_real.
// NOTE(review): the literal 0 is presumably a seed -- confirm against the
// declaration of random_real.
#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER)  \
  struct P##CapType {                               \
    PTYPE_DECL(CapType, c_type)                     \
    static void draw(int64_t N, vector<T>* draws) { \
      random_real(N, 0, LOWER, UPPER, draws);       \
    }                                               \
  }
408 | |
// Attribute structs for the unsigned integer types (PUInt8 ... PUInt64).
PINT_DECL(UInt8, uint8_t);
PINT_DECL(UInt16, uint16_t);
PINT_DECL(UInt32, uint32_t);
PINT_DECL(UInt64, uint64_t);

// Attribute structs for the signed integer types (PInt8 ... PInt64).
PINT_DECL(Int8, int8_t);
PINT_DECL(Int16, int16_t);
PINT_DECL(Int32, int32_t);
PINT_DECL(Int64, int64_t);

// Attribute structs for the floating-point types, with -1000 and 1000 as the
// LOWER / UPPER arguments.
PFLOAT_DECL(Float, float, -1000.0f, 1000.0f);
PFLOAT_DECL(Double, double, -1000.0, 1000.0);
421 | |
// Attribute struct for booleans, with uint8_t as the element type T. It
// declares no draw(); TestPrimitiveBuilder<PBoolean> is specialized
// separately for that.
struct PBoolean {
  PTYPE_DECL(Boolean, uint8_t)
};
425 | |
426 | template <> |
427 | void TestPrimitiveBuilder<PBoolean>::RandomData(int64_t N, double pct_null) { |
428 | draws_.resize(static_cast<size_t>(N)); |
429 | valid_bytes_.resize(static_cast<size_t>(N)); |
430 | |
431 | random_null_bytes(N, 0.5, draws_.data()); |
432 | random_null_bytes(N, pct_null, valid_bytes_.data()); |
433 | } |
434 | |
435 | template <> |
436 | void TestPrimitiveBuilder<PBoolean>::FlipValue(T* ptr) { |
437 | *ptr = !*ptr; |
438 | } |
439 | |
440 | template <> |
441 | void TestPrimitiveBuilder<PBoolean>::Check(const std::unique_ptr<BooleanBuilder>& builder, |
442 | bool nullable) { |
443 | const int64_t size = builder->length(); |
444 | |
445 | // Build expected result array |
446 | std::shared_ptr<Buffer> ex_data; |
447 | std::shared_ptr<Buffer> ex_null_bitmap; |
448 | int64_t ex_null_count = 0; |
449 | |
450 | ASSERT_OK(BitUtil::BytesToBits(draws_, default_memory_pool(), &ex_data)); |
451 | if (nullable) { |
452 | ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &ex_null_bitmap)); |
453 | ex_null_count = CountNulls(valid_bytes_); |
454 | } else { |
455 | ex_null_bitmap = nullptr; |
456 | } |
457 | auto expected = |
458 | std::make_shared<BooleanArray>(size, ex_data, ex_null_bitmap, ex_null_count); |
459 | ASSERT_EQ(size, expected->length()); |
460 | |
461 | // Finish builder and check result array |
462 | std::shared_ptr<Array> out; |
463 | FinishAndCheckPadding(builder.get(), &out); |
464 | |
465 | std::shared_ptr<BooleanArray> result = std::dynamic_pointer_cast<BooleanArray>(out); |
466 | |
467 | ASSERT_EQ(ex_null_count, result->null_count()); |
468 | ASSERT_EQ(size, result->length()); |
469 | |
470 | for (int64_t i = 0; i < size; ++i) { |
471 | if (nullable) { |
472 | ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i; |
473 | } else { |
474 | ASSERT_FALSE(result->IsNull(i)); |
475 | } |
476 | if (!result->IsNull(i)) { |
477 | bool actual = BitUtil::GetBit(result->values()->data(), i); |
478 | ASSERT_EQ(draws_[i] != 0, actual) << i; |
479 | } |
480 | } |
481 | ASSERT_TRUE(result->Equals(*expected)); |
482 | |
483 | // Builder is now reset |
484 | ASSERT_EQ(0, builder->length()); |
485 | ASSERT_EQ(0, builder->capacity()); |
486 | ASSERT_EQ(0, builder->null_count()); |
487 | } |
488 | |
// Attribute structs over which the TestPrimitiveBuilder typed tests are
// instantiated: boolean, the eight integer widths, float and double.
typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PInt16,
                         PInt32, PInt64, PFloat, PDouble>
    Primitives;

// Registers the type list with the TestPrimitiveBuilder fixture.
TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
494 | |
495 | TYPED_TEST(TestPrimitiveBuilder, TestInit) { |
496 | int64_t n = 1000; |
497 | ASSERT_OK(this->builder_->Reserve(n)); |
498 | ASSERT_EQ(BitUtil::NextPower2(n), this->builder_->capacity()); |
499 | |
500 | // unsure if this should go in all builder classes |
501 | ASSERT_EQ(0, this->builder_->num_children()); |
502 | } |
503 | |
504 | TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) { |
505 | int64_t size = 1000; |
506 | for (int64_t i = 0; i < size; ++i) { |
507 | ASSERT_OK(this->builder_->AppendNull()); |
508 | } |
509 | |
510 | std::shared_ptr<Array> out; |
511 | FinishAndCheckPadding(this->builder_.get(), &out); |
512 | auto result = std::dynamic_pointer_cast<typename TypeParam::ArrayType>(out); |
513 | |
514 | for (int64_t i = 0; i < size; ++i) { |
515 | ASSERT_TRUE(result->IsNull(i)) << i; |
516 | } |
517 | } |
518 | |
519 | TYPED_TEST(TestPrimitiveBuilder, TestAppendNulls) { |
520 | const int64_t size = 10; |
521 | const uint8_t valid_bytes[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; |
522 | |
523 | ASSERT_OK(this->builder_->AppendNulls(valid_bytes, size)); |
524 | |
525 | std::shared_ptr<Array> result; |
526 | FinishAndCheckPadding(this->builder_.get(), &result); |
527 | |
528 | for (int64_t i = 0; i < size; ++i) { |
529 | ASSERT_EQ(result->IsValid(i), static_cast<bool>(valid_bytes[i])); |
530 | } |
531 | } |
532 | |
533 | TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) { |
534 | DECL_T(); |
535 | |
536 | int64_t size = 1000; |
537 | |
538 | vector<T>& draws = this->draws_; |
539 | vector<uint8_t>& valid_bytes = this->valid_bytes_; |
540 | |
541 | int64_t memory_before = this->pool_->bytes_allocated(); |
542 | |
543 | this->RandomData(size); |
544 | ASSERT_OK(this->builder_->Reserve(size)); |
545 | |
546 | int64_t i; |
547 | for (i = 0; i < size; ++i) { |
548 | if (valid_bytes[i] > 0) { |
549 | ASSERT_OK(this->builder_->Append(draws[i])); |
550 | } else { |
551 | ASSERT_OK(this->builder_->AppendNull()); |
552 | } |
553 | } |
554 | |
555 | do { |
556 | std::shared_ptr<Array> result; |
557 | FinishAndCheckPadding(this->builder_.get(), &result); |
558 | } while (false); |
559 | |
560 | ASSERT_EQ(memory_before, this->pool_->bytes_allocated()); |
561 | } |
562 | |
563 | TYPED_TEST(TestPrimitiveBuilder, Equality) { |
564 | DECL_T(); |
565 | |
566 | const int64_t size = 1000; |
567 | this->RandomData(size); |
568 | vector<T>& draws = this->draws_; |
569 | vector<uint8_t>& valid_bytes = this->valid_bytes_; |
570 | std::shared_ptr<Array> array, equal_array, unequal_array; |
571 | auto builder = this->builder_.get(); |
572 | ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &array)); |
573 | ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &equal_array)); |
574 | |
575 | // Make the not equal array by negating the first valid element with itself. |
576 | const auto first_valid = std::find_if(valid_bytes.begin(), valid_bytes.end(), |
577 | [](uint8_t valid) { return valid > 0; }); |
578 | const int64_t first_valid_idx = std::distance(valid_bytes.begin(), first_valid); |
579 | // This should be true with a very high probability, but might introduce flakiness |
580 | ASSERT_LT(first_valid_idx, size - 1); |
581 | this->FlipValue(&draws[first_valid_idx]); |
582 | ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &unequal_array)); |
583 | |
584 | // test normal equality |
585 | EXPECT_TRUE(array->Equals(array)); |
586 | EXPECT_TRUE(array->Equals(equal_array)); |
587 | EXPECT_TRUE(equal_array->Equals(array)); |
588 | EXPECT_FALSE(equal_array->Equals(unequal_array)); |
589 | EXPECT_FALSE(unequal_array->Equals(equal_array)); |
590 | |
591 | // Test range equality |
592 | EXPECT_FALSE(array->RangeEquals(0, first_valid_idx + 1, 0, unequal_array)); |
593 | EXPECT_FALSE(array->RangeEquals(first_valid_idx, size, first_valid_idx, unequal_array)); |
594 | EXPECT_TRUE(array->RangeEquals(0, first_valid_idx, 0, unequal_array)); |
595 | EXPECT_TRUE( |
596 | array->RangeEquals(first_valid_idx + 1, size, first_valid_idx + 1, unequal_array)); |
597 | } |
598 | |
599 | TYPED_TEST(TestPrimitiveBuilder, SliceEquality) { |
600 | DECL_T(); |
601 | |
602 | const int64_t size = 1000; |
603 | this->RandomData(size); |
604 | vector<T>& draws = this->draws_; |
605 | vector<uint8_t>& valid_bytes = this->valid_bytes_; |
606 | auto builder = this->builder_.get(); |
607 | |
608 | std::shared_ptr<Array> array; |
609 | ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &array)); |
610 | |
611 | std::shared_ptr<Array> slice, slice2; |
612 | |
613 | slice = array->Slice(5); |
614 | slice2 = array->Slice(5); |
615 | ASSERT_EQ(size - 5, slice->length()); |
616 | |
617 | ASSERT_TRUE(slice->Equals(slice2)); |
618 | ASSERT_TRUE(array->RangeEquals(5, array->length(), 0, slice)); |
619 | |
620 | // Chained slices |
621 | slice2 = array->Slice(2)->Slice(3); |
622 | ASSERT_TRUE(slice->Equals(slice2)); |
623 | |
624 | slice = array->Slice(5, 10); |
625 | slice2 = array->Slice(5, 10); |
626 | ASSERT_EQ(10, slice->length()); |
627 | |
628 | ASSERT_TRUE(slice->Equals(slice2)); |
629 | ASSERT_TRUE(array->RangeEquals(5, 15, 0, slice)); |
630 | } |
631 | |
632 | TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { |
633 | DECL_T(); |
634 | |
635 | const int64_t size = 10000; |
636 | |
637 | vector<T>& draws = this->draws_; |
638 | vector<uint8_t>& valid_bytes = this->valid_bytes_; |
639 | |
640 | this->RandomData(size); |
641 | |
642 | ASSERT_OK(this->builder_->Reserve(1000)); |
643 | ASSERT_OK(this->builder_nn_->Reserve(1000)); |
644 | |
645 | int64_t null_count = 0; |
646 | // Append the first 1000 |
647 | for (size_t i = 0; i < 1000; ++i) { |
648 | if (valid_bytes[i] > 0) { |
649 | ASSERT_OK(this->builder_->Append(draws[i])); |
650 | } else { |
651 | ASSERT_OK(this->builder_->AppendNull()); |
652 | ++null_count; |
653 | } |
654 | ASSERT_OK(this->builder_nn_->Append(draws[i])); |
655 | } |
656 | |
657 | ASSERT_EQ(null_count, this->builder_->null_count()); |
658 | |
659 | ASSERT_EQ(1000, this->builder_->length()); |
660 | ASSERT_EQ(1024, this->builder_->capacity()); |
661 | |
662 | ASSERT_EQ(1000, this->builder_nn_->length()); |
663 | ASSERT_EQ(1024, this->builder_nn_->capacity()); |
664 | |
665 | ASSERT_OK(this->builder_->Reserve(size - 1000)); |
666 | ASSERT_OK(this->builder_nn_->Reserve(size - 1000)); |
667 | |
668 | // Append the next 9000 |
669 | for (size_t i = 1000; i < size; ++i) { |
670 | if (valid_bytes[i] > 0) { |
671 | ASSERT_OK(this->builder_->Append(draws[i])); |
672 | } else { |
673 | ASSERT_OK(this->builder_->AppendNull()); |
674 | } |
675 | ASSERT_OK(this->builder_nn_->Append(draws[i])); |
676 | } |
677 | |
678 | ASSERT_EQ(size, this->builder_->length()); |
679 | ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity()); |
680 | |
681 | ASSERT_EQ(size, this->builder_nn_->length()); |
682 | ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity()); |
683 | |
684 | this->Check(this->builder_, true); |
685 | this->Check(this->builder_nn_, false); |
686 | } |
687 | |
688 | TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) { |
689 | DECL_T(); |
690 | |
691 | int64_t size = 10000; |
692 | this->RandomData(size); |
693 | |
694 | vector<T>& draws = this->draws_; |
695 | vector<uint8_t>& valid_bytes = this->valid_bytes_; |
696 | |
697 | // first slug |
698 | int64_t K = 1000; |
699 | |
700 | ASSERT_OK(this->builder_->AppendValues(draws.data(), K, valid_bytes.data())); |
701 | ASSERT_OK(this->builder_nn_->AppendValues(draws.data(), K)); |
702 | |
703 | ASSERT_EQ(1000, this->builder_->length()); |
704 | ASSERT_EQ(1024, this->builder_->capacity()); |
705 | |
706 | ASSERT_EQ(1000, this->builder_nn_->length()); |
707 | ASSERT_EQ(1024, this->builder_nn_->capacity()); |
708 | |
709 | // Append the next 9000 |
710 | ASSERT_OK( |
711 | this->builder_->AppendValues(draws.data() + K, size - K, valid_bytes.data() + K)); |
712 | ASSERT_OK(this->builder_nn_->AppendValues(draws.data() + K, size - K)); |
713 | |
714 | ASSERT_EQ(size, this->builder_->length()); |
715 | ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity()); |
716 | |
717 | ASSERT_EQ(size, this->builder_nn_->length()); |
718 | ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity()); |
719 | |
720 | this->Check(this->builder_, true); |
721 | this->Check(this->builder_nn_, false); |
722 | } |
723 | |
724 | TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIter) { |
725 | int64_t size = 10000; |
726 | this->RandomData(size); |
727 | |
728 | ASSERT_OK(this->builder_->AppendValues(this->draws_.begin(), this->draws_.end(), |
729 | this->valid_bytes_.begin())); |
730 | ASSERT_OK(this->builder_nn_->AppendValues(this->draws_.begin(), this->draws_.end())); |
731 | |
732 | ASSERT_EQ(size, this->builder_->length()); |
733 | ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity()); |
734 | |
735 | this->Check(this->builder_, true); |
736 | this->Check(this->builder_nn_, false); |
737 | } |
738 | |
739 | TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterNullValid) { |
740 | int64_t size = 10000; |
741 | this->RandomData(size); |
742 | |
743 | ASSERT_OK(this->builder_nn_->AppendValues(this->draws_.begin(), |
744 | this->draws_.begin() + size / 2, |
745 | static_cast<uint8_t*>(nullptr))); |
746 | |
747 | ASSERT_EQ(BitUtil::NextPower2(size / 2), this->builder_nn_->capacity()); |
748 | |
749 | ASSERT_OK(this->builder_nn_->AppendValues(this->draws_.begin() + size / 2, |
750 | this->draws_.end(), |
751 | static_cast<uint64_t*>(nullptr))); |
752 | |
753 | this->Check(this->builder_nn_, false); |
754 | } |
755 | |
756 | TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) { |
757 | DECL_T(); |
758 | |
759 | int64_t size = 10000; |
760 | this->RandomData(size); |
761 | |
762 | auto& draws = this->draws_; |
763 | auto& valid_bytes = this->valid_bytes_; |
764 | |
765 | auto halve = [&draws](int64_t index) { return draws[index] / 2; }; |
766 | auto lazy_iter = internal::MakeLazyRange(halve, size); |
767 | |
768 | ASSERT_OK(this->builder_->AppendValues(lazy_iter.begin(), lazy_iter.end(), |
769 | valid_bytes.begin())); |
770 | |
771 | std::vector<T> halved; |
772 | transform(draws.begin(), draws.end(), back_inserter(halved), |
773 | [](T in) { return in / 2; }); |
774 | |
775 | std::shared_ptr<Array> result; |
776 | FinishAndCheckPadding(this->builder_.get(), &result); |
777 | |
778 | std::shared_ptr<Array> expected; |
779 | ASSERT_OK( |
780 | this->builder_->AppendValues(halved.data(), halved.size(), valid_bytes.data())); |
781 | FinishAndCheckPadding(this->builder_.get(), &expected); |
782 | |
783 | ASSERT_TRUE(expected->Equals(result)); |
784 | } |
785 | |
786 | TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) { |
787 | DECL_T(); |
788 | // find type we can safely convert the tested values to and from |
789 | using conversion_type = |
790 | typename std::conditional<std::is_floating_point<T>::value, double, |
791 | typename std::conditional<std::is_unsigned<T>::value, |
792 | uint64_t, int64_t>::type>::type; |
793 | |
794 | int64_t size = 10000; |
795 | this->RandomData(size); |
796 | |
797 | // append convertible values |
798 | vector<conversion_type> draws_converted(this->draws_.begin(), this->draws_.end()); |
799 | vector<int32_t> valid_bytes_converted(this->valid_bytes_.begin(), |
800 | this->valid_bytes_.end()); |
801 | |
802 | auto cast_values = internal::MakeLazyRange( |
803 | [&draws_converted](int64_t index) { |
804 | return static_cast<T>(draws_converted[index]); |
805 | }, |
806 | size); |
807 | auto cast_valid = internal::MakeLazyRange( |
808 | [&valid_bytes_converted](int64_t index) { |
809 | return static_cast<bool>(valid_bytes_converted[index]); |
810 | }, |
811 | size); |
812 | |
813 | ASSERT_OK(this->builder_->AppendValues(cast_values.begin(), cast_values.end(), |
814 | cast_valid.begin())); |
815 | ASSERT_OK(this->builder_nn_->AppendValues(cast_values.begin(), cast_values.end())); |
816 | |
817 | ASSERT_EQ(size, this->builder_->length()); |
818 | ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity()); |
819 | |
820 | ASSERT_EQ(size, this->builder_->length()); |
821 | ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity()); |
822 | |
823 | this->Check(this->builder_, true); |
824 | this->Check(this->builder_nn_, false); |
825 | } |
826 | |
827 | TYPED_TEST(TestPrimitiveBuilder, TestZeroPadded) { |
828 | DECL_T(); |
829 | |
830 | int64_t size = 10000; |
831 | this->RandomData(size); |
832 | |
833 | vector<T>& draws = this->draws_; |
834 | vector<uint8_t>& valid_bytes = this->valid_bytes_; |
835 | |
836 | // first slug |
837 | int64_t K = 1000; |
838 | |
839 | ASSERT_OK(this->builder_->AppendValues(draws.data(), K, valid_bytes.data())); |
840 | |
841 | std::shared_ptr<Array> out; |
842 | FinishAndCheckPadding(this->builder_.get(), &out); |
843 | } |
844 | |
845 | TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesStdBool) { |
846 | // ARROW-1383 |
847 | DECL_T(); |
848 | |
849 | int64_t size = 10000; |
850 | this->RandomData(size); |
851 | |
852 | vector<T>& draws = this->draws_; |
853 | |
854 | std::vector<bool> is_valid; |
855 | |
856 | // first slug |
857 | int64_t K = 1000; |
858 | |
859 | for (int64_t i = 0; i < K; ++i) { |
860 | is_valid.push_back(this->valid_bytes_[i] != 0); |
861 | } |
862 | ASSERT_OK(this->builder_->AppendValues(draws.data(), K, is_valid)); |
863 | ASSERT_OK(this->builder_nn_->AppendValues(draws.data(), K)); |
864 | |
865 | ASSERT_EQ(1000, this->builder_->length()); |
866 | ASSERT_EQ(1024, this->builder_->capacity()); |
867 | ASSERT_EQ(1000, this->builder_nn_->length()); |
868 | ASSERT_EQ(1024, this->builder_nn_->capacity()); |
869 | |
870 | // Append the next 9000 |
871 | is_valid.clear(); |
872 | std::vector<T> partial_draws; |
873 | for (int64_t i = K; i < size; ++i) { |
874 | partial_draws.push_back(draws[i]); |
875 | is_valid.push_back(this->valid_bytes_[i] != 0); |
876 | } |
877 | |
878 | ASSERT_OK(this->builder_->AppendValues(partial_draws, is_valid)); |
879 | ASSERT_OK(this->builder_nn_->AppendValues(partial_draws)); |
880 | |
881 | ASSERT_EQ(size, this->builder_->length()); |
882 | ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity()); |
883 | |
884 | ASSERT_EQ(size, this->builder_nn_->length()); |
885 | ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity()); |
886 | |
887 | this->Check(this->builder_, true); |
888 | this->Check(this->builder_nn_, false); |
889 | } |
890 | |
891 | TYPED_TEST(TestPrimitiveBuilder, TestAdvance) { |
892 | int64_t n = 1000; |
893 | ASSERT_OK(this->builder_->Reserve(n)); |
894 | |
895 | ASSERT_OK(this->builder_->Advance(100)); |
896 | ASSERT_EQ(100, this->builder_->length()); |
897 | |
898 | ASSERT_OK(this->builder_->Advance(900)); |
899 | |
900 | int64_t too_many = this->builder_->capacity() - 1000 + 1; |
901 | ASSERT_RAISES(Invalid, this->builder_->Advance(too_many)); |
902 | } |
903 | |
904 | TYPED_TEST(TestPrimitiveBuilder, TestResize) { |
905 | int64_t cap = kMinBuilderCapacity * 2; |
906 | |
907 | ASSERT_OK(this->builder_->Reserve(cap)); |
908 | ASSERT_EQ(cap, this->builder_->capacity()); |
909 | } |
910 | |
911 | TYPED_TEST(TestPrimitiveBuilder, TestReserve) { |
912 | ASSERT_OK(this->builder_->Reserve(10)); |
913 | ASSERT_EQ(0, this->builder_->length()); |
914 | ASSERT_EQ(kMinBuilderCapacity, this->builder_->capacity()); |
915 | |
916 | ASSERT_OK(this->builder_->Reserve(90)); |
917 | ASSERT_OK(this->builder_->Advance(100)); |
918 | ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity)); |
919 | |
920 | ASSERT_RAISES(Invalid, this->builder_->Resize(1)); |
921 | |
922 | ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity()); |
923 | } |
924 | |
925 | TEST(TestBooleanBuilder, TestStdBoolVectorAppend) { |
926 | BooleanBuilder builder; |
927 | BooleanBuilder builder_nn; |
928 | |
929 | std::vector<bool> values, is_valid; |
930 | |
931 | const int length = 10000; |
932 | random_is_valid(length, 0.5, &values); |
933 | random_is_valid(length, 0.1, &is_valid); |
934 | |
935 | const int chunksize = 1000; |
936 | for (int chunk = 0; chunk < length / chunksize; ++chunk) { |
937 | std::vector<bool> chunk_values, chunk_is_valid; |
938 | for (int i = chunk * chunksize; i < (chunk + 1) * chunksize; ++i) { |
939 | chunk_values.push_back(values[i]); |
940 | chunk_is_valid.push_back(is_valid[i]); |
941 | } |
942 | ASSERT_OK(builder.AppendValues(chunk_values, chunk_is_valid)); |
943 | ASSERT_OK(builder_nn.AppendValues(chunk_values)); |
944 | } |
945 | |
946 | std::shared_ptr<Array> result, result_nn; |
947 | ASSERT_OK(builder.Finish(&result)); |
948 | ASSERT_OK(builder_nn.Finish(&result_nn)); |
949 | |
950 | const auto& arr = checked_cast<const BooleanArray&>(*result); |
951 | const auto& arr_nn = checked_cast<const BooleanArray&>(*result_nn); |
952 | for (int i = 0; i < length; ++i) { |
953 | if (is_valid[i]) { |
954 | ASSERT_FALSE(arr.IsNull(i)); |
955 | ASSERT_EQ(values[i], arr.Value(i)); |
956 | } else { |
957 | ASSERT_TRUE(arr.IsNull(i)); |
958 | } |
959 | ASSERT_EQ(values[i], arr_nn.Value(i)); |
960 | } |
961 | } |
962 | |
963 | template <typename TYPE> |
964 | void CheckSliceApproxEquals() { |
965 | using T = typename TYPE::c_type; |
966 | |
967 | const int64_t kSize = 50; |
968 | vector<T> draws1; |
969 | vector<T> draws2; |
970 | |
971 | const uint32_t kSeed = 0; |
972 | random_real(kSize, kSeed, 0.0, 100.0, &draws1); |
973 | random_real(kSize, kSeed + 1, 0.0, 100.0, &draws2); |
974 | |
975 | // Make the draws equal in the sliced segment, but unequal elsewhere (to |
976 | // catch not using the slice offset) |
977 | for (int64_t i = 10; i < 30; ++i) { |
978 | draws2[i] = draws1[i]; |
979 | } |
980 | |
981 | vector<bool> is_valid; |
982 | random_is_valid(kSize, 0.1, &is_valid); |
983 | |
984 | std::shared_ptr<Array> array1, array2; |
985 | ArrayFromVector<TYPE, T>(is_valid, draws1, &array1); |
986 | ArrayFromVector<TYPE, T>(is_valid, draws2, &array2); |
987 | |
988 | std::shared_ptr<Array> slice1 = array1->Slice(10, 20); |
989 | std::shared_ptr<Array> slice2 = array2->Slice(10, 20); |
990 | |
991 | ASSERT_TRUE(slice1->ApproxEquals(slice2)); |
992 | } |
993 | |
// Runs the sliced ApproxEquals check for both floating-point types.
TEST(TestPrimitiveAdHoc, FloatingSliceApproxEquals) {
  CheckSliceApproxEquals<FloatType>();
  CheckSliceApproxEquals<DoubleType>();
}
998 | |
999 | // ---------------------------------------------------------------------- |
1000 | // FixedSizeBinary tests |
1001 | |
1002 | class TestFWBinaryArray : public ::testing::Test { |
1003 | public: |
1004 | void SetUp() {} |
1005 | |
1006 | void InitBuilder(int byte_width) { |
1007 | auto type = fixed_size_binary(byte_width); |
1008 | builder_.reset(new FixedSizeBinaryBuilder(type, default_memory_pool())); |
1009 | } |
1010 | |
1011 | protected: |
1012 | std::unique_ptr<FixedSizeBinaryBuilder> builder_; |
1013 | }; |
1014 | |
1015 | TEST_F(TestFWBinaryArray, Builder) { |
1016 | int32_t byte_width = 10; |
1017 | int64_t length = 4096; |
1018 | |
1019 | int64_t nbytes = length * byte_width; |
1020 | |
1021 | vector<uint8_t> data(nbytes); |
1022 | random_bytes(nbytes, 0, data.data()); |
1023 | |
1024 | vector<uint8_t> is_valid(length); |
1025 | random_null_bytes(length, 0.1, is_valid.data()); |
1026 | |
1027 | const uint8_t* raw_data = data.data(); |
1028 | |
1029 | std::shared_ptr<Array> result; |
1030 | |
1031 | auto CheckResult = [&length, &is_valid, &raw_data, &byte_width](const Array& result) { |
1032 | // Verify output |
1033 | const auto& fw_result = checked_cast<const FixedSizeBinaryArray&>(result); |
1034 | |
1035 | ASSERT_EQ(length, result.length()); |
1036 | |
1037 | for (int64_t i = 0; i < result.length(); ++i) { |
1038 | if (is_valid[i]) { |
1039 | ASSERT_EQ(0, |
1040 | memcmp(raw_data + byte_width * i, fw_result.GetValue(i), byte_width)); |
1041 | } else { |
1042 | ASSERT_TRUE(fw_result.IsNull(i)); |
1043 | } |
1044 | } |
1045 | }; |
1046 | |
1047 | // Build using iterative API |
1048 | InitBuilder(byte_width); |
1049 | for (int64_t i = 0; i < length; ++i) { |
1050 | if (is_valid[i]) { |
1051 | ASSERT_OK(builder_->Append(raw_data + byte_width * i)); |
1052 | } else { |
1053 | ASSERT_OK(builder_->AppendNull()); |
1054 | } |
1055 | } |
1056 | |
1057 | FinishAndCheckPadding(builder_.get(), &result); |
1058 | CheckResult(*result); |
1059 | |
1060 | // Build using batch API |
1061 | InitBuilder(byte_width); |
1062 | |
1063 | const uint8_t* raw_is_valid = is_valid.data(); |
1064 | |
1065 | ASSERT_OK(builder_->AppendValues(raw_data, 50, raw_is_valid)); |
1066 | ASSERT_OK( |
1067 | builder_->AppendValues(raw_data + 50 * byte_width, length - 50, raw_is_valid + 50)); |
1068 | FinishAndCheckPadding(builder_.get(), &result); |
1069 | |
1070 | CheckResult(*result); |
1071 | |
1072 | // Build from std::string |
1073 | InitBuilder(byte_width); |
1074 | for (int64_t i = 0; i < length; ++i) { |
1075 | if (is_valid[i]) { |
1076 | ASSERT_OK(builder_->Append( |
1077 | string(reinterpret_cast<const char*>(raw_data + byte_width * i), byte_width))); |
1078 | } else { |
1079 | ASSERT_OK(builder_->AppendNull()); |
1080 | } |
1081 | } |
1082 | |
1083 | ASSERT_OK(builder_->Finish(&result)); |
1084 | CheckResult(*result); |
1085 | } |
1086 | |
1087 | TEST_F(TestFWBinaryArray, EqualsRangeEquals) { |
1088 | // Check that we don't compare data in null slots |
1089 | |
1090 | auto type = fixed_size_binary(4); |
1091 | FixedSizeBinaryBuilder builder1(type); |
1092 | FixedSizeBinaryBuilder builder2(type); |
1093 | |
1094 | ASSERT_OK(builder1.Append("foo1" )); |
1095 | ASSERT_OK(builder1.AppendNull()); |
1096 | |
1097 | ASSERT_OK(builder2.Append("foo1" )); |
1098 | ASSERT_OK(builder2.Append("foo2" )); |
1099 | |
1100 | std::shared_ptr<Array> array1, array2; |
1101 | ASSERT_OK(builder1.Finish(&array1)); |
1102 | ASSERT_OK(builder2.Finish(&array2)); |
1103 | |
1104 | const auto& a1 = checked_cast<const FixedSizeBinaryArray&>(*array1); |
1105 | const auto& a2 = checked_cast<const FixedSizeBinaryArray&>(*array2); |
1106 | |
1107 | FixedSizeBinaryArray equal1(type, 2, a1.values(), a1.null_bitmap(), 1); |
1108 | FixedSizeBinaryArray equal2(type, 2, a2.values(), a1.null_bitmap(), 1); |
1109 | |
1110 | ASSERT_TRUE(equal1.Equals(equal2)); |
1111 | ASSERT_TRUE(equal1.RangeEquals(equal2, 0, 2, 0)); |
1112 | } |
1113 | |
1114 | TEST_F(TestFWBinaryArray, ZeroSize) { |
1115 | auto type = fixed_size_binary(0); |
1116 | FixedSizeBinaryBuilder builder(type); |
1117 | |
1118 | ASSERT_OK(builder.Append("" )); |
1119 | ASSERT_OK(builder.Append(std::string())); |
1120 | ASSERT_OK(builder.Append(static_cast<const uint8_t*>(nullptr))); |
1121 | ASSERT_OK(builder.AppendNull()); |
1122 | ASSERT_OK(builder.AppendNull()); |
1123 | ASSERT_OK(builder.AppendNull()); |
1124 | |
1125 | std::shared_ptr<Array> array; |
1126 | ASSERT_OK(builder.Finish(&array)); |
1127 | |
1128 | const auto& fw_array = checked_cast<const FixedSizeBinaryArray&>(*array); |
1129 | |
1130 | // data is never allocated |
1131 | ASSERT_TRUE(fw_array.values() == nullptr); |
1132 | ASSERT_EQ(0, fw_array.byte_width()); |
1133 | |
1134 | ASSERT_EQ(6, array->length()); |
1135 | ASSERT_EQ(3, array->null_count()); |
1136 | } |
1137 | |
1138 | TEST_F(TestFWBinaryArray, ZeroPadding) { |
1139 | auto type = fixed_size_binary(4); |
1140 | FixedSizeBinaryBuilder builder(type); |
1141 | |
1142 | ASSERT_OK(builder.Append("foo1" )); |
1143 | ASSERT_OK(builder.AppendNull()); |
1144 | ASSERT_OK(builder.Append("foo2" )); |
1145 | ASSERT_OK(builder.AppendNull()); |
1146 | ASSERT_OK(builder.Append("foo3" )); |
1147 | |
1148 | std::shared_ptr<Array> array; |
1149 | FinishAndCheckPadding(&builder, &array); |
1150 | } |
1151 | |
1152 | TEST_F(TestFWBinaryArray, Slice) { |
1153 | auto type = fixed_size_binary(4); |
1154 | FixedSizeBinaryBuilder builder(type); |
1155 | |
1156 | vector<string> strings = {"foo1" , "foo2" , "foo3" , "foo4" , "foo5" }; |
1157 | vector<uint8_t> is_null = {0, 1, 0, 0, 0}; |
1158 | |
1159 | for (int i = 0; i < 5; ++i) { |
1160 | if (is_null[i]) { |
1161 | ASSERT_OK(builder.AppendNull()); |
1162 | } else { |
1163 | ASSERT_OK(builder.Append(strings[i])); |
1164 | } |
1165 | } |
1166 | |
1167 | std::shared_ptr<Array> array; |
1168 | ASSERT_OK(builder.Finish(&array)); |
1169 | |
1170 | std::shared_ptr<Array> slice, slice2; |
1171 | |
1172 | slice = array->Slice(1); |
1173 | slice2 = array->Slice(1); |
1174 | ASSERT_EQ(4, slice->length()); |
1175 | |
1176 | ASSERT_TRUE(slice->Equals(slice2)); |
1177 | ASSERT_TRUE(array->RangeEquals(1, slice->length(), 0, slice)); |
1178 | |
1179 | // Chained slices |
1180 | slice = array->Slice(2); |
1181 | slice2 = array->Slice(1)->Slice(1); |
1182 | ASSERT_TRUE(slice->Equals(slice2)); |
1183 | |
1184 | slice = array->Slice(1, 3); |
1185 | ASSERT_EQ(3, slice->length()); |
1186 | |
1187 | slice2 = array->Slice(1, 3); |
1188 | ASSERT_TRUE(slice->Equals(slice2)); |
1189 | ASSERT_TRUE(array->RangeEquals(1, 3, 0, slice)); |
1190 | } |
1191 | |
1192 | // ---------------------------------------------------------------------- |
1193 | // AdaptiveInt tests |
1194 | |
1195 | class TestAdaptiveIntBuilder : public TestBuilder { |
1196 | public: |
1197 | void SetUp() { |
1198 | TestBuilder::SetUp(); |
1199 | builder_ = std::make_shared<AdaptiveIntBuilder>(pool_); |
1200 | } |
1201 | |
1202 | void Done() { FinishAndCheckPadding(builder_.get(), &result_); } |
1203 | |
1204 | protected: |
1205 | std::shared_ptr<AdaptiveIntBuilder> builder_; |
1206 | |
1207 | std::shared_ptr<Array> expected_; |
1208 | std::shared_ptr<Array> result_; |
1209 | }; |
1210 | |
1211 | TEST_F(TestAdaptiveIntBuilder, TestInt8) { |
1212 | ASSERT_OK(builder_->Append(0)); |
1213 | ASSERT_OK(builder_->Append(127)); |
1214 | ASSERT_OK(builder_->Append(-128)); |
1215 | |
1216 | Done(); |
1217 | |
1218 | std::vector<int8_t> expected_values({0, 127, -128}); |
1219 | ArrayFromVector<Int8Type, int8_t>(expected_values, &expected_); |
1220 | AssertArraysEqual(*expected_, *result_); |
1221 | } |
1222 | |
1223 | TEST_F(TestAdaptiveIntBuilder, TestInt16) { |
1224 | ASSERT_OK(builder_->Append(0)); |
1225 | ASSERT_OK(builder_->Append(128)); |
1226 | Done(); |
1227 | |
1228 | std::vector<int16_t> expected_values({0, 128}); |
1229 | ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_); |
1230 | AssertArraysEqual(*expected_, *result_); |
1231 | |
1232 | SetUp(); |
1233 | ASSERT_OK(builder_->Append(-129)); |
1234 | expected_values = {-129}; |
1235 | Done(); |
1236 | |
1237 | ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_); |
1238 | AssertArraysEqual(*expected_, *result_); |
1239 | |
1240 | SetUp(); |
1241 | ASSERT_OK(builder_->Append(std::numeric_limits<int16_t>::max())); |
1242 | ASSERT_OK(builder_->Append(std::numeric_limits<int16_t>::min())); |
1243 | expected_values = {std::numeric_limits<int16_t>::max(), |
1244 | std::numeric_limits<int16_t>::min()}; |
1245 | Done(); |
1246 | |
1247 | ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_); |
1248 | AssertArraysEqual(*expected_, *result_); |
1249 | } |
1250 | |
1251 | TEST_F(TestAdaptiveIntBuilder, TestInt32) { |
1252 | ASSERT_OK(builder_->Append(0)); |
1253 | ASSERT_OK( |
1254 | builder_->Append(static_cast<int64_t>(std::numeric_limits<int16_t>::max()) + 1)); |
1255 | Done(); |
1256 | |
1257 | std::vector<int32_t> expected_values( |
1258 | {0, static_cast<int32_t>(std::numeric_limits<int16_t>::max()) + 1}); |
1259 | ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_); |
1260 | AssertArraysEqual(*expected_, *result_); |
1261 | |
1262 | SetUp(); |
1263 | ASSERT_OK( |
1264 | builder_->Append(static_cast<int64_t>(std::numeric_limits<int16_t>::min()) - 1)); |
1265 | expected_values = {static_cast<int32_t>(std::numeric_limits<int16_t>::min()) - 1}; |
1266 | Done(); |
1267 | |
1268 | ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_); |
1269 | AssertArraysEqual(*expected_, *result_); |
1270 | |
1271 | SetUp(); |
1272 | ASSERT_OK(builder_->Append(std::numeric_limits<int32_t>::max())); |
1273 | ASSERT_OK(builder_->Append(std::numeric_limits<int32_t>::min())); |
1274 | expected_values = {std::numeric_limits<int32_t>::max(), |
1275 | std::numeric_limits<int32_t>::min()}; |
1276 | Done(); |
1277 | |
1278 | ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_); |
1279 | AssertArraysEqual(*expected_, *result_); |
1280 | } |
1281 | |
1282 | TEST_F(TestAdaptiveIntBuilder, TestInt64) { |
1283 | ASSERT_OK(builder_->Append(0)); |
1284 | ASSERT_OK( |
1285 | builder_->Append(static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1)); |
1286 | Done(); |
1287 | |
1288 | std::vector<int64_t> expected_values( |
1289 | {0, static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1}); |
1290 | ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_); |
1291 | AssertArraysEqual(*expected_, *result_); |
1292 | |
1293 | SetUp(); |
1294 | ASSERT_OK( |
1295 | builder_->Append(static_cast<int64_t>(std::numeric_limits<int32_t>::min()) - 1)); |
1296 | expected_values = {static_cast<int64_t>(std::numeric_limits<int32_t>::min()) - 1}; |
1297 | Done(); |
1298 | |
1299 | ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_); |
1300 | AssertArraysEqual(*expected_, *result_); |
1301 | |
1302 | SetUp(); |
1303 | ASSERT_OK(builder_->Append(std::numeric_limits<int64_t>::max())); |
1304 | ASSERT_OK(builder_->Append(std::numeric_limits<int64_t>::min())); |
1305 | expected_values = {std::numeric_limits<int64_t>::max(), |
1306 | std::numeric_limits<int64_t>::min()}; |
1307 | Done(); |
1308 | |
1309 | ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_); |
1310 | AssertArraysEqual(*expected_, *result_); |
1311 | } |
1312 | |
1313 | TEST_F(TestAdaptiveIntBuilder, TestAppendValues) { |
1314 | { |
1315 | std::vector<int64_t> expected_values( |
1316 | {0, static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1}); |
1317 | ASSERT_OK(builder_->AppendValues(expected_values.data(), expected_values.size())); |
1318 | Done(); |
1319 | |
1320 | ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_); |
1321 | AssertArraysEqual(*expected_, *result_); |
1322 | } |
1323 | { |
1324 | SetUp(); |
1325 | std::vector<int64_t> values( |
1326 | {0, std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::max()}); |
1327 | ASSERT_OK(builder_->AppendValues(values.data(), values.size())); |
1328 | Done(); |
1329 | |
1330 | std::vector<int32_t> expected_values( |
1331 | {0, std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::max()}); |
1332 | |
1333 | ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_); |
1334 | AssertArraysEqual(*expected_, *result_); |
1335 | } |
1336 | { |
1337 | SetUp(); |
1338 | std::vector<int64_t> values( |
1339 | {0, std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max()}); |
1340 | ASSERT_OK(builder_->AppendValues(values.data(), values.size())); |
1341 | Done(); |
1342 | |
1343 | std::vector<int16_t> expected_values( |
1344 | {0, std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max()}); |
1345 | |
1346 | ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_); |
1347 | AssertArraysEqual(*expected_, *result_); |
1348 | } |
1349 | { |
1350 | SetUp(); |
1351 | std::vector<int64_t> values( |
1352 | {0, std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()}); |
1353 | ASSERT_OK(builder_->AppendValues(values.data(), values.size())); |
1354 | Done(); |
1355 | |
1356 | std::vector<int8_t> expected_values( |
1357 | {0, std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()}); |
1358 | |
1359 | ArrayFromVector<Int8Type, int8_t>(expected_values, &expected_); |
1360 | AssertArraysEqual(*expected_, *result_); |
1361 | } |
1362 | } |
1363 | |
1364 | TEST_F(TestAdaptiveIntBuilder, TestAssertZeroPadded) { |
1365 | std::vector<int64_t> values( |
1366 | {0, static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1}); |
1367 | ASSERT_OK(builder_->AppendValues(values.data(), values.size())); |
1368 | Done(); |
1369 | } |
1370 | |
1371 | TEST_F(TestAdaptiveIntBuilder, TestAppendNull) { |
1372 | int64_t size = 1000; |
1373 | ASSERT_OK(builder_->Append(127)); |
1374 | for (unsigned index = 1; index < size - 1; ++index) { |
1375 | ASSERT_OK(builder_->AppendNull()); |
1376 | } |
1377 | ASSERT_OK(builder_->Append(-128)); |
1378 | |
1379 | Done(); |
1380 | |
1381 | std::vector<bool> expected_valid(size, false); |
1382 | expected_valid[0] = true; |
1383 | expected_valid[size - 1] = true; |
1384 | std::vector<int8_t> expected_values(size); |
1385 | expected_values[0] = 127; |
1386 | expected_values[size - 1] = -128; |
1387 | std::shared_ptr<Array> expected; |
1388 | ArrayFromVector<Int8Type, int8_t>(expected_valid, expected_values, &expected_); |
1389 | AssertArraysEqual(*expected_, *result_); |
1390 | } |
1391 | |
1392 | TEST_F(TestAdaptiveIntBuilder, TestAppendNulls) { |
1393 | constexpr int64_t size = 10; |
1394 | const uint8_t valid_bytes[size] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; |
1395 | ASSERT_OK(builder_->AppendNulls(valid_bytes, size)); |
1396 | |
1397 | Done(); |
1398 | |
1399 | for (unsigned index = 0; index < size; ++index) { |
1400 | ASSERT_EQ(result_->IsValid(index), static_cast<bool>(valid_bytes[index])); |
1401 | } |
1402 | } |
1403 | |
1404 | class TestAdaptiveUIntBuilder : public TestBuilder { |
1405 | public: |
1406 | void SetUp() { |
1407 | TestBuilder::SetUp(); |
1408 | builder_ = std::make_shared<AdaptiveUIntBuilder>(pool_); |
1409 | } |
1410 | |
1411 | void Done() { FinishAndCheckPadding(builder_.get(), &result_); } |
1412 | |
1413 | protected: |
1414 | std::shared_ptr<AdaptiveUIntBuilder> builder_; |
1415 | |
1416 | std::shared_ptr<Array> expected_; |
1417 | std::shared_ptr<Array> result_; |
1418 | }; |
1419 | |
1420 | TEST_F(TestAdaptiveUIntBuilder, TestUInt8) { |
1421 | ASSERT_OK(builder_->Append(0)); |
1422 | ASSERT_OK(builder_->Append(255)); |
1423 | |
1424 | Done(); |
1425 | |
1426 | std::vector<uint8_t> expected_values({0, 255}); |
1427 | ArrayFromVector<UInt8Type, uint8_t>(expected_values, &expected_); |
1428 | ASSERT_TRUE(expected_->Equals(result_)); |
1429 | } |
1430 | |
1431 | TEST_F(TestAdaptiveUIntBuilder, TestUInt16) { |
1432 | ASSERT_OK(builder_->Append(0)); |
1433 | ASSERT_OK(builder_->Append(256)); |
1434 | Done(); |
1435 | |
1436 | std::vector<uint16_t> expected_values({0, 256}); |
1437 | ArrayFromVector<UInt16Type, uint16_t>(expected_values, &expected_); |
1438 | ASSERT_TRUE(expected_->Equals(result_)); |
1439 | |
1440 | SetUp(); |
1441 | ASSERT_OK(builder_->Append(std::numeric_limits<uint16_t>::max())); |
1442 | expected_values = {std::numeric_limits<uint16_t>::max()}; |
1443 | Done(); |
1444 | |
1445 | ArrayFromVector<UInt16Type, uint16_t>(expected_values, &expected_); |
1446 | ASSERT_TRUE(expected_->Equals(result_)); |
1447 | } |
1448 | |
1449 | TEST_F(TestAdaptiveUIntBuilder, TestUInt32) { |
1450 | ASSERT_OK(builder_->Append(0)); |
1451 | ASSERT_OK( |
1452 | builder_->Append(static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1)); |
1453 | Done(); |
1454 | |
1455 | std::vector<uint32_t> expected_values( |
1456 | {0, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1}); |
1457 | ArrayFromVector<UInt32Type, uint32_t>(expected_values, &expected_); |
1458 | ASSERT_TRUE(expected_->Equals(result_)); |
1459 | |
1460 | SetUp(); |
1461 | ASSERT_OK(builder_->Append(std::numeric_limits<uint32_t>::max())); |
1462 | expected_values = {std::numeric_limits<uint32_t>::max()}; |
1463 | Done(); |
1464 | |
1465 | ArrayFromVector<UInt32Type, uint32_t>(expected_values, &expected_); |
1466 | ASSERT_TRUE(expected_->Equals(result_)); |
1467 | } |
1468 | |
1469 | TEST_F(TestAdaptiveUIntBuilder, TestUInt64) { |
1470 | ASSERT_OK(builder_->Append(0)); |
1471 | ASSERT_OK( |
1472 | builder_->Append(static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1)); |
1473 | Done(); |
1474 | |
1475 | std::vector<uint64_t> expected_values( |
1476 | {0, static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1}); |
1477 | ArrayFromVector<UInt64Type, uint64_t>(expected_values, &expected_); |
1478 | ASSERT_TRUE(expected_->Equals(result_)); |
1479 | |
1480 | SetUp(); |
1481 | ASSERT_OK(builder_->Append(std::numeric_limits<uint64_t>::max())); |
1482 | expected_values = {std::numeric_limits<uint64_t>::max()}; |
1483 | Done(); |
1484 | |
1485 | ArrayFromVector<UInt64Type, uint64_t>(expected_values, &expected_); |
1486 | ASSERT_TRUE(expected_->Equals(result_)); |
1487 | } |
1488 | |
1489 | TEST_F(TestAdaptiveUIntBuilder, TestAppendValues) { |
1490 | std::vector<uint64_t> expected_values( |
1491 | {0, static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1}); |
1492 | ASSERT_OK(builder_->AppendValues(expected_values.data(), expected_values.size())); |
1493 | Done(); |
1494 | |
1495 | ArrayFromVector<UInt64Type, uint64_t>(expected_values, &expected_); |
1496 | ASSERT_TRUE(expected_->Equals(result_)); |
1497 | } |
1498 | |
1499 | TEST_F(TestAdaptiveUIntBuilder, TestAssertZeroPadded) { |
1500 | std::vector<uint64_t> values( |
1501 | {0, static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1}); |
1502 | ASSERT_OK(builder_->AppendValues(values.data(), values.size())); |
1503 | Done(); |
1504 | } |
1505 | |
1506 | TEST_F(TestAdaptiveUIntBuilder, TestAppendNull) { |
1507 | int64_t size = 1000; |
1508 | ASSERT_OK(builder_->Append(254)); |
1509 | for (unsigned index = 1; index < size - 1; ++index) { |
1510 | ASSERT_OK(builder_->AppendNull()); |
1511 | } |
1512 | ASSERT_OK(builder_->Append(255)); |
1513 | |
1514 | Done(); |
1515 | |
1516 | std::vector<bool> expected_valid(size, false); |
1517 | expected_valid[0] = true; |
1518 | expected_valid[size - 1] = true; |
1519 | std::vector<uint8_t> expected_values(size); |
1520 | expected_values[0] = 254; |
1521 | expected_values[size - 1] = 255; |
1522 | std::shared_ptr<Array> expected; |
1523 | ArrayFromVector<UInt8Type, uint8_t>(expected_valid, expected_values, &expected_); |
1524 | AssertArraysEqual(*expected_, *result_); |
1525 | } |
1526 | |
1527 | TEST_F(TestAdaptiveUIntBuilder, TestAppendNulls) { |
1528 | constexpr int64_t size = 10; |
1529 | const uint8_t valid_bytes[size] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; |
1530 | ASSERT_OK(builder_->AppendNulls(valid_bytes, size)); |
1531 | |
1532 | Done(); |
1533 | |
1534 | for (unsigned index = 0; index < size; ++index) { |
1535 | ASSERT_EQ(result_->IsValid(index), static_cast<bool>(valid_bytes[index])); |
1536 | } |
1537 | } |
1538 | |
1539 | // ---------------------------------------------------------------------- |
1540 | // Union tests |
1541 | |
1542 | TEST(TestUnionArrayAdHoc, TestSliceEquals) { |
1543 | std::shared_ptr<RecordBatch> batch; |
1544 | ASSERT_OK(ipc::MakeUnion(&batch)); |
1545 | |
1546 | const int64_t size = batch->num_rows(); |
1547 | |
1548 | auto CheckUnion = [&size](std::shared_ptr<Array> array) { |
1549 | std::shared_ptr<Array> slice, slice2; |
1550 | slice = array->Slice(2); |
1551 | ASSERT_EQ(size - 2, slice->length()); |
1552 | |
1553 | slice2 = array->Slice(2); |
1554 | ASSERT_EQ(size - 2, slice->length()); |
1555 | |
1556 | ASSERT_TRUE(slice->Equals(slice2)); |
1557 | ASSERT_TRUE(array->RangeEquals(2, array->length(), 0, slice)); |
1558 | |
1559 | // Chained slices |
1560 | slice2 = array->Slice(1)->Slice(1); |
1561 | ASSERT_TRUE(slice->Equals(slice2)); |
1562 | |
1563 | slice = array->Slice(1, 5); |
1564 | slice2 = array->Slice(1, 5); |
1565 | ASSERT_EQ(5, slice->length()); |
1566 | |
1567 | ASSERT_TRUE(slice->Equals(slice2)); |
1568 | ASSERT_TRUE(array->RangeEquals(1, 6, 0, slice)); |
1569 | |
1570 | AssertZeroPadded(*array); |
1571 | TestInitialized(*array); |
1572 | }; |
1573 | |
1574 | CheckUnion(batch->column(1)); |
1575 | CheckUnion(batch->column(2)); |
1576 | } |
1577 | |
1578 | using DecimalVector = std::vector<Decimal128>; |
1579 | |
1580 | class DecimalTest : public ::testing::TestWithParam<int> { |
1581 | public: |
1582 | DecimalTest() {} |
1583 | |
1584 | template <size_t BYTE_WIDTH = 16> |
1585 | void MakeData(const DecimalVector& input, std::vector<uint8_t>* out) const { |
1586 | out->reserve(input.size() * BYTE_WIDTH); |
1587 | |
1588 | for (const auto& value : input) { |
1589 | auto bytes = value.ToBytes(); |
1590 | out->insert(out->end(), bytes.cbegin(), bytes.cend()); |
1591 | } |
1592 | } |
1593 | |
1594 | template <size_t BYTE_WIDTH = 16> |
1595 | void TestCreate(int32_t precision, const DecimalVector& draw, |
1596 | const std::vector<uint8_t>& valid_bytes, int64_t offset) const { |
1597 | auto type = std::make_shared<Decimal128Type>(precision, 4); |
1598 | auto builder = std::make_shared<Decimal128Builder>(type); |
1599 | |
1600 | size_t null_count = 0; |
1601 | |
1602 | const size_t size = draw.size(); |
1603 | |
1604 | ASSERT_OK(builder->Reserve(size)); |
1605 | |
1606 | for (size_t i = 0; i < size; ++i) { |
1607 | if (valid_bytes[i]) { |
1608 | ASSERT_OK(builder->Append(draw[i])); |
1609 | } else { |
1610 | ASSERT_OK(builder->AppendNull()); |
1611 | ++null_count; |
1612 | } |
1613 | } |
1614 | |
1615 | std::shared_ptr<Array> out; |
1616 | FinishAndCheckPadding(builder.get(), &out); |
1617 | |
1618 | std::vector<uint8_t> raw_bytes; |
1619 | |
1620 | raw_bytes.reserve(size * BYTE_WIDTH); |
1621 | MakeData<BYTE_WIDTH>(draw, &raw_bytes); |
1622 | |
1623 | auto expected_data = std::make_shared<Buffer>(raw_bytes.data(), BYTE_WIDTH); |
1624 | std::shared_ptr<Buffer> expected_null_bitmap; |
1625 | ASSERT_OK( |
1626 | BitUtil::BytesToBits(valid_bytes, default_memory_pool(), &expected_null_bitmap)); |
1627 | |
1628 | int64_t expected_null_count = CountNulls(valid_bytes); |
1629 | auto expected = std::make_shared<Decimal128Array>( |
1630 | type, size, expected_data, expected_null_bitmap, expected_null_count); |
1631 | |
1632 | std::shared_ptr<Array> lhs = out->Slice(offset); |
1633 | std::shared_ptr<Array> rhs = expected->Slice(offset); |
1634 | ASSERT_TRUE(lhs->Equals(rhs)); |
1635 | } |
1636 | }; |
1637 | |
1638 | TEST_P(DecimalTest, NoNulls) { |
1639 | int32_t precision = GetParam(); |
1640 | std::vector<Decimal128> draw = {Decimal128(1), Decimal128(-2), Decimal128(2389), |
1641 | Decimal128(4), Decimal128(-12348)}; |
1642 | std::vector<uint8_t> valid_bytes = {true, true, true, true, true}; |
1643 | this->TestCreate(precision, draw, valid_bytes, 0); |
1644 | this->TestCreate(precision, draw, valid_bytes, 2); |
1645 | } |
1646 | |
1647 | TEST_P(DecimalTest, WithNulls) { |
1648 | int32_t precision = GetParam(); |
1649 | std::vector<Decimal128> draw = {Decimal128(1), Decimal128(2), Decimal128(-1), |
1650 | Decimal128(4), Decimal128(-1), Decimal128(1), |
1651 | Decimal128(2)}; |
1652 | Decimal128 big; |
1653 | ASSERT_OK(Decimal128::FromString("230342903942.234234" , &big)); |
1654 | draw.push_back(big); |
1655 | |
1656 | Decimal128 big_negative; |
1657 | ASSERT_OK(Decimal128::FromString("-23049302932.235234" , &big_negative)); |
1658 | draw.push_back(big_negative); |
1659 | |
1660 | std::vector<uint8_t> valid_bytes = {true, true, false, true, false, |
1661 | true, true, true, true}; |
1662 | this->TestCreate(precision, draw, valid_bytes, 0); |
1663 | this->TestCreate(precision, draw, valid_bytes, 2); |
1664 | } |
1665 | |
// Instantiates DecimalTest once per parameter value. ::testing::Range(begin,
// end) excludes `end`, so the parameter takes the values 1 through 37.
// NOTE(review): 38 is not covered -- confirm whether that is intended.
INSTANTIATE_TEST_CASE_P(DecimalTest, DecimalTest, ::testing::Range(1, 38));
1667 | |
1668 | // ---------------------------------------------------------------------- |
1669 | // Test rechunking |
1670 | |
1671 | TEST(TestRechunkArraysConsistently, Trivial) { |
1672 | std::vector<ArrayVector> groups, rechunked; |
1673 | rechunked = internal::RechunkArraysConsistently(groups); |
1674 | ASSERT_EQ(rechunked.size(), 0); |
1675 | |
1676 | std::shared_ptr<Array> a1, a2, b1; |
1677 | ArrayFromVector<Int16Type, int16_t>({}, &a1); |
1678 | ArrayFromVector<Int16Type, int16_t>({}, &a2); |
1679 | ArrayFromVector<Int32Type, int32_t>({}, &b1); |
1680 | |
1681 | groups = {{a1, a2}, {}, {b1}}; |
1682 | rechunked = internal::RechunkArraysConsistently(groups); |
1683 | ASSERT_EQ(rechunked.size(), 3); |
1684 | |
1685 | for (auto& arrvec : rechunked) { |
1686 | for (auto& arr : arrvec) { |
1687 | AssertZeroPadded(*arr); |
1688 | TestInitialized(*arr); |
1689 | } |
1690 | } |
1691 | } |
1692 | |
1693 | TEST(TestRechunkArraysConsistently, Plain) { |
1694 | std::shared_ptr<Array> expected; |
1695 | std::shared_ptr<Array> a1, a2, a3, b1, b2, b3, b4; |
1696 | ArrayFromVector<Int16Type, int16_t>({1, 2, 3}, &a1); |
1697 | ArrayFromVector<Int16Type, int16_t>({4, 5}, &a2); |
1698 | ArrayFromVector<Int16Type, int16_t>({6, 7, 8, 9}, &a3); |
1699 | |
1700 | ArrayFromVector<Int32Type, int32_t>({41, 42}, &b1); |
1701 | ArrayFromVector<Int32Type, int32_t>({43, 44, 45}, &b2); |
1702 | ArrayFromVector<Int32Type, int32_t>({46, 47}, &b3); |
1703 | ArrayFromVector<Int32Type, int32_t>({48, 49}, &b4); |
1704 | |
1705 | ArrayVector a{a1, a2, a3}; |
1706 | ArrayVector b{b1, b2, b3, b4}; |
1707 | |
1708 | std::vector<ArrayVector> groups{a, b}, rechunked; |
1709 | rechunked = internal::RechunkArraysConsistently(groups); |
1710 | ASSERT_EQ(rechunked.size(), 2); |
1711 | auto ra = rechunked[0]; |
1712 | auto rb = rechunked[1]; |
1713 | |
1714 | ASSERT_EQ(ra.size(), 5); |
1715 | ArrayFromVector<Int16Type, int16_t>({1, 2}, &expected); |
1716 | ASSERT_ARRAYS_EQUAL(*ra[0], *expected); |
1717 | ArrayFromVector<Int16Type, int16_t>({3}, &expected); |
1718 | ASSERT_ARRAYS_EQUAL(*ra[1], *expected); |
1719 | ArrayFromVector<Int16Type, int16_t>({4, 5}, &expected); |
1720 | ASSERT_ARRAYS_EQUAL(*ra[2], *expected); |
1721 | ArrayFromVector<Int16Type, int16_t>({6, 7}, &expected); |
1722 | ASSERT_ARRAYS_EQUAL(*ra[3], *expected); |
1723 | ArrayFromVector<Int16Type, int16_t>({8, 9}, &expected); |
1724 | ASSERT_ARRAYS_EQUAL(*ra[4], *expected); |
1725 | |
1726 | ASSERT_EQ(rb.size(), 5); |
1727 | ArrayFromVector<Int32Type, int32_t>({41, 42}, &expected); |
1728 | ASSERT_ARRAYS_EQUAL(*rb[0], *expected); |
1729 | ArrayFromVector<Int32Type, int32_t>({43}, &expected); |
1730 | ASSERT_ARRAYS_EQUAL(*rb[1], *expected); |
1731 | ArrayFromVector<Int32Type, int32_t>({44, 45}, &expected); |
1732 | ASSERT_ARRAYS_EQUAL(*rb[2], *expected); |
1733 | ArrayFromVector<Int32Type, int32_t>({46, 47}, &expected); |
1734 | ASSERT_ARRAYS_EQUAL(*rb[3], *expected); |
1735 | ArrayFromVector<Int32Type, int32_t>({48, 49}, &expected); |
1736 | ASSERT_ARRAYS_EQUAL(*rb[4], *expected); |
1737 | |
1738 | for (auto& arrvec : rechunked) { |
1739 | for (auto& arr : arrvec) { |
1740 | AssertZeroPadded(*arr); |
1741 | TestInitialized(*arr); |
1742 | } |
1743 | } |
1744 | } |
1745 | |
1746 | } // namespace arrow |
1747 | |