1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <algorithm>
19#include <array>
20#include <cstdint>
21#include <cstring>
22#include <iterator>
23#include <limits>
24#include <memory>
25#include <numeric>
26#include <string>
27#include <type_traits>
28#include <vector>
29
30#include <gtest/gtest.h>
31
32#include "arrow/array.h"
33#include "arrow/buffer-builder.h"
34#include "arrow/buffer.h"
35#include "arrow/builder.h"
36#include "arrow/ipc/test-common.h"
37#include "arrow/memory_pool.h"
38#include "arrow/record_batch.h"
39#include "arrow/status.h"
40#include "arrow/test-common.h"
41#include "arrow/test-util.h"
42#include "arrow/type.h"
43#include "arrow/util/bit-util.h"
44#include "arrow/util/checked_cast.h"
45#include "arrow/util/decimal.h"
46#include "arrow/util/lazy.h"
47
48// This file is compiled together with array-*-test.cc into a single
49// executable array-test.
50
51namespace arrow {
52
53using std::string;
54using std::vector;
55
56using internal::checked_cast;
57
58class TestArray : public ::testing::Test {
59 public:
60 void SetUp() { pool_ = default_memory_pool(); }
61
62 protected:
63 MemoryPool* pool_;
64};
65
66TEST_F(TestArray, TestNullCount) {
67 // These are placeholders
68 auto data = std::make_shared<Buffer>(nullptr, 0);
69 auto null_bitmap = std::make_shared<Buffer>(nullptr, 0);
70
71 std::unique_ptr<Int32Array> arr(new Int32Array(100, data, null_bitmap, 10));
72 ASSERT_EQ(10, arr->null_count());
73
74 std::unique_ptr<Int32Array> arr_no_nulls(new Int32Array(100, data));
75 ASSERT_EQ(0, arr_no_nulls->null_count());
76}
77
78TEST_F(TestArray, TestLength) {
79 // Placeholder buffer
80 auto data = std::make_shared<Buffer>(nullptr, 0);
81
82 std::unique_ptr<Int32Array> arr(new Int32Array(100, data));
83 ASSERT_EQ(arr->length(), 100);
84}
85
86Status MakeArrayFromValidBytes(const vector<uint8_t>& v, MemoryPool* pool,
87 std::shared_ptr<Array>* out) {
88 int64_t null_count = v.size() - std::accumulate(v.begin(), v.end(), 0);
89
90 std::shared_ptr<Buffer> null_buf;
91 RETURN_NOT_OK(BitUtil::BytesToBits(v, default_memory_pool(), &null_buf));
92
93 TypedBufferBuilder<int32_t> value_builder(pool);
94 for (size_t i = 0; i < v.size(); ++i) {
95 RETURN_NOT_OK(value_builder.Append(0));
96 }
97
98 std::shared_ptr<Buffer> values;
99 RETURN_NOT_OK(value_builder.Finish(&values));
100 *out = std::make_shared<Int32Array>(v.size(), values, null_buf, null_count);
101 return Status::OK();
102}
103
104TEST_F(TestArray, TestEquality) {
105 std::shared_ptr<Array> array, equal_array, unequal_array;
106
107 ASSERT_OK(MakeArrayFromValidBytes({1, 0, 1, 1, 0, 1, 0, 0}, pool_, &array));
108 ASSERT_OK(MakeArrayFromValidBytes({1, 0, 1, 1, 0, 1, 0, 0}, pool_, &equal_array));
109 ASSERT_OK(MakeArrayFromValidBytes({1, 1, 1, 1, 0, 1, 0, 0}, pool_, &unequal_array));
110
111 EXPECT_TRUE(array->Equals(array));
112 EXPECT_TRUE(array->Equals(equal_array));
113 EXPECT_TRUE(equal_array->Equals(array));
114 EXPECT_FALSE(equal_array->Equals(unequal_array));
115 EXPECT_FALSE(unequal_array->Equals(equal_array));
116 EXPECT_TRUE(array->RangeEquals(4, 8, 4, unequal_array));
117 EXPECT_FALSE(array->RangeEquals(0, 4, 0, unequal_array));
118 EXPECT_FALSE(array->RangeEquals(0, 8, 0, unequal_array));
119 EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array));
120
121 auto timestamp_ns_array = std::make_shared<NumericArray<TimestampType>>(
122 timestamp(TimeUnit::NANO), array->length(), array->data()->buffers[1],
123 array->data()->buffers[0], array->null_count());
124 auto timestamp_us_array = std::make_shared<NumericArray<TimestampType>>(
125 timestamp(TimeUnit::MICRO), array->length(), array->data()->buffers[1],
126 array->data()->buffers[0], array->null_count());
127 ASSERT_FALSE(array->Equals(timestamp_ns_array));
128 // ARROW-2567: Ensure that not only the type id but also the type equality
129 // itself is checked.
130 ASSERT_FALSE(timestamp_us_array->Equals(timestamp_ns_array));
131}
132
133TEST_F(TestArray, TestNullArrayEquality) {
134 auto array_1 = std::make_shared<NullArray>(10);
135 auto array_2 = std::make_shared<NullArray>(10);
136 auto array_3 = std::make_shared<NullArray>(20);
137
138 EXPECT_TRUE(array_1->Equals(array_1));
139 EXPECT_TRUE(array_1->Equals(array_2));
140 EXPECT_FALSE(array_1->Equals(array_3));
141}
142
143TEST_F(TestArray, SliceRecomputeNullCount) {
144 vector<uint8_t> valid_bytes = {1, 0, 1, 1, 0, 1, 0, 0, 0};
145
146 std::shared_ptr<Array> array;
147 ASSERT_OK(MakeArrayFromValidBytes(valid_bytes, pool_, &array));
148
149 ASSERT_EQ(5, array->null_count());
150
151 auto slice = array->Slice(1, 4);
152 ASSERT_EQ(2, slice->null_count());
153
154 slice = array->Slice(4);
155 ASSERT_EQ(4, slice->null_count());
156
157 slice = array->Slice(0);
158 ASSERT_EQ(5, slice->null_count());
159
160 // No bitmap, compute 0
161 std::shared_ptr<Buffer> data;
162 const int kBufferSize = 64;
163 ASSERT_OK(AllocateBuffer(pool_, kBufferSize, &data));
164 memset(data->mutable_data(), 0, kBufferSize);
165
166 auto arr = std::make_shared<Int32Array>(16, data, nullptr, -1);
167 ASSERT_EQ(0, arr->null_count());
168}
169
170TEST_F(TestArray, NullArraySliceNullCount) {
171 auto null_arr = std::make_shared<NullArray>(10);
172 auto null_arr_sliced = null_arr->Slice(3, 6);
173
174 // The internal null count is 6, does not require recomputation
175 ASSERT_EQ(6, null_arr_sliced->data()->null_count);
176
177 ASSERT_EQ(6, null_arr_sliced->null_count());
178}
179
180TEST_F(TestArray, TestIsNullIsValid) {
181 // clang-format off
182 vector<uint8_t> null_bitmap = {1, 0, 1, 1, 0, 1, 0, 0,
183 1, 0, 1, 1, 0, 1, 0, 0,
184 1, 0, 1, 1, 0, 1, 0, 0,
185 1, 0, 1, 1, 0, 1, 0, 0,
186 1, 0, 0, 1};
187 // clang-format on
188 int64_t null_count = 0;
189 for (uint8_t x : null_bitmap) {
190 if (x == 0) {
191 ++null_count;
192 }
193 }
194
195 std::shared_ptr<Buffer> null_buf;
196 ASSERT_OK(BitUtil::BytesToBits(null_bitmap, default_memory_pool(), &null_buf));
197
198 std::unique_ptr<Array> arr;
199 arr.reset(new Int32Array(null_bitmap.size(), nullptr, null_buf, null_count));
200
201 ASSERT_EQ(null_count, arr->null_count());
202 ASSERT_EQ(5, null_buf->size());
203
204 ASSERT_TRUE(arr->null_bitmap()->Equals(*null_buf.get()));
205
206 for (size_t i = 0; i < null_bitmap.size(); ++i) {
207 EXPECT_EQ(null_bitmap[i] != 0, !arr->IsNull(i)) << i;
208 EXPECT_EQ(null_bitmap[i] != 0, arr->IsValid(i)) << i;
209 }
210}
211
212TEST_F(TestArray, TestIsNullIsValidNoNulls) {
213 const int64_t size = 10;
214
215 std::unique_ptr<Array> arr;
216 arr.reset(new Int32Array(size, nullptr, nullptr, 0));
217
218 for (size_t i = 0; i < size; ++i) {
219 EXPECT_TRUE(arr->IsValid(i));
220 EXPECT_FALSE(arr->IsNull(i));
221 }
222}
223
224TEST_F(TestArray, BuildLargeInMemoryArray) {
225#ifdef NDEBUG
226 const int64_t length = static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1;
227#elif !defined(ARROW_VALGRIND)
228 // use a smaller size since the insert function isn't optimized properly on debug and
229 // the test takes a long time to complete
230 const int64_t length = 2 << 24;
231#else
232 // use an even smaller size with valgrind
233 const int64_t length = 2 << 20;
234#endif
235
236 BooleanBuilder builder;
237 std::vector<bool> zeros(length);
238 ASSERT_OK(builder.AppendValues(zeros));
239
240 std::shared_ptr<Array> result;
241 FinishAndCheckPadding(&builder, &result);
242
243 ASSERT_EQ(length, result->length());
244}
245
// Placeholder: the body is empty, so nothing about copying is verified yet.
TEST_F(TestArray, TestCopy) {}
247
248// ----------------------------------------------------------------------
249// Null type tests
250
251TEST(TestNullBuilder, Basics) {
252 NullBuilder builder;
253 std::shared_ptr<Array> array;
254
255 ASSERT_OK(builder.AppendNull());
256 ASSERT_OK(builder.Append(nullptr));
257 ASSERT_OK(builder.AppendNull());
258 ASSERT_OK(builder.Finish(&array));
259
260 const auto& null_array = checked_cast<NullArray&>(*array);
261 ASSERT_EQ(null_array.length(), 3);
262 ASSERT_EQ(null_array.null_count(), 3);
263}
264
265// ----------------------------------------------------------------------
266// Primitive type tests
267
268TEST_F(TestBuilder, TestReserve) {
269 UInt8Builder builder(pool_);
270
271 ASSERT_OK(builder.Resize(1000));
272 ASSERT_EQ(1000, builder.capacity());
273
274 // Builder only contains 0 elements, but calling Reserve will result in a round
275 // up to next power of 2
276 ASSERT_OK(builder.Reserve(1030));
277 ASSERT_EQ(BitUtil::NextPower2(1030), builder.capacity());
278}
279
280TEST_F(TestBuilder, TestResizeDownsize) {
281 UInt8Builder builder(pool_);
282
283 ASSERT_OK(builder.Resize(1000));
284 ASSERT_EQ(1000, builder.capacity());
285
286 // Can't downsize.
287 ASSERT_RAISES(Invalid, builder.Resize(500));
288}
289
290template <typename Attrs>
291class TestPrimitiveBuilder : public TestBuilder {
292 public:
293 typedef typename Attrs::ArrayType ArrayType;
294 typedef typename Attrs::BuilderType BuilderType;
295 typedef typename Attrs::T T;
296 typedef typename Attrs::Type Type;
297
298 virtual void SetUp() {
299 TestBuilder::SetUp();
300
301 type_ = Attrs::type();
302
303 std::unique_ptr<ArrayBuilder> tmp;
304 ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
305 builder_.reset(checked_cast<BuilderType*>(tmp.release()));
306
307 ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
308 builder_nn_.reset(checked_cast<BuilderType*>(tmp.release()));
309 }
310
311 void RandomData(int64_t N, double pct_null = 0.1) {
312 Attrs::draw(N, &draws_);
313
314 valid_bytes_.resize(static_cast<size_t>(N));
315 random_null_bytes(N, pct_null, valid_bytes_.data());
316 }
317
318 void Check(const std::unique_ptr<BuilderType>& builder, bool nullable) {
319 int64_t size = builder->length();
320 auto ex_data = Buffer::Wrap(draws_.data(), size);
321
322 std::shared_ptr<Buffer> ex_null_bitmap;
323 int64_t ex_null_count = 0;
324
325 if (nullable) {
326 ASSERT_OK(
327 BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &ex_null_bitmap));
328 ex_null_count = CountNulls(valid_bytes_);
329 } else {
330 ex_null_bitmap = nullptr;
331 }
332
333 auto expected =
334 std::make_shared<ArrayType>(size, ex_data, ex_null_bitmap, ex_null_count);
335
336 std::shared_ptr<Array> out;
337 FinishAndCheckPadding(builder.get(), &out);
338
339 std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(out);
340
341 // Builder is now reset
342 ASSERT_EQ(0, builder->length());
343 ASSERT_EQ(0, builder->capacity());
344 ASSERT_EQ(0, builder->null_count());
345
346 ASSERT_EQ(ex_null_count, result->null_count());
347 ASSERT_TRUE(result->Equals(*expected));
348 }
349
350 void FlipValue(T* ptr) {
351 auto byteptr = reinterpret_cast<uint8_t*>(ptr);
352 *byteptr = static_cast<uint8_t>(~*byteptr);
353 }
354
355 protected:
356 std::unique_ptr<BuilderType> builder_;
357 std::unique_ptr<BuilderType> builder_nn_;
358
359 vector<T> draws_;
360 vector<uint8_t> valid_bytes_;
361};
362
/// \brief Maps an integral type T to the integer type used to sample random
/// values of T.
///
/// uint8_t isn't a valid template parameter to uniform_int_distribution, so the
/// 8-bit types are sampled through their 16-bit counterparts; every other
/// integral type is sampled as itself. Non-integral T is rejected by the
/// enable_if in the default template argument.
template <typename T,
          typename Enable =
              typename std::enable_if<std::is_integral<T>::value, T>::type>
struct UniformIntSampleType {
  using type = T;
};

/// int8_t values are sampled as int16_t.
template <>
struct UniformIntSampleType<int8_t> {
  using type = int16_t;
};

/// uint8_t values are sampled as uint16_t.
template <>
struct UniformIntSampleType<uint8_t> {
  using type = uint16_t;
};
380
// Emits the members shared by every P* traits struct: aliases for the
// CapType##Array / CapType##Builder / CapType##Type classes, the element type T,
// and a type() factory returning a new instance of the data type.
// Comments stay outside the macro body so they cannot interfere with the line
// continuations.
#define PTYPE_DECL(CapType, c_type)     \
  using ArrayType = CapType##Array;     \
  using BuilderType = CapType##Builder; \
  using Type = CapType##Type;           \
  using T = c_type;                     \
                                        \
  static std::shared_ptr<DataType> type() { return std::make_shared<Type>(); }
388
// Defines the traits struct P<CapType> for an integer type. Its draw() fills
// `draws` with N random integers spanning the full range of c_type, passing the
// bounds to randint() as the type chosen by UniformIntSampleType.
#define PINT_DECL(CapType, c_type)                                             \
  struct P##CapType {                                                          \
    PTYPE_DECL(CapType, c_type)                                                \
    static void draw(int64_t N, vector<T>* draws) {                            \
      using SampleT = typename UniformIntSampleType<c_type>::type;             \
      const SampleT lo = static_cast<SampleT>(std::numeric_limits<T>::min());  \
      const SampleT hi = static_cast<SampleT>(std::numeric_limits<T>::max());  \
      randint(N, lo, hi, draws);                                               \
    }                                                                          \
  }
400
// Defines the traits struct P<CapType> for a floating point type. Its draw()
// fills `draws` with N values produced by random_real() from the bounds LOWER
// and UPPER; the second argument (0) is the seed.
#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER)         \
  struct P##CapType {                                      \
    PTYPE_DECL(CapType, c_type)                            \
    static void draw(int64_t N, vector<T>* draws) {        \
      random_real(N, /*seed=*/0, (LOWER), (UPPER), draws); \
    }                                                      \
  }
408
// Traits structs for the unsigned integer types (PUInt8 ... PUInt64).
PINT_DECL(UInt8, uint8_t);
PINT_DECL(UInt16, uint16_t);
PINT_DECL(UInt32, uint32_t);
PINT_DECL(UInt64, uint64_t);

// Traits structs for the signed integer types (PInt8 ... PInt64).
PINT_DECL(Int8, int8_t);
PINT_DECL(Int16, int16_t);
PINT_DECL(Int32, int32_t);
PINT_DECL(Int64, int64_t);

// Traits structs for the floating point types (PFloat, PDouble); the last two
// arguments are the bounds handed to random_real().
PFLOAT_DECL(Float, float, -1000.0f, 1000.0f);
PFLOAT_DECL(Double, double, -1000.0, 1000.0);
421
// Traits struct for booleans, with uint8_t as the element type T. It declares
// no draw(); TestPrimitiveBuilder<PBoolean>::RandomData is specialized instead.
struct PBoolean {
 PTYPE_DECL(Boolean, uint8_t)
};
425
426template <>
427void TestPrimitiveBuilder<PBoolean>::RandomData(int64_t N, double pct_null) {
428 draws_.resize(static_cast<size_t>(N));
429 valid_bytes_.resize(static_cast<size_t>(N));
430
431 random_null_bytes(N, 0.5, draws_.data());
432 random_null_bytes(N, pct_null, valid_bytes_.data());
433}
434
435template <>
436void TestPrimitiveBuilder<PBoolean>::FlipValue(T* ptr) {
437 *ptr = !*ptr;
438}
439
440template <>
441void TestPrimitiveBuilder<PBoolean>::Check(const std::unique_ptr<BooleanBuilder>& builder,
442 bool nullable) {
443 const int64_t size = builder->length();
444
445 // Build expected result array
446 std::shared_ptr<Buffer> ex_data;
447 std::shared_ptr<Buffer> ex_null_bitmap;
448 int64_t ex_null_count = 0;
449
450 ASSERT_OK(BitUtil::BytesToBits(draws_, default_memory_pool(), &ex_data));
451 if (nullable) {
452 ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &ex_null_bitmap));
453 ex_null_count = CountNulls(valid_bytes_);
454 } else {
455 ex_null_bitmap = nullptr;
456 }
457 auto expected =
458 std::make_shared<BooleanArray>(size, ex_data, ex_null_bitmap, ex_null_count);
459 ASSERT_EQ(size, expected->length());
460
461 // Finish builder and check result array
462 std::shared_ptr<Array> out;
463 FinishAndCheckPadding(builder.get(), &out);
464
465 std::shared_ptr<BooleanArray> result = std::dynamic_pointer_cast<BooleanArray>(out);
466
467 ASSERT_EQ(ex_null_count, result->null_count());
468 ASSERT_EQ(size, result->length());
469
470 for (int64_t i = 0; i < size; ++i) {
471 if (nullable) {
472 ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i;
473 } else {
474 ASSERT_FALSE(result->IsNull(i));
475 }
476 if (!result->IsNull(i)) {
477 bool actual = BitUtil::GetBit(result->values()->data(), i);
478 ASSERT_EQ(draws_[i] != 0, actual) << i;
479 }
480 }
481 ASSERT_TRUE(result->Equals(*expected));
482
483 // Builder is now reset
484 ASSERT_EQ(0, builder->length());
485 ASSERT_EQ(0, builder->capacity());
486 ASSERT_EQ(0, builder->null_count());
487}
488
489typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PInt16,
490 PInt32, PInt64, PFloat, PDouble>
491 Primitives;
492
493TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
494
495TYPED_TEST(TestPrimitiveBuilder, TestInit) {
496 int64_t n = 1000;
497 ASSERT_OK(this->builder_->Reserve(n));
498 ASSERT_EQ(BitUtil::NextPower2(n), this->builder_->capacity());
499
500 // unsure if this should go in all builder classes
501 ASSERT_EQ(0, this->builder_->num_children());
502}
503
504TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {
505 int64_t size = 1000;
506 for (int64_t i = 0; i < size; ++i) {
507 ASSERT_OK(this->builder_->AppendNull());
508 }
509
510 std::shared_ptr<Array> out;
511 FinishAndCheckPadding(this->builder_.get(), &out);
512 auto result = std::dynamic_pointer_cast<typename TypeParam::ArrayType>(out);
513
514 for (int64_t i = 0; i < size; ++i) {
515 ASSERT_TRUE(result->IsNull(i)) << i;
516 }
517}
518
519TYPED_TEST(TestPrimitiveBuilder, TestAppendNulls) {
520 const int64_t size = 10;
521 const uint8_t valid_bytes[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
522
523 ASSERT_OK(this->builder_->AppendNulls(valid_bytes, size));
524
525 std::shared_ptr<Array> result;
526 FinishAndCheckPadding(this->builder_.get(), &result);
527
528 for (int64_t i = 0; i < size; ++i) {
529 ASSERT_EQ(result->IsValid(i), static_cast<bool>(valid_bytes[i]));
530 }
531}
532
533TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
534 DECL_T();
535
536 int64_t size = 1000;
537
538 vector<T>& draws = this->draws_;
539 vector<uint8_t>& valid_bytes = this->valid_bytes_;
540
541 int64_t memory_before = this->pool_->bytes_allocated();
542
543 this->RandomData(size);
544 ASSERT_OK(this->builder_->Reserve(size));
545
546 int64_t i;
547 for (i = 0; i < size; ++i) {
548 if (valid_bytes[i] > 0) {
549 ASSERT_OK(this->builder_->Append(draws[i]));
550 } else {
551 ASSERT_OK(this->builder_->AppendNull());
552 }
553 }
554
555 do {
556 std::shared_ptr<Array> result;
557 FinishAndCheckPadding(this->builder_.get(), &result);
558 } while (false);
559
560 ASSERT_EQ(memory_before, this->pool_->bytes_allocated());
561}
562
563TYPED_TEST(TestPrimitiveBuilder, Equality) {
564 DECL_T();
565
566 const int64_t size = 1000;
567 this->RandomData(size);
568 vector<T>& draws = this->draws_;
569 vector<uint8_t>& valid_bytes = this->valid_bytes_;
570 std::shared_ptr<Array> array, equal_array, unequal_array;
571 auto builder = this->builder_.get();
572 ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &array));
573 ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &equal_array));
574
575 // Make the not equal array by negating the first valid element with itself.
576 const auto first_valid = std::find_if(valid_bytes.begin(), valid_bytes.end(),
577 [](uint8_t valid) { return valid > 0; });
578 const int64_t first_valid_idx = std::distance(valid_bytes.begin(), first_valid);
579 // This should be true with a very high probability, but might introduce flakiness
580 ASSERT_LT(first_valid_idx, size - 1);
581 this->FlipValue(&draws[first_valid_idx]);
582 ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &unequal_array));
583
584 // test normal equality
585 EXPECT_TRUE(array->Equals(array));
586 EXPECT_TRUE(array->Equals(equal_array));
587 EXPECT_TRUE(equal_array->Equals(array));
588 EXPECT_FALSE(equal_array->Equals(unequal_array));
589 EXPECT_FALSE(unequal_array->Equals(equal_array));
590
591 // Test range equality
592 EXPECT_FALSE(array->RangeEquals(0, first_valid_idx + 1, 0, unequal_array));
593 EXPECT_FALSE(array->RangeEquals(first_valid_idx, size, first_valid_idx, unequal_array));
594 EXPECT_TRUE(array->RangeEquals(0, first_valid_idx, 0, unequal_array));
595 EXPECT_TRUE(
596 array->RangeEquals(first_valid_idx + 1, size, first_valid_idx + 1, unequal_array));
597}
598
599TYPED_TEST(TestPrimitiveBuilder, SliceEquality) {
600 DECL_T();
601
602 const int64_t size = 1000;
603 this->RandomData(size);
604 vector<T>& draws = this->draws_;
605 vector<uint8_t>& valid_bytes = this->valid_bytes_;
606 auto builder = this->builder_.get();
607
608 std::shared_ptr<Array> array;
609 ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &array));
610
611 std::shared_ptr<Array> slice, slice2;
612
613 slice = array->Slice(5);
614 slice2 = array->Slice(5);
615 ASSERT_EQ(size - 5, slice->length());
616
617 ASSERT_TRUE(slice->Equals(slice2));
618 ASSERT_TRUE(array->RangeEquals(5, array->length(), 0, slice));
619
620 // Chained slices
621 slice2 = array->Slice(2)->Slice(3);
622 ASSERT_TRUE(slice->Equals(slice2));
623
624 slice = array->Slice(5, 10);
625 slice2 = array->Slice(5, 10);
626 ASSERT_EQ(10, slice->length());
627
628 ASSERT_TRUE(slice->Equals(slice2));
629 ASSERT_TRUE(array->RangeEquals(5, 15, 0, slice));
630}
631
632TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
633 DECL_T();
634
635 const int64_t size = 10000;
636
637 vector<T>& draws = this->draws_;
638 vector<uint8_t>& valid_bytes = this->valid_bytes_;
639
640 this->RandomData(size);
641
642 ASSERT_OK(this->builder_->Reserve(1000));
643 ASSERT_OK(this->builder_nn_->Reserve(1000));
644
645 int64_t null_count = 0;
646 // Append the first 1000
647 for (size_t i = 0; i < 1000; ++i) {
648 if (valid_bytes[i] > 0) {
649 ASSERT_OK(this->builder_->Append(draws[i]));
650 } else {
651 ASSERT_OK(this->builder_->AppendNull());
652 ++null_count;
653 }
654 ASSERT_OK(this->builder_nn_->Append(draws[i]));
655 }
656
657 ASSERT_EQ(null_count, this->builder_->null_count());
658
659 ASSERT_EQ(1000, this->builder_->length());
660 ASSERT_EQ(1024, this->builder_->capacity());
661
662 ASSERT_EQ(1000, this->builder_nn_->length());
663 ASSERT_EQ(1024, this->builder_nn_->capacity());
664
665 ASSERT_OK(this->builder_->Reserve(size - 1000));
666 ASSERT_OK(this->builder_nn_->Reserve(size - 1000));
667
668 // Append the next 9000
669 for (size_t i = 1000; i < size; ++i) {
670 if (valid_bytes[i] > 0) {
671 ASSERT_OK(this->builder_->Append(draws[i]));
672 } else {
673 ASSERT_OK(this->builder_->AppendNull());
674 }
675 ASSERT_OK(this->builder_nn_->Append(draws[i]));
676 }
677
678 ASSERT_EQ(size, this->builder_->length());
679 ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
680
681 ASSERT_EQ(size, this->builder_nn_->length());
682 ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity());
683
684 this->Check(this->builder_, true);
685 this->Check(this->builder_nn_, false);
686}
687
688TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) {
689 DECL_T();
690
691 int64_t size = 10000;
692 this->RandomData(size);
693
694 vector<T>& draws = this->draws_;
695 vector<uint8_t>& valid_bytes = this->valid_bytes_;
696
697 // first slug
698 int64_t K = 1000;
699
700 ASSERT_OK(this->builder_->AppendValues(draws.data(), K, valid_bytes.data()));
701 ASSERT_OK(this->builder_nn_->AppendValues(draws.data(), K));
702
703 ASSERT_EQ(1000, this->builder_->length());
704 ASSERT_EQ(1024, this->builder_->capacity());
705
706 ASSERT_EQ(1000, this->builder_nn_->length());
707 ASSERT_EQ(1024, this->builder_nn_->capacity());
708
709 // Append the next 9000
710 ASSERT_OK(
711 this->builder_->AppendValues(draws.data() + K, size - K, valid_bytes.data() + K));
712 ASSERT_OK(this->builder_nn_->AppendValues(draws.data() + K, size - K));
713
714 ASSERT_EQ(size, this->builder_->length());
715 ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
716
717 ASSERT_EQ(size, this->builder_nn_->length());
718 ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity());
719
720 this->Check(this->builder_, true);
721 this->Check(this->builder_nn_, false);
722}
723
724TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIter) {
725 int64_t size = 10000;
726 this->RandomData(size);
727
728 ASSERT_OK(this->builder_->AppendValues(this->draws_.begin(), this->draws_.end(),
729 this->valid_bytes_.begin()));
730 ASSERT_OK(this->builder_nn_->AppendValues(this->draws_.begin(), this->draws_.end()));
731
732 ASSERT_EQ(size, this->builder_->length());
733 ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
734
735 this->Check(this->builder_, true);
736 this->Check(this->builder_nn_, false);
737}
738
739TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterNullValid) {
740 int64_t size = 10000;
741 this->RandomData(size);
742
743 ASSERT_OK(this->builder_nn_->AppendValues(this->draws_.begin(),
744 this->draws_.begin() + size / 2,
745 static_cast<uint8_t*>(nullptr)));
746
747 ASSERT_EQ(BitUtil::NextPower2(size / 2), this->builder_nn_->capacity());
748
749 ASSERT_OK(this->builder_nn_->AppendValues(this->draws_.begin() + size / 2,
750 this->draws_.end(),
751 static_cast<uint64_t*>(nullptr)));
752
753 this->Check(this->builder_nn_, false);
754}
755
756TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) {
757 DECL_T();
758
759 int64_t size = 10000;
760 this->RandomData(size);
761
762 auto& draws = this->draws_;
763 auto& valid_bytes = this->valid_bytes_;
764
765 auto halve = [&draws](int64_t index) { return draws[index] / 2; };
766 auto lazy_iter = internal::MakeLazyRange(halve, size);
767
768 ASSERT_OK(this->builder_->AppendValues(lazy_iter.begin(), lazy_iter.end(),
769 valid_bytes.begin()));
770
771 std::vector<T> halved;
772 transform(draws.begin(), draws.end(), back_inserter(halved),
773 [](T in) { return in / 2; });
774
775 std::shared_ptr<Array> result;
776 FinishAndCheckPadding(this->builder_.get(), &result);
777
778 std::shared_ptr<Array> expected;
779 ASSERT_OK(
780 this->builder_->AppendValues(halved.data(), halved.size(), valid_bytes.data()));
781 FinishAndCheckPadding(this->builder_.get(), &expected);
782
783 ASSERT_TRUE(expected->Equals(result));
784}
785
786TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) {
787 DECL_T();
788 // find type we can safely convert the tested values to and from
789 using conversion_type =
790 typename std::conditional<std::is_floating_point<T>::value, double,
791 typename std::conditional<std::is_unsigned<T>::value,
792 uint64_t, int64_t>::type>::type;
793
794 int64_t size = 10000;
795 this->RandomData(size);
796
797 // append convertible values
798 vector<conversion_type> draws_converted(this->draws_.begin(), this->draws_.end());
799 vector<int32_t> valid_bytes_converted(this->valid_bytes_.begin(),
800 this->valid_bytes_.end());
801
802 auto cast_values = internal::MakeLazyRange(
803 [&draws_converted](int64_t index) {
804 return static_cast<T>(draws_converted[index]);
805 },
806 size);
807 auto cast_valid = internal::MakeLazyRange(
808 [&valid_bytes_converted](int64_t index) {
809 return static_cast<bool>(valid_bytes_converted[index]);
810 },
811 size);
812
813 ASSERT_OK(this->builder_->AppendValues(cast_values.begin(), cast_values.end(),
814 cast_valid.begin()));
815 ASSERT_OK(this->builder_nn_->AppendValues(cast_values.begin(), cast_values.end()));
816
817 ASSERT_EQ(size, this->builder_->length());
818 ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
819
820 ASSERT_EQ(size, this->builder_->length());
821 ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
822
823 this->Check(this->builder_, true);
824 this->Check(this->builder_nn_, false);
825}
826
827TYPED_TEST(TestPrimitiveBuilder, TestZeroPadded) {
828 DECL_T();
829
830 int64_t size = 10000;
831 this->RandomData(size);
832
833 vector<T>& draws = this->draws_;
834 vector<uint8_t>& valid_bytes = this->valid_bytes_;
835
836 // first slug
837 int64_t K = 1000;
838
839 ASSERT_OK(this->builder_->AppendValues(draws.data(), K, valid_bytes.data()));
840
841 std::shared_ptr<Array> out;
842 FinishAndCheckPadding(this->builder_.get(), &out);
843}
844
845TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesStdBool) {
846 // ARROW-1383
847 DECL_T();
848
849 int64_t size = 10000;
850 this->RandomData(size);
851
852 vector<T>& draws = this->draws_;
853
854 std::vector<bool> is_valid;
855
856 // first slug
857 int64_t K = 1000;
858
859 for (int64_t i = 0; i < K; ++i) {
860 is_valid.push_back(this->valid_bytes_[i] != 0);
861 }
862 ASSERT_OK(this->builder_->AppendValues(draws.data(), K, is_valid));
863 ASSERT_OK(this->builder_nn_->AppendValues(draws.data(), K));
864
865 ASSERT_EQ(1000, this->builder_->length());
866 ASSERT_EQ(1024, this->builder_->capacity());
867 ASSERT_EQ(1000, this->builder_nn_->length());
868 ASSERT_EQ(1024, this->builder_nn_->capacity());
869
870 // Append the next 9000
871 is_valid.clear();
872 std::vector<T> partial_draws;
873 for (int64_t i = K; i < size; ++i) {
874 partial_draws.push_back(draws[i]);
875 is_valid.push_back(this->valid_bytes_[i] != 0);
876 }
877
878 ASSERT_OK(this->builder_->AppendValues(partial_draws, is_valid));
879 ASSERT_OK(this->builder_nn_->AppendValues(partial_draws));
880
881 ASSERT_EQ(size, this->builder_->length());
882 ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
883
884 ASSERT_EQ(size, this->builder_nn_->length());
885 ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
886
887 this->Check(this->builder_, true);
888 this->Check(this->builder_nn_, false);
889}
890
891TYPED_TEST(TestPrimitiveBuilder, TestAdvance) {
892 int64_t n = 1000;
893 ASSERT_OK(this->builder_->Reserve(n));
894
895 ASSERT_OK(this->builder_->Advance(100));
896 ASSERT_EQ(100, this->builder_->length());
897
898 ASSERT_OK(this->builder_->Advance(900));
899
900 int64_t too_many = this->builder_->capacity() - 1000 + 1;
901 ASSERT_RAISES(Invalid, this->builder_->Advance(too_many));
902}
903
904TYPED_TEST(TestPrimitiveBuilder, TestResize) {
905 int64_t cap = kMinBuilderCapacity * 2;
906
907 ASSERT_OK(this->builder_->Reserve(cap));
908 ASSERT_EQ(cap, this->builder_->capacity());
909}
910
911TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
912 ASSERT_OK(this->builder_->Reserve(10));
913 ASSERT_EQ(0, this->builder_->length());
914 ASSERT_EQ(kMinBuilderCapacity, this->builder_->capacity());
915
916 ASSERT_OK(this->builder_->Reserve(90));
917 ASSERT_OK(this->builder_->Advance(100));
918 ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity));
919
920 ASSERT_RAISES(Invalid, this->builder_->Resize(1));
921
922 ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
923}
924
925TEST(TestBooleanBuilder, TestStdBoolVectorAppend) {
926 BooleanBuilder builder;
927 BooleanBuilder builder_nn;
928
929 std::vector<bool> values, is_valid;
930
931 const int length = 10000;
932 random_is_valid(length, 0.5, &values);
933 random_is_valid(length, 0.1, &is_valid);
934
935 const int chunksize = 1000;
936 for (int chunk = 0; chunk < length / chunksize; ++chunk) {
937 std::vector<bool> chunk_values, chunk_is_valid;
938 for (int i = chunk * chunksize; i < (chunk + 1) * chunksize; ++i) {
939 chunk_values.push_back(values[i]);
940 chunk_is_valid.push_back(is_valid[i]);
941 }
942 ASSERT_OK(builder.AppendValues(chunk_values, chunk_is_valid));
943 ASSERT_OK(builder_nn.AppendValues(chunk_values));
944 }
945
946 std::shared_ptr<Array> result, result_nn;
947 ASSERT_OK(builder.Finish(&result));
948 ASSERT_OK(builder_nn.Finish(&result_nn));
949
950 const auto& arr = checked_cast<const BooleanArray&>(*result);
951 const auto& arr_nn = checked_cast<const BooleanArray&>(*result_nn);
952 for (int i = 0; i < length; ++i) {
953 if (is_valid[i]) {
954 ASSERT_FALSE(arr.IsNull(i));
955 ASSERT_EQ(values[i], arr.Value(i));
956 } else {
957 ASSERT_TRUE(arr.IsNull(i));
958 }
959 ASSERT_EQ(values[i], arr_nn.Value(i));
960 }
961}
962
963template <typename TYPE>
964void CheckSliceApproxEquals() {
965 using T = typename TYPE::c_type;
966
967 const int64_t kSize = 50;
968 vector<T> draws1;
969 vector<T> draws2;
970
971 const uint32_t kSeed = 0;
972 random_real(kSize, kSeed, 0.0, 100.0, &draws1);
973 random_real(kSize, kSeed + 1, 0.0, 100.0, &draws2);
974
975 // Make the draws equal in the sliced segment, but unequal elsewhere (to
976 // catch not using the slice offset)
977 for (int64_t i = 10; i < 30; ++i) {
978 draws2[i] = draws1[i];
979 }
980
981 vector<bool> is_valid;
982 random_is_valid(kSize, 0.1, &is_valid);
983
984 std::shared_ptr<Array> array1, array2;
985 ArrayFromVector<TYPE, T>(is_valid, draws1, &array1);
986 ArrayFromVector<TYPE, T>(is_valid, draws2, &array2);
987
988 std::shared_ptr<Array> slice1 = array1->Slice(10, 20);
989 std::shared_ptr<Array> slice2 = array2->Slice(10, 20);
990
991 ASSERT_TRUE(slice1->ApproxEquals(slice2));
992}
993
// Runs the sliced ApproxEquals() check once per floating point type.
TEST(TestPrimitiveAdHoc, FloatingSliceApproxEquals) {
 CheckSliceApproxEquals<FloatType>();
 CheckSliceApproxEquals<DoubleType>();
}
998
999// ----------------------------------------------------------------------
1000// FixedSizeBinary tests
1001
1002class TestFWBinaryArray : public ::testing::Test {
1003 public:
1004 void SetUp() {}
1005
1006 void InitBuilder(int byte_width) {
1007 auto type = fixed_size_binary(byte_width);
1008 builder_.reset(new FixedSizeBinaryBuilder(type, default_memory_pool()));
1009 }
1010
1011 protected:
1012 std::unique_ptr<FixedSizeBinaryBuilder> builder_;
1013};
1014
1015TEST_F(TestFWBinaryArray, Builder) {
1016 int32_t byte_width = 10;
1017 int64_t length = 4096;
1018
1019 int64_t nbytes = length * byte_width;
1020
1021 vector<uint8_t> data(nbytes);
1022 random_bytes(nbytes, 0, data.data());
1023
1024 vector<uint8_t> is_valid(length);
1025 random_null_bytes(length, 0.1, is_valid.data());
1026
1027 const uint8_t* raw_data = data.data();
1028
1029 std::shared_ptr<Array> result;
1030
1031 auto CheckResult = [&length, &is_valid, &raw_data, &byte_width](const Array& result) {
1032 // Verify output
1033 const auto& fw_result = checked_cast<const FixedSizeBinaryArray&>(result);
1034
1035 ASSERT_EQ(length, result.length());
1036
1037 for (int64_t i = 0; i < result.length(); ++i) {
1038 if (is_valid[i]) {
1039 ASSERT_EQ(0,
1040 memcmp(raw_data + byte_width * i, fw_result.GetValue(i), byte_width));
1041 } else {
1042 ASSERT_TRUE(fw_result.IsNull(i));
1043 }
1044 }
1045 };
1046
1047 // Build using iterative API
1048 InitBuilder(byte_width);
1049 for (int64_t i = 0; i < length; ++i) {
1050 if (is_valid[i]) {
1051 ASSERT_OK(builder_->Append(raw_data + byte_width * i));
1052 } else {
1053 ASSERT_OK(builder_->AppendNull());
1054 }
1055 }
1056
1057 FinishAndCheckPadding(builder_.get(), &result);
1058 CheckResult(*result);
1059
1060 // Build using batch API
1061 InitBuilder(byte_width);
1062
1063 const uint8_t* raw_is_valid = is_valid.data();
1064
1065 ASSERT_OK(builder_->AppendValues(raw_data, 50, raw_is_valid));
1066 ASSERT_OK(
1067 builder_->AppendValues(raw_data + 50 * byte_width, length - 50, raw_is_valid + 50));
1068 FinishAndCheckPadding(builder_.get(), &result);
1069
1070 CheckResult(*result);
1071
1072 // Build from std::string
1073 InitBuilder(byte_width);
1074 for (int64_t i = 0; i < length; ++i) {
1075 if (is_valid[i]) {
1076 ASSERT_OK(builder_->Append(
1077 string(reinterpret_cast<const char*>(raw_data + byte_width * i), byte_width)));
1078 } else {
1079 ASSERT_OK(builder_->AppendNull());
1080 }
1081 }
1082
1083 ASSERT_OK(builder_->Finish(&result));
1084 CheckResult(*result);
1085}
1086
1087TEST_F(TestFWBinaryArray, EqualsRangeEquals) {
1088 // Check that we don't compare data in null slots
1089
1090 auto type = fixed_size_binary(4);
1091 FixedSizeBinaryBuilder builder1(type);
1092 FixedSizeBinaryBuilder builder2(type);
1093
1094 ASSERT_OK(builder1.Append("foo1"));
1095 ASSERT_OK(builder1.AppendNull());
1096
1097 ASSERT_OK(builder2.Append("foo1"));
1098 ASSERT_OK(builder2.Append("foo2"));
1099
1100 std::shared_ptr<Array> array1, array2;
1101 ASSERT_OK(builder1.Finish(&array1));
1102 ASSERT_OK(builder2.Finish(&array2));
1103
1104 const auto& a1 = checked_cast<const FixedSizeBinaryArray&>(*array1);
1105 const auto& a2 = checked_cast<const FixedSizeBinaryArray&>(*array2);
1106
1107 FixedSizeBinaryArray equal1(type, 2, a1.values(), a1.null_bitmap(), 1);
1108 FixedSizeBinaryArray equal2(type, 2, a2.values(), a1.null_bitmap(), 1);
1109
1110 ASSERT_TRUE(equal1.Equals(equal2));
1111 ASSERT_TRUE(equal1.RangeEquals(equal2, 0, 2, 0));
1112}
1113
1114TEST_F(TestFWBinaryArray, ZeroSize) {
1115 auto type = fixed_size_binary(0);
1116 FixedSizeBinaryBuilder builder(type);
1117
1118 ASSERT_OK(builder.Append(""));
1119 ASSERT_OK(builder.Append(std::string()));
1120 ASSERT_OK(builder.Append(static_cast<const uint8_t*>(nullptr)));
1121 ASSERT_OK(builder.AppendNull());
1122 ASSERT_OK(builder.AppendNull());
1123 ASSERT_OK(builder.AppendNull());
1124
1125 std::shared_ptr<Array> array;
1126 ASSERT_OK(builder.Finish(&array));
1127
1128 const auto& fw_array = checked_cast<const FixedSizeBinaryArray&>(*array);
1129
1130 // data is never allocated
1131 ASSERT_TRUE(fw_array.values() == nullptr);
1132 ASSERT_EQ(0, fw_array.byte_width());
1133
1134 ASSERT_EQ(6, array->length());
1135 ASSERT_EQ(3, array->null_count());
1136}
1137
1138TEST_F(TestFWBinaryArray, ZeroPadding) {
1139 auto type = fixed_size_binary(4);
1140 FixedSizeBinaryBuilder builder(type);
1141
1142 ASSERT_OK(builder.Append("foo1"));
1143 ASSERT_OK(builder.AppendNull());
1144 ASSERT_OK(builder.Append("foo2"));
1145 ASSERT_OK(builder.AppendNull());
1146 ASSERT_OK(builder.Append("foo3"));
1147
1148 std::shared_ptr<Array> array;
1149 FinishAndCheckPadding(&builder, &array);
1150}
1151
1152TEST_F(TestFWBinaryArray, Slice) {
1153 auto type = fixed_size_binary(4);
1154 FixedSizeBinaryBuilder builder(type);
1155
1156 vector<string> strings = {"foo1", "foo2", "foo3", "foo4", "foo5"};
1157 vector<uint8_t> is_null = {0, 1, 0, 0, 0};
1158
1159 for (int i = 0; i < 5; ++i) {
1160 if (is_null[i]) {
1161 ASSERT_OK(builder.AppendNull());
1162 } else {
1163 ASSERT_OK(builder.Append(strings[i]));
1164 }
1165 }
1166
1167 std::shared_ptr<Array> array;
1168 ASSERT_OK(builder.Finish(&array));
1169
1170 std::shared_ptr<Array> slice, slice2;
1171
1172 slice = array->Slice(1);
1173 slice2 = array->Slice(1);
1174 ASSERT_EQ(4, slice->length());
1175
1176 ASSERT_TRUE(slice->Equals(slice2));
1177 ASSERT_TRUE(array->RangeEquals(1, slice->length(), 0, slice));
1178
1179 // Chained slices
1180 slice = array->Slice(2);
1181 slice2 = array->Slice(1)->Slice(1);
1182 ASSERT_TRUE(slice->Equals(slice2));
1183
1184 slice = array->Slice(1, 3);
1185 ASSERT_EQ(3, slice->length());
1186
1187 slice2 = array->Slice(1, 3);
1188 ASSERT_TRUE(slice->Equals(slice2));
1189 ASSERT_TRUE(array->RangeEquals(1, 3, 0, slice));
1190}
1191
1192// ----------------------------------------------------------------------
1193// AdaptiveInt tests
1194
1195class TestAdaptiveIntBuilder : public TestBuilder {
1196 public:
1197 void SetUp() {
1198 TestBuilder::SetUp();
1199 builder_ = std::make_shared<AdaptiveIntBuilder>(pool_);
1200 }
1201
1202 void Done() { FinishAndCheckPadding(builder_.get(), &result_); }
1203
1204 protected:
1205 std::shared_ptr<AdaptiveIntBuilder> builder_;
1206
1207 std::shared_ptr<Array> expected_;
1208 std::shared_ptr<Array> result_;
1209};
1210
1211TEST_F(TestAdaptiveIntBuilder, TestInt8) {
1212 ASSERT_OK(builder_->Append(0));
1213 ASSERT_OK(builder_->Append(127));
1214 ASSERT_OK(builder_->Append(-128));
1215
1216 Done();
1217
1218 std::vector<int8_t> expected_values({0, 127, -128});
1219 ArrayFromVector<Int8Type, int8_t>(expected_values, &expected_);
1220 AssertArraysEqual(*expected_, *result_);
1221}
1222
1223TEST_F(TestAdaptiveIntBuilder, TestInt16) {
1224 ASSERT_OK(builder_->Append(0));
1225 ASSERT_OK(builder_->Append(128));
1226 Done();
1227
1228 std::vector<int16_t> expected_values({0, 128});
1229 ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_);
1230 AssertArraysEqual(*expected_, *result_);
1231
1232 SetUp();
1233 ASSERT_OK(builder_->Append(-129));
1234 expected_values = {-129};
1235 Done();
1236
1237 ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_);
1238 AssertArraysEqual(*expected_, *result_);
1239
1240 SetUp();
1241 ASSERT_OK(builder_->Append(std::numeric_limits<int16_t>::max()));
1242 ASSERT_OK(builder_->Append(std::numeric_limits<int16_t>::min()));
1243 expected_values = {std::numeric_limits<int16_t>::max(),
1244 std::numeric_limits<int16_t>::min()};
1245 Done();
1246
1247 ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_);
1248 AssertArraysEqual(*expected_, *result_);
1249}
1250
1251TEST_F(TestAdaptiveIntBuilder, TestInt32) {
1252 ASSERT_OK(builder_->Append(0));
1253 ASSERT_OK(
1254 builder_->Append(static_cast<int64_t>(std::numeric_limits<int16_t>::max()) + 1));
1255 Done();
1256
1257 std::vector<int32_t> expected_values(
1258 {0, static_cast<int32_t>(std::numeric_limits<int16_t>::max()) + 1});
1259 ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_);
1260 AssertArraysEqual(*expected_, *result_);
1261
1262 SetUp();
1263 ASSERT_OK(
1264 builder_->Append(static_cast<int64_t>(std::numeric_limits<int16_t>::min()) - 1));
1265 expected_values = {static_cast<int32_t>(std::numeric_limits<int16_t>::min()) - 1};
1266 Done();
1267
1268 ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_);
1269 AssertArraysEqual(*expected_, *result_);
1270
1271 SetUp();
1272 ASSERT_OK(builder_->Append(std::numeric_limits<int32_t>::max()));
1273 ASSERT_OK(builder_->Append(std::numeric_limits<int32_t>::min()));
1274 expected_values = {std::numeric_limits<int32_t>::max(),
1275 std::numeric_limits<int32_t>::min()};
1276 Done();
1277
1278 ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_);
1279 AssertArraysEqual(*expected_, *result_);
1280}
1281
1282TEST_F(TestAdaptiveIntBuilder, TestInt64) {
1283 ASSERT_OK(builder_->Append(0));
1284 ASSERT_OK(
1285 builder_->Append(static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1));
1286 Done();
1287
1288 std::vector<int64_t> expected_values(
1289 {0, static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1});
1290 ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_);
1291 AssertArraysEqual(*expected_, *result_);
1292
1293 SetUp();
1294 ASSERT_OK(
1295 builder_->Append(static_cast<int64_t>(std::numeric_limits<int32_t>::min()) - 1));
1296 expected_values = {static_cast<int64_t>(std::numeric_limits<int32_t>::min()) - 1};
1297 Done();
1298
1299 ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_);
1300 AssertArraysEqual(*expected_, *result_);
1301
1302 SetUp();
1303 ASSERT_OK(builder_->Append(std::numeric_limits<int64_t>::max()));
1304 ASSERT_OK(builder_->Append(std::numeric_limits<int64_t>::min()));
1305 expected_values = {std::numeric_limits<int64_t>::max(),
1306 std::numeric_limits<int64_t>::min()};
1307 Done();
1308
1309 ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_);
1310 AssertArraysEqual(*expected_, *result_);
1311}
1312
1313TEST_F(TestAdaptiveIntBuilder, TestAppendValues) {
1314 {
1315 std::vector<int64_t> expected_values(
1316 {0, static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1});
1317 ASSERT_OK(builder_->AppendValues(expected_values.data(), expected_values.size()));
1318 Done();
1319
1320 ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_);
1321 AssertArraysEqual(*expected_, *result_);
1322 }
1323 {
1324 SetUp();
1325 std::vector<int64_t> values(
1326 {0, std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::max()});
1327 ASSERT_OK(builder_->AppendValues(values.data(), values.size()));
1328 Done();
1329
1330 std::vector<int32_t> expected_values(
1331 {0, std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::max()});
1332
1333 ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_);
1334 AssertArraysEqual(*expected_, *result_);
1335 }
1336 {
1337 SetUp();
1338 std::vector<int64_t> values(
1339 {0, std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max()});
1340 ASSERT_OK(builder_->AppendValues(values.data(), values.size()));
1341 Done();
1342
1343 std::vector<int16_t> expected_values(
1344 {0, std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max()});
1345
1346 ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_);
1347 AssertArraysEqual(*expected_, *result_);
1348 }
1349 {
1350 SetUp();
1351 std::vector<int64_t> values(
1352 {0, std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()});
1353 ASSERT_OK(builder_->AppendValues(values.data(), values.size()));
1354 Done();
1355
1356 std::vector<int8_t> expected_values(
1357 {0, std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()});
1358
1359 ArrayFromVector<Int8Type, int8_t>(expected_values, &expected_);
1360 AssertArraysEqual(*expected_, *result_);
1361 }
1362}
1363
1364TEST_F(TestAdaptiveIntBuilder, TestAssertZeroPadded) {
1365 std::vector<int64_t> values(
1366 {0, static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1});
1367 ASSERT_OK(builder_->AppendValues(values.data(), values.size()));
1368 Done();
1369}
1370
1371TEST_F(TestAdaptiveIntBuilder, TestAppendNull) {
1372 int64_t size = 1000;
1373 ASSERT_OK(builder_->Append(127));
1374 for (unsigned index = 1; index < size - 1; ++index) {
1375 ASSERT_OK(builder_->AppendNull());
1376 }
1377 ASSERT_OK(builder_->Append(-128));
1378
1379 Done();
1380
1381 std::vector<bool> expected_valid(size, false);
1382 expected_valid[0] = true;
1383 expected_valid[size - 1] = true;
1384 std::vector<int8_t> expected_values(size);
1385 expected_values[0] = 127;
1386 expected_values[size - 1] = -128;
1387 std::shared_ptr<Array> expected;
1388 ArrayFromVector<Int8Type, int8_t>(expected_valid, expected_values, &expected_);
1389 AssertArraysEqual(*expected_, *result_);
1390}
1391
1392TEST_F(TestAdaptiveIntBuilder, TestAppendNulls) {
1393 constexpr int64_t size = 10;
1394 const uint8_t valid_bytes[size] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
1395 ASSERT_OK(builder_->AppendNulls(valid_bytes, size));
1396
1397 Done();
1398
1399 for (unsigned index = 0; index < size; ++index) {
1400 ASSERT_EQ(result_->IsValid(index), static_cast<bool>(valid_bytes[index]));
1401 }
1402}
1403
1404class TestAdaptiveUIntBuilder : public TestBuilder {
1405 public:
1406 void SetUp() {
1407 TestBuilder::SetUp();
1408 builder_ = std::make_shared<AdaptiveUIntBuilder>(pool_);
1409 }
1410
1411 void Done() { FinishAndCheckPadding(builder_.get(), &result_); }
1412
1413 protected:
1414 std::shared_ptr<AdaptiveUIntBuilder> builder_;
1415
1416 std::shared_ptr<Array> expected_;
1417 std::shared_ptr<Array> result_;
1418};
1419
1420TEST_F(TestAdaptiveUIntBuilder, TestUInt8) {
1421 ASSERT_OK(builder_->Append(0));
1422 ASSERT_OK(builder_->Append(255));
1423
1424 Done();
1425
1426 std::vector<uint8_t> expected_values({0, 255});
1427 ArrayFromVector<UInt8Type, uint8_t>(expected_values, &expected_);
1428 ASSERT_TRUE(expected_->Equals(result_));
1429}
1430
1431TEST_F(TestAdaptiveUIntBuilder, TestUInt16) {
1432 ASSERT_OK(builder_->Append(0));
1433 ASSERT_OK(builder_->Append(256));
1434 Done();
1435
1436 std::vector<uint16_t> expected_values({0, 256});
1437 ArrayFromVector<UInt16Type, uint16_t>(expected_values, &expected_);
1438 ASSERT_TRUE(expected_->Equals(result_));
1439
1440 SetUp();
1441 ASSERT_OK(builder_->Append(std::numeric_limits<uint16_t>::max()));
1442 expected_values = {std::numeric_limits<uint16_t>::max()};
1443 Done();
1444
1445 ArrayFromVector<UInt16Type, uint16_t>(expected_values, &expected_);
1446 ASSERT_TRUE(expected_->Equals(result_));
1447}
1448
1449TEST_F(TestAdaptiveUIntBuilder, TestUInt32) {
1450 ASSERT_OK(builder_->Append(0));
1451 ASSERT_OK(
1452 builder_->Append(static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1));
1453 Done();
1454
1455 std::vector<uint32_t> expected_values(
1456 {0, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1});
1457 ArrayFromVector<UInt32Type, uint32_t>(expected_values, &expected_);
1458 ASSERT_TRUE(expected_->Equals(result_));
1459
1460 SetUp();
1461 ASSERT_OK(builder_->Append(std::numeric_limits<uint32_t>::max()));
1462 expected_values = {std::numeric_limits<uint32_t>::max()};
1463 Done();
1464
1465 ArrayFromVector<UInt32Type, uint32_t>(expected_values, &expected_);
1466 ASSERT_TRUE(expected_->Equals(result_));
1467}
1468
1469TEST_F(TestAdaptiveUIntBuilder, TestUInt64) {
1470 ASSERT_OK(builder_->Append(0));
1471 ASSERT_OK(
1472 builder_->Append(static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1));
1473 Done();
1474
1475 std::vector<uint64_t> expected_values(
1476 {0, static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1});
1477 ArrayFromVector<UInt64Type, uint64_t>(expected_values, &expected_);
1478 ASSERT_TRUE(expected_->Equals(result_));
1479
1480 SetUp();
1481 ASSERT_OK(builder_->Append(std::numeric_limits<uint64_t>::max()));
1482 expected_values = {std::numeric_limits<uint64_t>::max()};
1483 Done();
1484
1485 ArrayFromVector<UInt64Type, uint64_t>(expected_values, &expected_);
1486 ASSERT_TRUE(expected_->Equals(result_));
1487}
1488
1489TEST_F(TestAdaptiveUIntBuilder, TestAppendValues) {
1490 std::vector<uint64_t> expected_values(
1491 {0, static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1});
1492 ASSERT_OK(builder_->AppendValues(expected_values.data(), expected_values.size()));
1493 Done();
1494
1495 ArrayFromVector<UInt64Type, uint64_t>(expected_values, &expected_);
1496 ASSERT_TRUE(expected_->Equals(result_));
1497}
1498
1499TEST_F(TestAdaptiveUIntBuilder, TestAssertZeroPadded) {
1500 std::vector<uint64_t> values(
1501 {0, static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1});
1502 ASSERT_OK(builder_->AppendValues(values.data(), values.size()));
1503 Done();
1504}
1505
1506TEST_F(TestAdaptiveUIntBuilder, TestAppendNull) {
1507 int64_t size = 1000;
1508 ASSERT_OK(builder_->Append(254));
1509 for (unsigned index = 1; index < size - 1; ++index) {
1510 ASSERT_OK(builder_->AppendNull());
1511 }
1512 ASSERT_OK(builder_->Append(255));
1513
1514 Done();
1515
1516 std::vector<bool> expected_valid(size, false);
1517 expected_valid[0] = true;
1518 expected_valid[size - 1] = true;
1519 std::vector<uint8_t> expected_values(size);
1520 expected_values[0] = 254;
1521 expected_values[size - 1] = 255;
1522 std::shared_ptr<Array> expected;
1523 ArrayFromVector<UInt8Type, uint8_t>(expected_valid, expected_values, &expected_);
1524 AssertArraysEqual(*expected_, *result_);
1525}
1526
1527TEST_F(TestAdaptiveUIntBuilder, TestAppendNulls) {
1528 constexpr int64_t size = 10;
1529 const uint8_t valid_bytes[size] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
1530 ASSERT_OK(builder_->AppendNulls(valid_bytes, size));
1531
1532 Done();
1533
1534 for (unsigned index = 0; index < size; ++index) {
1535 ASSERT_EQ(result_->IsValid(index), static_cast<bool>(valid_bytes[index]));
1536 }
1537}
1538
1539// ----------------------------------------------------------------------
1540// Union tests
1541
1542TEST(TestUnionArrayAdHoc, TestSliceEquals) {
1543 std::shared_ptr<RecordBatch> batch;
1544 ASSERT_OK(ipc::MakeUnion(&batch));
1545
1546 const int64_t size = batch->num_rows();
1547
1548 auto CheckUnion = [&size](std::shared_ptr<Array> array) {
1549 std::shared_ptr<Array> slice, slice2;
1550 slice = array->Slice(2);
1551 ASSERT_EQ(size - 2, slice->length());
1552
1553 slice2 = array->Slice(2);
1554 ASSERT_EQ(size - 2, slice->length());
1555
1556 ASSERT_TRUE(slice->Equals(slice2));
1557 ASSERT_TRUE(array->RangeEquals(2, array->length(), 0, slice));
1558
1559 // Chained slices
1560 slice2 = array->Slice(1)->Slice(1);
1561 ASSERT_TRUE(slice->Equals(slice2));
1562
1563 slice = array->Slice(1, 5);
1564 slice2 = array->Slice(1, 5);
1565 ASSERT_EQ(5, slice->length());
1566
1567 ASSERT_TRUE(slice->Equals(slice2));
1568 ASSERT_TRUE(array->RangeEquals(1, 6, 0, slice));
1569
1570 AssertZeroPadded(*array);
1571 TestInitialized(*array);
1572 };
1573
1574 CheckUnion(batch->column(1));
1575 CheckUnion(batch->column(2));
1576}
1577
1578using DecimalVector = std::vector<Decimal128>;
1579
1580class DecimalTest : public ::testing::TestWithParam<int> {
1581 public:
1582 DecimalTest() {}
1583
1584 template <size_t BYTE_WIDTH = 16>
1585 void MakeData(const DecimalVector& input, std::vector<uint8_t>* out) const {
1586 out->reserve(input.size() * BYTE_WIDTH);
1587
1588 for (const auto& value : input) {
1589 auto bytes = value.ToBytes();
1590 out->insert(out->end(), bytes.cbegin(), bytes.cend());
1591 }
1592 }
1593
1594 template <size_t BYTE_WIDTH = 16>
1595 void TestCreate(int32_t precision, const DecimalVector& draw,
1596 const std::vector<uint8_t>& valid_bytes, int64_t offset) const {
1597 auto type = std::make_shared<Decimal128Type>(precision, 4);
1598 auto builder = std::make_shared<Decimal128Builder>(type);
1599
1600 size_t null_count = 0;
1601
1602 const size_t size = draw.size();
1603
1604 ASSERT_OK(builder->Reserve(size));
1605
1606 for (size_t i = 0; i < size; ++i) {
1607 if (valid_bytes[i]) {
1608 ASSERT_OK(builder->Append(draw[i]));
1609 } else {
1610 ASSERT_OK(builder->AppendNull());
1611 ++null_count;
1612 }
1613 }
1614
1615 std::shared_ptr<Array> out;
1616 FinishAndCheckPadding(builder.get(), &out);
1617
1618 std::vector<uint8_t> raw_bytes;
1619
1620 raw_bytes.reserve(size * BYTE_WIDTH);
1621 MakeData<BYTE_WIDTH>(draw, &raw_bytes);
1622
1623 auto expected_data = std::make_shared<Buffer>(raw_bytes.data(), BYTE_WIDTH);
1624 std::shared_ptr<Buffer> expected_null_bitmap;
1625 ASSERT_OK(
1626 BitUtil::BytesToBits(valid_bytes, default_memory_pool(), &expected_null_bitmap));
1627
1628 int64_t expected_null_count = CountNulls(valid_bytes);
1629 auto expected = std::make_shared<Decimal128Array>(
1630 type, size, expected_data, expected_null_bitmap, expected_null_count);
1631
1632 std::shared_ptr<Array> lhs = out->Slice(offset);
1633 std::shared_ptr<Array> rhs = expected->Slice(offset);
1634 ASSERT_TRUE(lhs->Equals(rhs));
1635 }
1636};
1637
1638TEST_P(DecimalTest, NoNulls) {
1639 int32_t precision = GetParam();
1640 std::vector<Decimal128> draw = {Decimal128(1), Decimal128(-2), Decimal128(2389),
1641 Decimal128(4), Decimal128(-12348)};
1642 std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
1643 this->TestCreate(precision, draw, valid_bytes, 0);
1644 this->TestCreate(precision, draw, valid_bytes, 2);
1645}
1646
1647TEST_P(DecimalTest, WithNulls) {
1648 int32_t precision = GetParam();
1649 std::vector<Decimal128> draw = {Decimal128(1), Decimal128(2), Decimal128(-1),
1650 Decimal128(4), Decimal128(-1), Decimal128(1),
1651 Decimal128(2)};
1652 Decimal128 big;
1653 ASSERT_OK(Decimal128::FromString("230342903942.234234", &big));
1654 draw.push_back(big);
1655
1656 Decimal128 big_negative;
1657 ASSERT_OK(Decimal128::FromString("-23049302932.235234", &big_negative));
1658 draw.push_back(big_negative);
1659
1660 std::vector<uint8_t> valid_bytes = {true, true, false, true, false,
1661 true, true, true, true};
1662 this->TestCreate(precision, draw, valid_bytes, 0);
1663 this->TestCreate(precision, draw, valid_bytes, 2);
1664}
1665
// Instantiates every DecimalTest case once per precision value generated by
// ::testing::Range(1, 38).
// NOTE(review): Range's end bound is presumably exclusive, so precision 38
// itself would not be exercised -- confirm this is intended.
INSTANTIATE_TEST_CASE_P(DecimalTest, DecimalTest, ::testing::Range(1, 38));
1667
1668// ----------------------------------------------------------------------
1669// Test rechunking
1670
1671TEST(TestRechunkArraysConsistently, Trivial) {
1672 std::vector<ArrayVector> groups, rechunked;
1673 rechunked = internal::RechunkArraysConsistently(groups);
1674 ASSERT_EQ(rechunked.size(), 0);
1675
1676 std::shared_ptr<Array> a1, a2, b1;
1677 ArrayFromVector<Int16Type, int16_t>({}, &a1);
1678 ArrayFromVector<Int16Type, int16_t>({}, &a2);
1679 ArrayFromVector<Int32Type, int32_t>({}, &b1);
1680
1681 groups = {{a1, a2}, {}, {b1}};
1682 rechunked = internal::RechunkArraysConsistently(groups);
1683 ASSERT_EQ(rechunked.size(), 3);
1684
1685 for (auto& arrvec : rechunked) {
1686 for (auto& arr : arrvec) {
1687 AssertZeroPadded(*arr);
1688 TestInitialized(*arr);
1689 }
1690 }
1691}
1692
1693TEST(TestRechunkArraysConsistently, Plain) {
1694 std::shared_ptr<Array> expected;
1695 std::shared_ptr<Array> a1, a2, a3, b1, b2, b3, b4;
1696 ArrayFromVector<Int16Type, int16_t>({1, 2, 3}, &a1);
1697 ArrayFromVector<Int16Type, int16_t>({4, 5}, &a2);
1698 ArrayFromVector<Int16Type, int16_t>({6, 7, 8, 9}, &a3);
1699
1700 ArrayFromVector<Int32Type, int32_t>({41, 42}, &b1);
1701 ArrayFromVector<Int32Type, int32_t>({43, 44, 45}, &b2);
1702 ArrayFromVector<Int32Type, int32_t>({46, 47}, &b3);
1703 ArrayFromVector<Int32Type, int32_t>({48, 49}, &b4);
1704
1705 ArrayVector a{a1, a2, a3};
1706 ArrayVector b{b1, b2, b3, b4};
1707
1708 std::vector<ArrayVector> groups{a, b}, rechunked;
1709 rechunked = internal::RechunkArraysConsistently(groups);
1710 ASSERT_EQ(rechunked.size(), 2);
1711 auto ra = rechunked[0];
1712 auto rb = rechunked[1];
1713
1714 ASSERT_EQ(ra.size(), 5);
1715 ArrayFromVector<Int16Type, int16_t>({1, 2}, &expected);
1716 ASSERT_ARRAYS_EQUAL(*ra[0], *expected);
1717 ArrayFromVector<Int16Type, int16_t>({3}, &expected);
1718 ASSERT_ARRAYS_EQUAL(*ra[1], *expected);
1719 ArrayFromVector<Int16Type, int16_t>({4, 5}, &expected);
1720 ASSERT_ARRAYS_EQUAL(*ra[2], *expected);
1721 ArrayFromVector<Int16Type, int16_t>({6, 7}, &expected);
1722 ASSERT_ARRAYS_EQUAL(*ra[3], *expected);
1723 ArrayFromVector<Int16Type, int16_t>({8, 9}, &expected);
1724 ASSERT_ARRAYS_EQUAL(*ra[4], *expected);
1725
1726 ASSERT_EQ(rb.size(), 5);
1727 ArrayFromVector<Int32Type, int32_t>({41, 42}, &expected);
1728 ASSERT_ARRAYS_EQUAL(*rb[0], *expected);
1729 ArrayFromVector<Int32Type, int32_t>({43}, &expected);
1730 ASSERT_ARRAYS_EQUAL(*rb[1], *expected);
1731 ArrayFromVector<Int32Type, int32_t>({44, 45}, &expected);
1732 ASSERT_ARRAYS_EQUAL(*rb[2], *expected);
1733 ArrayFromVector<Int32Type, int32_t>({46, 47}, &expected);
1734 ASSERT_ARRAYS_EQUAL(*rb[3], *expected);
1735 ArrayFromVector<Int32Type, int32_t>({48, 49}, &expected);
1736 ASSERT_ARRAYS_EQUAL(*rb[4], *expected);
1737
1738 for (auto& arrvec : rechunked) {
1739 for (auto& arr : arrvec) {
1740 AssertZeroPadded(*arr);
1741 TestInitialized(*arr);
1742 }
1743 }
1744}
1745
1746} // namespace arrow
1747