1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cstdint>
19#include <cstring>
20#include <memory>
21#include <string>
22#include <vector>
23
24#include <gtest/gtest.h>
25
26#include "arrow/array.h"
27#include "arrow/buffer.h"
28#include "arrow/builder.h"
29#include "arrow/memory_pool.h"
30#include "arrow/status.h"
31#include "arrow/test-common.h"
32#include "arrow/test-util.h"
33#include "arrow/type.h"
34#include "arrow/type_traits.h"
35#include "arrow/util/bit-util.h"
36#include "arrow/util/checked_cast.h"
37
38namespace arrow {
39
40using std::string;
41using std::vector;
42
43using internal::checked_cast;
44
45// ----------------------------------------------------------------------
46// String / Binary tests
47
48class TestStringArray : public ::testing::Test {
49 public:
50 void SetUp() {
51 chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
52 offsets_ = {0, 1, 1, 1, 3, 6};
53 valid_bytes_ = {1, 1, 0, 1, 1};
54 expected_ = {"a", "", "", "bb", "ccc"};
55
56 MakeArray();
57 }
58
59 void MakeArray() {
60 length_ = static_cast<int64_t>(offsets_.size()) - 1;
61 value_buf_ = Buffer::Wrap(chars_);
62 offsets_buf_ = Buffer::Wrap(offsets_);
63 ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_));
64 null_count_ = CountNulls(valid_bytes_);
65
66 strings_ = std::make_shared<StringArray>(length_, offsets_buf_, value_buf_,
67 null_bitmap_, null_count_);
68 }
69
70 protected:
71 vector<int32_t> offsets_;
72 vector<char> chars_;
73 vector<uint8_t> valid_bytes_;
74
75 vector<string> expected_;
76
77 std::shared_ptr<Buffer> value_buf_;
78 std::shared_ptr<Buffer> offsets_buf_;
79 std::shared_ptr<Buffer> null_bitmap_;
80
81 int64_t null_count_;
82 int64_t length_;
83
84 std::shared_ptr<StringArray> strings_;
85};
86
87TEST_F(TestStringArray, TestArrayBasics) {
88 ASSERT_EQ(length_, strings_->length());
89 ASSERT_EQ(1, strings_->null_count());
90 ASSERT_OK(ValidateArray(*strings_));
91}
92
93TEST_F(TestStringArray, TestType) {
94 std::shared_ptr<DataType> type = strings_->type();
95
96 ASSERT_EQ(Type::STRING, type->id());
97 ASSERT_EQ(Type::STRING, strings_->type_id());
98}
99
100TEST_F(TestStringArray, TestListFunctions) {
101 int pos = 0;
102 for (size_t i = 0; i < expected_.size(); ++i) {
103 ASSERT_EQ(pos, strings_->value_offset(i));
104 ASSERT_EQ(static_cast<int>(expected_[i].size()), strings_->value_length(i));
105 pos += static_cast<int>(expected_[i].size());
106 }
107}
108
109TEST_F(TestStringArray, TestDestructor) {
110 auto arr = std::make_shared<StringArray>(length_, offsets_buf_, value_buf_,
111 null_bitmap_, null_count_);
112}
113
114TEST_F(TestStringArray, TestGetString) {
115 for (size_t i = 0; i < expected_.size(); ++i) {
116 if (valid_bytes_[i] == 0) {
117 ASSERT_TRUE(strings_->IsNull(i));
118 } else {
119 ASSERT_EQ(expected_[i], strings_->GetString(i));
120 }
121 }
122}
123
124TEST_F(TestStringArray, TestEmptyStringComparison) {
125 offsets_ = {0, 0, 0, 0, 0, 0};
126 offsets_buf_ = Buffer::Wrap(offsets_);
127 length_ = static_cast<int64_t>(offsets_.size() - 1);
128
129 auto strings_a = std::make_shared<StringArray>(length_, offsets_buf_, nullptr,
130 null_bitmap_, null_count_);
131 auto strings_b = std::make_shared<StringArray>(length_, offsets_buf_, nullptr,
132 null_bitmap_, null_count_);
133 ASSERT_TRUE(strings_a->Equals(strings_b));
134}
135
136TEST_F(TestStringArray, CompareNullByteSlots) {
137 StringBuilder builder;
138 StringBuilder builder2;
139 StringBuilder builder3;
140
141 ASSERT_OK(builder.Append("foo"));
142 ASSERT_OK(builder2.Append("foo"));
143 ASSERT_OK(builder3.Append("foo"));
144
145 ASSERT_OK(builder.Append("bar"));
146 ASSERT_OK(builder2.AppendNull());
147
148 // same length, but different
149 ASSERT_OK(builder3.Append("xyz"));
150
151 ASSERT_OK(builder.Append("baz"));
152 ASSERT_OK(builder2.Append("baz"));
153 ASSERT_OK(builder3.Append("baz"));
154
155 std::shared_ptr<Array> array, array2, array3;
156 FinishAndCheckPadding(&builder, &array);
157 ASSERT_OK(builder2.Finish(&array2));
158 ASSERT_OK(builder3.Finish(&array3));
159
160 const auto& a1 = checked_cast<const StringArray&>(*array);
161 const auto& a2 = checked_cast<const StringArray&>(*array2);
162 const auto& a3 = checked_cast<const StringArray&>(*array3);
163
164 // The validity bitmaps are the same, the data is different, but the unequal
165 // portion is masked out
166 StringArray equal_array(3, a1.value_offsets(), a1.value_data(), a2.null_bitmap(), 1);
167 StringArray equal_array2(3, a3.value_offsets(), a3.value_data(), a2.null_bitmap(), 1);
168
169 ASSERT_TRUE(equal_array.Equals(equal_array2));
170 ASSERT_TRUE(a2.RangeEquals(equal_array2, 0, 3, 0));
171
172 ASSERT_TRUE(equal_array.Array::Slice(1)->Equals(equal_array2.Array::Slice(1)));
173 ASSERT_TRUE(
174 equal_array.Array::Slice(1)->RangeEquals(0, 2, 0, equal_array2.Array::Slice(1)));
175}
176
177TEST_F(TestStringArray, TestSliceGetString) {
178 StringBuilder builder;
179
180 ASSERT_OK(builder.Append("a"));
181 ASSERT_OK(builder.Append("b"));
182 ASSERT_OK(builder.Append("c"));
183
184 std::shared_ptr<Array> array;
185 ASSERT_OK(builder.Finish(&array));
186 auto s = array->Slice(1, 10);
187 auto arr = std::dynamic_pointer_cast<StringArray>(s);
188 ASSERT_EQ(arr->GetString(0), "b");
189}
190
191// ----------------------------------------------------------------------
192// String builder tests
193
194class TestStringBuilder : public TestBuilder {
195 public:
196 void SetUp() {
197 TestBuilder::SetUp();
198 builder_.reset(new StringBuilder(pool_));
199 }
200
201 void Done() {
202 std::shared_ptr<Array> out;
203 FinishAndCheckPadding(builder_.get(), &out);
204
205 result_ = std::dynamic_pointer_cast<StringArray>(out);
206 ASSERT_OK(ValidateArray(*result_));
207 }
208
209 protected:
210 std::unique_ptr<StringBuilder> builder_;
211 std::shared_ptr<StringArray> result_;
212};
213
214TEST_F(TestStringBuilder, TestScalarAppend) {
215 vector<string> strings = {"", "bb", "a", "", "ccc"};
216 vector<uint8_t> is_null = {0, 0, 0, 1, 0};
217
218 int N = static_cast<int>(strings.size());
219 int reps = 1000;
220
221 for (int j = 0; j < reps; ++j) {
222 for (int i = 0; i < N; ++i) {
223 if (is_null[i]) {
224 ASSERT_OK(builder_->AppendNull());
225 } else {
226 ASSERT_OK(builder_->Append(strings[i]));
227 }
228 }
229 }
230 Done();
231
232 ASSERT_EQ(reps * N, result_->length());
233 ASSERT_EQ(reps, result_->null_count());
234 ASSERT_EQ(reps * 6, result_->value_data()->size());
235
236 int32_t length;
237 int32_t pos = 0;
238 for (int i = 0; i < N * reps; ++i) {
239 if (is_null[i % N]) {
240 ASSERT_TRUE(result_->IsNull(i));
241 } else {
242 ASSERT_FALSE(result_->IsNull(i));
243 result_->GetValue(i, &length);
244 ASSERT_EQ(pos, result_->value_offset(i));
245 ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
246 ASSERT_EQ(strings[i % N], result_->GetString(i));
247
248 pos += length;
249 }
250 }
251}
252
253TEST_F(TestStringBuilder, TestAppendVector) {
254 vector<string> strings = {"", "bb", "a", "", "ccc"};
255 vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
256
257 int N = static_cast<int>(strings.size());
258 int reps = 1000;
259
260 for (int j = 0; j < reps; ++j) {
261 ASSERT_OK(builder_->AppendValues(strings, valid_bytes.data()));
262 }
263 Done();
264
265 ASSERT_EQ(reps * N, result_->length());
266 ASSERT_EQ(reps, result_->null_count());
267 ASSERT_EQ(reps * 6, result_->value_data()->size());
268
269 int32_t length;
270 int32_t pos = 0;
271 for (int i = 0; i < N * reps; ++i) {
272 if (valid_bytes[i % N]) {
273 ASSERT_FALSE(result_->IsNull(i));
274 result_->GetValue(i, &length);
275 ASSERT_EQ(pos, result_->value_offset(i));
276 ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
277 ASSERT_EQ(strings[i % N], result_->GetString(i));
278
279 pos += length;
280 } else {
281 ASSERT_TRUE(result_->IsNull(i));
282 }
283 }
284}
285
286TEST_F(TestStringBuilder, TestAppendCStringsWithValidBytes) {
287 const char* strings[] = {nullptr, "aaa", nullptr, "ignored", ""};
288 vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
289
290 int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
291 int reps = 1000;
292
293 for (int j = 0; j < reps; ++j) {
294 ASSERT_OK(builder_->AppendValues(strings, N, valid_bytes.data()));
295 }
296 Done();
297
298 ASSERT_EQ(reps * N, result_->length());
299 ASSERT_EQ(reps * 3, result_->null_count());
300 ASSERT_EQ(reps * 3, result_->value_data()->size());
301
302 int32_t length;
303 int32_t pos = 0;
304 for (int i = 0; i < N * reps; ++i) {
305 auto string = strings[i % N];
306 if (string && valid_bytes[i % N]) {
307 ASSERT_FALSE(result_->IsNull(i));
308 result_->GetValue(i, &length);
309 ASSERT_EQ(pos, result_->value_offset(i));
310 ASSERT_EQ(static_cast<int32_t>(strlen(string)), length);
311 ASSERT_EQ(strings[i % N], result_->GetString(i));
312
313 pos += length;
314 } else {
315 ASSERT_TRUE(result_->IsNull(i));
316 }
317 }
318}
319
320TEST_F(TestStringBuilder, TestAppendCStringsWithoutValidBytes) {
321 const char* strings[] = {"", "bb", "a", nullptr, "ccc"};
322
323 int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
324 int reps = 1000;
325
326 for (int j = 0; j < reps; ++j) {
327 ASSERT_OK(builder_->AppendValues(strings, N));
328 }
329 Done();
330
331 ASSERT_EQ(reps * N, result_->length());
332 ASSERT_EQ(reps, result_->null_count());
333 ASSERT_EQ(reps * 6, result_->value_data()->size());
334
335 int32_t length;
336 int32_t pos = 0;
337 for (int i = 0; i < N * reps; ++i) {
338 if (strings[i % N]) {
339 ASSERT_FALSE(result_->IsNull(i));
340 result_->GetValue(i, &length);
341 ASSERT_EQ(pos, result_->value_offset(i));
342 ASSERT_EQ(static_cast<int32_t>(strlen(strings[i % N])), length);
343 ASSERT_EQ(strings[i % N], result_->GetString(i));
344
345 pos += length;
346 } else {
347 ASSERT_TRUE(result_->IsNull(i));
348 }
349 }
350}
351
352TEST_F(TestStringBuilder, TestZeroLength) {
353 // All buffers are null
354 Done();
355}
356
// Binary container type
// TODO(emkornfield): find a way to refactor these tests so that they do not
// duplicate the String tests.
360class TestBinaryArray : public ::testing::Test {
361 public:
362 void SetUp() {
363 chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
364 offsets_ = {0, 1, 1, 1, 3, 6};
365 valid_bytes_ = {1, 1, 0, 1, 1};
366 expected_ = {"a", "", "", "bb", "ccc"};
367
368 MakeArray();
369 }
370
371 void MakeArray() {
372 length_ = static_cast<int64_t>(offsets_.size() - 1);
373 value_buf_ = Buffer::Wrap(chars_);
374 offsets_buf_ = Buffer::Wrap(offsets_);
375
376 ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_));
377 null_count_ = CountNulls(valid_bytes_);
378
379 strings_ = std::make_shared<BinaryArray>(length_, offsets_buf_, value_buf_,
380 null_bitmap_, null_count_);
381 }
382
383 protected:
384 vector<int32_t> offsets_;
385 vector<char> chars_;
386 vector<uint8_t> valid_bytes_;
387
388 vector<string> expected_;
389
390 std::shared_ptr<Buffer> value_buf_;
391 std::shared_ptr<Buffer> offsets_buf_;
392 std::shared_ptr<Buffer> null_bitmap_;
393
394 int64_t null_count_;
395 int64_t length_;
396
397 std::shared_ptr<BinaryArray> strings_;
398};
399
400TEST_F(TestBinaryArray, TestArrayBasics) {
401 ASSERT_EQ(length_, strings_->length());
402 ASSERT_EQ(1, strings_->null_count());
403 ASSERT_OK(ValidateArray(*strings_));
404}
405
406TEST_F(TestBinaryArray, TestType) {
407 std::shared_ptr<DataType> type = strings_->type();
408
409 ASSERT_EQ(Type::BINARY, type->id());
410 ASSERT_EQ(Type::BINARY, strings_->type_id());
411}
412
413TEST_F(TestBinaryArray, TestListFunctions) {
414 size_t pos = 0;
415 for (size_t i = 0; i < expected_.size(); ++i) {
416 ASSERT_EQ(pos, strings_->value_offset(i));
417 ASSERT_EQ(static_cast<int>(expected_[i].size()), strings_->value_length(i));
418 pos += expected_[i].size();
419 }
420}
421
422TEST_F(TestBinaryArray, TestDestructor) {
423 auto arr = std::make_shared<BinaryArray>(length_, offsets_buf_, value_buf_,
424 null_bitmap_, null_count_);
425}
426
427TEST_F(TestBinaryArray, TestGetValue) {
428 for (size_t i = 0; i < expected_.size(); ++i) {
429 if (valid_bytes_[i] == 0) {
430 ASSERT_TRUE(strings_->IsNull(i));
431 } else {
432 ASSERT_FALSE(strings_->IsNull(i));
433 ASSERT_EQ(strings_->GetString(i), expected_[i]);
434 }
435 }
436}
437
438TEST_F(TestBinaryArray, TestNullValuesInitialized) {
439 for (size_t i = 0; i < expected_.size(); ++i) {
440 if (valid_bytes_[i] == 0) {
441 ASSERT_TRUE(strings_->IsNull(i));
442 } else {
443 ASSERT_FALSE(strings_->IsNull(i));
444 ASSERT_EQ(strings_->GetString(i), expected_[i]);
445 }
446 }
447 TestInitialized(*strings_);
448}
449
450TEST_F(TestBinaryArray, TestPaddingZeroed) { AssertZeroPadded(*strings_); }
451
452TEST_F(TestBinaryArray, TestGetString) {
453 for (size_t i = 0; i < expected_.size(); ++i) {
454 if (valid_bytes_[i] == 0) {
455 ASSERT_TRUE(strings_->IsNull(i));
456 } else {
457 std::string val = strings_->GetString(i);
458 ASSERT_EQ(0, std::memcmp(expected_[i].data(), val.c_str(), val.size()));
459 }
460 }
461}
462
463TEST_F(TestBinaryArray, TestEqualsEmptyStrings) {
464 BinaryBuilder builder;
465
466 string empty_string("");
467 for (int i = 0; i < 5; ++i) {
468 ASSERT_OK(builder.Append(empty_string));
469 }
470
471 std::shared_ptr<Array> left_arr;
472 FinishAndCheckPadding(&builder, &left_arr);
473
474 const BinaryArray& left = checked_cast<const BinaryArray&>(*left_arr);
475 std::shared_ptr<Array> right =
476 std::make_shared<BinaryArray>(left.length(), left.value_offsets(), nullptr,
477 left.null_bitmap(), left.null_count());
478
479 ASSERT_TRUE(left.Equals(right));
480 ASSERT_TRUE(left.RangeEquals(0, left.length(), 0, right));
481}
482
483class TestBinaryBuilder : public TestBuilder {
484 public:
485 void SetUp() {
486 TestBuilder::SetUp();
487 builder_.reset(new BinaryBuilder(pool_));
488 }
489
490 void Done() {
491 std::shared_ptr<Array> out;
492 FinishAndCheckPadding(builder_.get(), &out);
493
494 result_ = std::dynamic_pointer_cast<BinaryArray>(out);
495 ASSERT_OK(ValidateArray(*result_));
496 }
497
498 protected:
499 std::unique_ptr<BinaryBuilder> builder_;
500 std::shared_ptr<BinaryArray> result_;
501};
502
503TEST_F(TestBinaryBuilder, TestScalarAppend) {
504 vector<string> strings = {"", "bb", "a", "", "ccc"};
505 vector<uint8_t> is_null = {0, 0, 0, 1, 0};
506
507 int N = static_cast<int>(strings.size());
508 int reps = 10;
509
510 for (int j = 0; j < reps; ++j) {
511 for (int i = 0; i < N; ++i) {
512 if (is_null[i]) {
513 ASSERT_OK(builder_->AppendNull());
514 } else {
515 ASSERT_OK(builder_->Append(strings[i]));
516 }
517 }
518 }
519 Done();
520 ASSERT_OK(ValidateArray(*result_));
521 ASSERT_EQ(reps * N, result_->length());
522 ASSERT_EQ(reps, result_->null_count());
523 ASSERT_EQ(reps * 6, result_->value_data()->size());
524
525 int32_t length;
526 for (int i = 0; i < N * reps; ++i) {
527 if (is_null[i % N]) {
528 ASSERT_TRUE(result_->IsNull(i));
529 } else {
530 ASSERT_FALSE(result_->IsNull(i));
531 const uint8_t* vals = result_->GetValue(i, &length);
532 ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
533 ASSERT_EQ(0, std::memcmp(vals, strings[i % N].data(), length));
534 }
535 }
536}
537
538TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) {
539 vector<string> strings = {"", "bb", "a", "", "ccc"};
540 vector<uint8_t> is_null = {0, 0, 0, 1, 0};
541
542 int N = static_cast<int>(strings.size());
543 int reps = 13;
544 int total_length = 0;
545 for (auto&& s : strings) total_length += static_cast<int>(s.size());
546
547 ASSERT_OK(builder_->Reserve(N * reps));
548 ASSERT_OK(builder_->ReserveData(total_length * reps));
549
550 for (int j = 0; j < reps; ++j) {
551 for (int i = 0; i < N; ++i) {
552 if (is_null[i]) {
553 builder_->UnsafeAppendNull();
554 } else {
555 builder_->UnsafeAppend(strings[i]);
556 }
557 }
558 }
559 ASSERT_EQ(builder_->value_data_length(), total_length * reps);
560 Done();
561 ASSERT_OK(ValidateArray(*result_));
562 ASSERT_EQ(reps * N, result_->length());
563 ASSERT_EQ(reps, result_->null_count());
564 ASSERT_EQ(reps * total_length, result_->value_data()->size());
565
566 int32_t length;
567 for (int i = 0; i < N * reps; ++i) {
568 if (is_null[i % N]) {
569 ASSERT_TRUE(result_->IsNull(i));
570 } else {
571 ASSERT_FALSE(result_->IsNull(i));
572 const uint8_t* vals = result_->GetValue(i, &length);
573 ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
574 ASSERT_EQ(0, std::memcmp(vals, strings[i % N].data(), length));
575 }
576 }
577}
578
579TEST_F(TestBinaryBuilder, TestCapacityReserve) {
580 vector<string> strings = {"aaaaa", "bbbbbbbbbb", "ccccccccccccccc", "dddddddddd"};
581 int N = static_cast<int>(strings.size());
582 int reps = 15;
583 int64_t length = 0;
584 int64_t capacity = 1000;
585 int64_t expected_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
586
587 ASSERT_OK(builder_->ReserveData(capacity));
588
589 ASSERT_EQ(length, builder_->value_data_length());
590 ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
591
592 for (int j = 0; j < reps; ++j) {
593 for (int i = 0; i < N; ++i) {
594 ASSERT_OK(builder_->Append(strings[i]));
595 length += static_cast<int>(strings[i].size());
596
597 ASSERT_EQ(length, builder_->value_data_length());
598 ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
599 }
600 }
601
602 int extra_capacity = 500;
603 expected_capacity = BitUtil::RoundUpToMultipleOf64(length + extra_capacity);
604
605 ASSERT_OK(builder_->ReserveData(extra_capacity));
606
607 ASSERT_EQ(length, builder_->value_data_length());
608 ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
609
610 Done();
611
612 ASSERT_EQ(reps * N, result_->length());
613 ASSERT_EQ(0, result_->null_count());
614 ASSERT_EQ(reps * 40, result_->value_data()->size());
615
616 // Capacity is shrunk after `Finish`
617 ASSERT_EQ(640, result_->value_data()->capacity());
618}
619
620TEST_F(TestBinaryBuilder, TestZeroLength) {
621 // All buffers are null
622 Done();
623}
624
625// ----------------------------------------------------------------------
626// Slice tests
627
628template <typename TYPE>
629void CheckSliceEquality() {
630 using Traits = TypeTraits<TYPE>;
631 using BuilderType = typename Traits::BuilderType;
632
633 BuilderType builder;
634
635 vector<string> strings = {"foo", "", "bar", "baz", "qux", ""};
636 vector<uint8_t> is_null = {0, 1, 0, 1, 0, 0};
637
638 int N = static_cast<int>(strings.size());
639 int reps = 10;
640
641 for (int j = 0; j < reps; ++j) {
642 for (int i = 0; i < N; ++i) {
643 if (is_null[i]) {
644 ASSERT_OK(builder.AppendNull());
645 } else {
646 ASSERT_OK(builder.Append(strings[i]));
647 }
648 }
649 }
650
651 std::shared_ptr<Array> array;
652 FinishAndCheckPadding(&builder, &array);
653
654 std::shared_ptr<Array> slice, slice2;
655
656 slice = array->Slice(5);
657 slice2 = array->Slice(5);
658 ASSERT_EQ(N * reps - 5, slice->length());
659
660 ASSERT_TRUE(slice->Equals(slice2));
661 ASSERT_TRUE(array->RangeEquals(5, slice->length(), 0, slice));
662
663 // Chained slices
664 slice2 = array->Slice(2)->Slice(3);
665 ASSERT_TRUE(slice->Equals(slice2));
666
667 slice = array->Slice(5, 20);
668 slice2 = array->Slice(5, 20);
669 ASSERT_EQ(20, slice->length());
670
671 ASSERT_TRUE(slice->Equals(slice2));
672 ASSERT_TRUE(array->RangeEquals(5, 25, 0, slice));
673}
674
675TEST_F(TestBinaryArray, TestSliceEquality) { CheckSliceEquality<BinaryType>(); }
676
677TEST_F(TestStringArray, TestSliceEquality) { CheckSliceEquality<BinaryType>(); }
678
679TEST_F(TestBinaryArray, LengthZeroCtor) { BinaryArray array(0, nullptr, nullptr); }
680
681// ----------------------------------------------------------------------
682// ChunkedBinaryBuilder tests
683
684class TestChunkedBinaryBuilder : public ::testing::Test {
685 public:
686 void SetUp() {}
687
688 void Init(int32_t chunksize) {
689 builder_.reset(new internal::ChunkedBinaryBuilder(chunksize));
690 }
691
692 protected:
693 std::unique_ptr<internal::ChunkedBinaryBuilder> builder_;
694};
695
696TEST_F(TestChunkedBinaryBuilder, BasicOperation) {
697 const int32_t chunksize = 1000;
698 Init(chunksize);
699
700 const int elem_size = 10;
701 uint8_t buf[elem_size];
702
703 BinaryBuilder unchunked_builder;
704
705 const int iterations = 1000;
706 for (int i = 0; i < iterations; ++i) {
707 random_bytes(elem_size, i, buf);
708
709 ASSERT_OK(unchunked_builder.Append(buf, elem_size));
710 ASSERT_OK(builder_->Append(buf, elem_size));
711 }
712
713 std::shared_ptr<Array> unchunked;
714 ASSERT_OK(unchunked_builder.Finish(&unchunked));
715
716 ArrayVector chunks;
717 ASSERT_OK(builder_->Finish(&chunks));
718
719 // This assumes that everything is evenly divisible
720 ArrayVector expected_chunks;
721 const int elems_per_chunk = chunksize / elem_size;
722 for (int i = 0; i < iterations / elems_per_chunk; ++i) {
723 expected_chunks.emplace_back(unchunked->Slice(i * elems_per_chunk, elems_per_chunk));
724 }
725
726 ASSERT_EQ(expected_chunks.size(), chunks.size());
727 for (size_t i = 0; i < chunks.size(); ++i) {
728 AssertArraysEqual(*expected_chunks[i], *chunks[i]);
729 }
730}
731
732TEST_F(TestChunkedBinaryBuilder, NoData) {
733 Init(1000);
734
735 ArrayVector chunks;
736 ASSERT_OK(builder_->Finish(&chunks));
737
738 ASSERT_EQ(1, chunks.size());
739 ASSERT_EQ(0, chunks[0]->length());
740}
741
742TEST_F(TestChunkedBinaryBuilder, LargeElements) {
743 Init(100);
744
745 const int bufsize = 101;
746 uint8_t buf[bufsize];
747
748 const int iterations = 100;
749 for (int i = 0; i < iterations; ++i) {
750 random_bytes(bufsize, i, buf);
751 ASSERT_OK(builder_->Append(buf, bufsize));
752 }
753
754 ArrayVector chunks;
755 ASSERT_OK(builder_->Finish(&chunks));
756 ASSERT_EQ(iterations, static_cast<int>(chunks.size()));
757
758 int64_t total_data_size = 0;
759 for (auto chunk : chunks) {
760 ASSERT_EQ(1, chunk->length());
761 total_data_size +=
762 static_cast<int64_t>(static_cast<const BinaryArray&>(*chunk).GetView(0).size());
763 }
764 ASSERT_EQ(iterations * bufsize, total_data_size);
765}
766
767TEST(TestChunkedStringBuilder, BasicOperation) {
768 const int chunksize = 100;
769 internal::ChunkedStringBuilder builder(chunksize);
770
771 std::string value = "0123456789";
772
773 const int iterations = 100;
774 for (int i = 0; i < iterations; ++i) {
775 ASSERT_OK(builder.Append(value));
776 }
777
778 ArrayVector chunks;
779 ASSERT_OK(builder.Finish(&chunks));
780
781 ASSERT_EQ(10, chunks.size());
782
783 // Type is correct
784 for (auto chunk : chunks) {
785 ASSERT_TRUE(chunk->type()->Equals(*::arrow::utf8()));
786 }
787}
788
789} // namespace arrow
790