1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cstdint>
19#include <cstring>
20#include <memory>
21#include <string>
22#include <vector>
23
24#include <gtest/gtest.h>
25
26#include "arrow/array.h"
27#include "arrow/builder.h"
28#include "arrow/status.h"
29#include "arrow/test-common.h"
30#include "arrow/test-util.h"
31#include "arrow/type.h"
32#include "arrow/util/checked_cast.h"
33
34namespace arrow {
35
36using std::string;
37using std::vector;
38
39using internal::checked_cast;
40
41// ----------------------------------------------------------------------
42// Struct tests
43
44void ValidateBasicStructArray(const StructArray* result,
45 const vector<uint8_t>& struct_is_valid,
46 const vector<char>& list_values,
47 const vector<uint8_t>& list_is_valid,
48 const vector<int>& list_lengths,
49 const vector<int>& list_offsets,
50 const vector<int32_t>& int_values) {
51 ASSERT_EQ(4, result->length());
52 ASSERT_OK(ValidateArray(*result));
53
54 auto list_char_arr = std::dynamic_pointer_cast<ListArray>(result->field(0));
55 auto char_arr = std::dynamic_pointer_cast<Int8Array>(list_char_arr->values());
56 auto int32_arr = std::dynamic_pointer_cast<Int32Array>(result->field(1));
57
58 ASSERT_EQ(nullptr, result->GetFieldByName("non-existing"));
59 ASSERT_TRUE(list_char_arr->Equals(result->GetFieldByName("list")));
60 ASSERT_TRUE(int32_arr->Equals(result->GetFieldByName("int")));
61
62 ASSERT_EQ(0, result->null_count());
63 ASSERT_EQ(1, list_char_arr->null_count());
64 ASSERT_EQ(0, int32_arr->null_count());
65
66 // List<char>
67 ASSERT_EQ(4, list_char_arr->length());
68 ASSERT_EQ(10, list_char_arr->values()->length());
69 for (size_t i = 0; i < list_offsets.size(); ++i) {
70 ASSERT_EQ(list_offsets[i], list_char_arr->raw_value_offsets()[i]);
71 }
72 for (size_t i = 0; i < list_values.size(); ++i) {
73 ASSERT_EQ(list_values[i], char_arr->Value(i));
74 }
75
76 // Int32
77 ASSERT_EQ(4, int32_arr->length());
78 for (size_t i = 0; i < int_values.size(); ++i) {
79 ASSERT_EQ(int_values[i], int32_arr->Value(i));
80 }
81}
82
83// ----------------------------------------------------------------------------------
84// Struct test
85class TestStructBuilder : public TestBuilder {
86 public:
87 void SetUp() {
88 TestBuilder::SetUp();
89
90 auto int32_type = int32();
91 auto char_type = int8();
92 auto list_type = list(char_type);
93
94 vector<std::shared_ptr<DataType>> types = {list_type, int32_type};
95 vector<std::shared_ptr<Field>> fields;
96 fields.push_back(field("list", list_type));
97 fields.push_back(field("int", int32_type));
98
99 type_ = struct_(fields);
100 value_fields_ = fields;
101
102 std::unique_ptr<ArrayBuilder> tmp;
103 ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
104 builder_.reset(checked_cast<StructBuilder*>(tmp.release()));
105 ASSERT_EQ(2, static_cast<int>(builder_->num_fields()));
106 }
107
108 void Done() {
109 std::shared_ptr<Array> out;
110 FinishAndCheckPadding(builder_.get(), &out);
111 result_ = std::dynamic_pointer_cast<StructArray>(out);
112 }
113
114 protected:
115 vector<std::shared_ptr<Field>> value_fields_;
116
117 std::shared_ptr<StructBuilder> builder_;
118 std::shared_ptr<StructArray> result_;
119};
120
121TEST_F(TestStructBuilder, TestAppendNull) {
122 ASSERT_OK(builder_->AppendNull());
123 ASSERT_OK(builder_->AppendNull());
124 ASSERT_EQ(2, static_cast<int>(builder_->num_fields()));
125
126 ListBuilder* list_vb = checked_cast<ListBuilder*>(builder_->field_builder(0));
127 ASSERT_OK(list_vb->AppendNull());
128 ASSERT_OK(list_vb->AppendNull());
129 ASSERT_EQ(2, list_vb->length());
130
131 Int32Builder* int_vb = checked_cast<Int32Builder*>(builder_->field_builder(1));
132 ASSERT_OK(int_vb->AppendNull());
133 ASSERT_OK(int_vb->AppendNull());
134 ASSERT_EQ(2, int_vb->length());
135
136 Done();
137
138 ASSERT_OK(ValidateArray(*result_));
139
140 ASSERT_EQ(2, static_cast<int>(result_->num_fields()));
141 ASSERT_EQ(2, result_->length());
142 ASSERT_EQ(2, result_->field(0)->length());
143 ASSERT_EQ(2, result_->field(1)->length());
144 ASSERT_TRUE(result_->IsNull(0));
145 ASSERT_TRUE(result_->IsNull(1));
146 ASSERT_TRUE(result_->field(0)->IsNull(0));
147 ASSERT_TRUE(result_->field(0)->IsNull(1));
148 ASSERT_TRUE(result_->field(1)->IsNull(0));
149 ASSERT_TRUE(result_->field(1)->IsNull(1));
150
151 ASSERT_EQ(Type::LIST, result_->field(0)->type_id());
152 ASSERT_EQ(Type::INT32, result_->field(1)->type_id());
153}
154
155TEST_F(TestStructBuilder, TestBasics) {
156 vector<int32_t> int_values = {1, 2, 3, 4};
157 vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
158 vector<int> list_lengths = {3, 0, 3, 4};
159 vector<int> list_offsets = {0, 3, 3, 6, 10};
160 vector<uint8_t> list_is_valid = {1, 0, 1, 1};
161 vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
162
163 ListBuilder* list_vb = checked_cast<ListBuilder*>(builder_->field_builder(0));
164 Int8Builder* char_vb = checked_cast<Int8Builder*>(list_vb->value_builder());
165 Int32Builder* int_vb = checked_cast<Int32Builder*>(builder_->field_builder(1));
166 ASSERT_EQ(2, static_cast<int>(builder_->num_fields()));
167
168 EXPECT_OK(builder_->Resize(list_lengths.size()));
169 EXPECT_OK(char_vb->Resize(list_values.size()));
170 EXPECT_OK(int_vb->Resize(int_values.size()));
171
172 int pos = 0;
173 for (size_t i = 0; i < list_lengths.size(); ++i) {
174 ASSERT_OK(list_vb->Append(list_is_valid[i] > 0));
175 int_vb->UnsafeAppend(int_values[i]);
176 for (int j = 0; j < list_lengths[i]; ++j) {
177 char_vb->UnsafeAppend(list_values[pos++]);
178 }
179 }
180
181 for (size_t i = 0; i < struct_is_valid.size(); ++i) {
182 ASSERT_OK(builder_->Append(struct_is_valid[i] > 0));
183 }
184
185 Done();
186
187 ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
188 list_lengths, list_offsets, int_values);
189}
190
191TEST_F(TestStructBuilder, BulkAppend) {
192 vector<int32_t> int_values = {1, 2, 3, 4};
193 vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
194 vector<int> list_lengths = {3, 0, 3, 4};
195 vector<int> list_offsets = {0, 3, 3, 6};
196 vector<uint8_t> list_is_valid = {1, 0, 1, 1};
197 vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
198
199 ListBuilder* list_vb = checked_cast<ListBuilder*>(builder_->field_builder(0));
200 Int8Builder* char_vb = checked_cast<Int8Builder*>(list_vb->value_builder());
201 Int32Builder* int_vb = checked_cast<Int32Builder*>(builder_->field_builder(1));
202
203 ASSERT_OK(builder_->Resize(list_lengths.size()));
204 ASSERT_OK(char_vb->Resize(list_values.size()));
205 ASSERT_OK(int_vb->Resize(int_values.size()));
206
207 ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data()));
208
209 ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(),
210 list_is_valid.data()));
211 for (int8_t value : list_values) {
212 char_vb->UnsafeAppend(value);
213 }
214 for (int32_t value : int_values) {
215 int_vb->UnsafeAppend(value);
216 }
217
218 Done();
219 ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
220 list_lengths, list_offsets, int_values);
221}
222
223TEST_F(TestStructBuilder, BulkAppendInvalid) {
224 vector<int32_t> int_values = {1, 2, 3, 4};
225 vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
226 vector<int> list_lengths = {3, 0, 3, 4};
227 vector<int> list_offsets = {0, 3, 3, 6};
228 vector<uint8_t> list_is_valid = {1, 0, 1, 1};
229 vector<uint8_t> struct_is_valid = {1, 0, 1, 1}; // should be 1, 1, 1, 1
230
231 ListBuilder* list_vb = checked_cast<ListBuilder*>(builder_->field_builder(0));
232 Int8Builder* char_vb = checked_cast<Int8Builder*>(list_vb->value_builder());
233 Int32Builder* int_vb = checked_cast<Int32Builder*>(builder_->field_builder(1));
234
235 ASSERT_OK(builder_->Reserve(list_lengths.size()));
236 ASSERT_OK(char_vb->Reserve(list_values.size()));
237 ASSERT_OK(int_vb->Reserve(int_values.size()));
238
239 ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data()));
240
241 ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(),
242 list_is_valid.data()));
243 for (int8_t value : list_values) {
244 char_vb->UnsafeAppend(value);
245 }
246 for (int32_t value : int_values) {
247 int_vb->UnsafeAppend(value);
248 }
249
250 Done();
251 // Even null bitmap of the parent Struct is not valid, validate will ignore it.
252 ASSERT_OK(ValidateArray(*result_));
253}
254
255TEST_F(TestStructBuilder, TestEquality) {
256 std::shared_ptr<Array> array, equal_array;
257 std::shared_ptr<Array> unequal_bitmap_array, unequal_offsets_array,
258 unequal_values_array;
259
260 vector<int32_t> int_values = {101, 102, 103, 104};
261 vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
262 vector<int> list_lengths = {3, 0, 3, 4};
263 vector<int> list_offsets = {0, 3, 3, 6};
264 vector<uint8_t> list_is_valid = {1, 0, 1, 1};
265 vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
266
267 vector<int32_t> unequal_int_values = {104, 102, 103, 101};
268 vector<char> unequal_list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'l', 'u', 'c', 'y'};
269 vector<int> unequal_list_offsets = {0, 3, 4, 6};
270 vector<uint8_t> unequal_list_is_valid = {1, 1, 1, 1};
271 vector<uint8_t> unequal_struct_is_valid = {1, 0, 0, 1};
272
273 ListBuilder* list_vb = checked_cast<ListBuilder*>(builder_->field_builder(0));
274 Int8Builder* char_vb = checked_cast<Int8Builder*>(list_vb->value_builder());
275 Int32Builder* int_vb = checked_cast<Int32Builder*>(builder_->field_builder(1));
276 ASSERT_OK(builder_->Reserve(list_lengths.size()));
277 ASSERT_OK(char_vb->Reserve(list_values.size()));
278 ASSERT_OK(int_vb->Reserve(int_values.size()));
279
280 // setup two equal arrays, one of which takes an unequal bitmap
281 ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data()));
282 ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(),
283 list_is_valid.data()));
284 for (int8_t value : list_values) {
285 char_vb->UnsafeAppend(value);
286 }
287 for (int32_t value : int_values) {
288 int_vb->UnsafeAppend(value);
289 }
290
291 FinishAndCheckPadding(builder_.get(), &array);
292
293 ASSERT_OK(builder_->Resize(list_lengths.size()));
294 ASSERT_OK(char_vb->Resize(list_values.size()));
295 ASSERT_OK(int_vb->Resize(int_values.size()));
296
297 ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data()));
298 ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(),
299 list_is_valid.data()));
300 for (int8_t value : list_values) {
301 char_vb->UnsafeAppend(value);
302 }
303 for (int32_t value : int_values) {
304 int_vb->UnsafeAppend(value);
305 }
306
307 ASSERT_OK(builder_->Finish(&equal_array));
308
309 ASSERT_OK(builder_->Resize(list_lengths.size()));
310 ASSERT_OK(char_vb->Resize(list_values.size()));
311 ASSERT_OK(int_vb->Resize(int_values.size()));
312
313 // setup an unequal one with the unequal bitmap
314 ASSERT_OK(builder_->AppendValues(unequal_struct_is_valid.size(),
315 unequal_struct_is_valid.data()));
316 ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(),
317 list_is_valid.data()));
318 for (int8_t value : list_values) {
319 char_vb->UnsafeAppend(value);
320 }
321 for (int32_t value : int_values) {
322 int_vb->UnsafeAppend(value);
323 }
324
325 ASSERT_OK(builder_->Finish(&unequal_bitmap_array));
326
327 ASSERT_OK(builder_->Resize(list_lengths.size()));
328 ASSERT_OK(char_vb->Resize(list_values.size()));
329 ASSERT_OK(int_vb->Resize(int_values.size()));
330
331 // setup an unequal one with unequal offsets
332 ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data()));
333 ASSERT_OK(list_vb->AppendValues(unequal_list_offsets.data(),
334 unequal_list_offsets.size(),
335 unequal_list_is_valid.data()));
336 for (int8_t value : list_values) {
337 char_vb->UnsafeAppend(value);
338 }
339 for (int32_t value : int_values) {
340 int_vb->UnsafeAppend(value);
341 }
342
343 ASSERT_OK(builder_->Finish(&unequal_offsets_array));
344
345 ASSERT_OK(builder_->Resize(list_lengths.size()));
346 ASSERT_OK(char_vb->Resize(list_values.size()));
347 ASSERT_OK(int_vb->Resize(int_values.size()));
348
349 // setup anunequal one with unequal values
350 ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data()));
351 ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(),
352 list_is_valid.data()));
353 for (int8_t value : unequal_list_values) {
354 char_vb->UnsafeAppend(value);
355 }
356 for (int32_t value : unequal_int_values) {
357 int_vb->UnsafeAppend(value);
358 }
359
360 ASSERT_OK(builder_->Finish(&unequal_values_array));
361
362 // Test array equality
363 EXPECT_TRUE(array->Equals(array));
364 EXPECT_TRUE(array->Equals(equal_array));
365 EXPECT_TRUE(equal_array->Equals(array));
366 EXPECT_FALSE(equal_array->Equals(unequal_bitmap_array));
367 EXPECT_FALSE(unequal_bitmap_array->Equals(equal_array));
368 EXPECT_FALSE(unequal_bitmap_array->Equals(unequal_values_array));
369 EXPECT_FALSE(unequal_values_array->Equals(unequal_bitmap_array));
370 EXPECT_FALSE(unequal_bitmap_array->Equals(unequal_offsets_array));
371 EXPECT_FALSE(unequal_offsets_array->Equals(unequal_bitmap_array));
372
373 // Test range equality
374 EXPECT_TRUE(array->RangeEquals(0, 4, 0, equal_array));
375 EXPECT_TRUE(array->RangeEquals(3, 4, 3, unequal_bitmap_array));
376 EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_offsets_array));
377 EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_offsets_array));
378 EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_offsets_array));
379 EXPECT_FALSE(array->RangeEquals(0, 1, 0, unequal_values_array));
380 EXPECT_TRUE(array->RangeEquals(1, 3, 1, unequal_values_array));
381 EXPECT_FALSE(array->RangeEquals(3, 4, 3, unequal_values_array));
382}
383
384TEST_F(TestStructBuilder, TestZeroLength) {
385 // All buffers are null
386 Done();
387 ASSERT_OK(ValidateArray(*result_));
388}
389
390TEST_F(TestStructBuilder, TestSlice) {
391 std::shared_ptr<Array> array, equal_array;
392 std::shared_ptr<Array> unequal_bitmap_array, unequal_offsets_array,
393 unequal_values_array;
394
395 vector<int32_t> int_values = {101, 102, 103, 104};
396 vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
397 vector<int> list_lengths = {3, 0, 3, 4};
398 vector<int> list_offsets = {0, 3, 3, 6};
399 vector<uint8_t> list_is_valid = {1, 0, 1, 1};
400 vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
401
402 ListBuilder* list_vb = checked_cast<ListBuilder*>(builder_->field_builder(0));
403 Int8Builder* char_vb = checked_cast<Int8Builder*>(list_vb->value_builder());
404 Int32Builder* int_vb = checked_cast<Int32Builder*>(builder_->field_builder(1));
405 ASSERT_OK(builder_->Reserve(list_lengths.size()));
406 ASSERT_OK(char_vb->Reserve(list_values.size()));
407 ASSERT_OK(int_vb->Reserve(int_values.size()));
408
409 ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data()));
410 ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(),
411 list_is_valid.data()));
412 for (int8_t value : list_values) {
413 char_vb->UnsafeAppend(value);
414 }
415 for (int32_t value : int_values) {
416 int_vb->UnsafeAppend(value);
417 }
418 FinishAndCheckPadding(builder_.get(), &array);
419
420 std::shared_ptr<StructArray> slice, slice2;
421 std::shared_ptr<Int32Array> int_field;
422 std::shared_ptr<ListArray> list_field;
423
424 slice = std::dynamic_pointer_cast<StructArray>(array->Slice(2));
425 slice2 = std::dynamic_pointer_cast<StructArray>(array->Slice(2));
426 ASSERT_EQ(array->length() - 2, slice->length());
427
428 ASSERT_TRUE(slice->Equals(slice2));
429 ASSERT_TRUE(array->RangeEquals(2, slice->length(), 0, slice));
430
431 int_field = std::dynamic_pointer_cast<Int32Array>(slice->field(1));
432 ASSERT_EQ(int_field->length(), slice->length());
433 ASSERT_EQ(int_field->Value(0), 103);
434 ASSERT_EQ(int_field->Value(1), 104);
435 ASSERT_EQ(int_field->null_count(), 0);
436 list_field = std::dynamic_pointer_cast<ListArray>(slice->field(0));
437 ASSERT_FALSE(list_field->IsNull(0));
438 ASSERT_FALSE(list_field->IsNull(1));
439 ASSERT_EQ(list_field->value_length(0), 3);
440 ASSERT_EQ(list_field->value_length(1), 4);
441 ASSERT_EQ(list_field->null_count(), 0);
442
443 slice = std::dynamic_pointer_cast<StructArray>(array->Slice(1, 2));
444 slice2 = std::dynamic_pointer_cast<StructArray>(array->Slice(1, 2));
445 ASSERT_EQ(2, slice->length());
446
447 ASSERT_TRUE(slice->Equals(slice2));
448 ASSERT_TRUE(array->RangeEquals(1, 3, 0, slice));
449
450 int_field = std::dynamic_pointer_cast<Int32Array>(slice->field(1));
451 ASSERT_EQ(int_field->length(), slice->length());
452 ASSERT_EQ(int_field->Value(0), 102);
453 ASSERT_EQ(int_field->Value(1), 103);
454 ASSERT_EQ(int_field->null_count(), 0);
455 list_field = std::dynamic_pointer_cast<ListArray>(slice->field(0));
456 ASSERT_TRUE(list_field->IsNull(0));
457 ASSERT_FALSE(list_field->IsNull(1));
458 ASSERT_EQ(list_field->value_length(0), 0);
459 ASSERT_EQ(list_field->value_length(1), 3);
460 ASSERT_EQ(list_field->null_count(), 1);
461}
462
463} // namespace arrow
464