1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cstdint>
19#include <cstring>
20#include <memory>
21#include <string>
22#include <vector>
23
24#include <gtest/gtest.h>
25
26#include "arrow/array.h"
27#include "arrow/buffer.h"
28#include "arrow/builder.h"
29#include "arrow/status.h"
30#include "arrow/test-common.h"
31#include "arrow/test-util.h"
32#include "arrow/type.h"
33#include "arrow/util/bit-util.h"
34#include "arrow/util/checked_cast.h"
35
36namespace arrow {
37
38using std::string;
39using std::vector;
40
41using internal::checked_cast;
42
43// ----------------------------------------------------------------------
44// List tests
45
46class TestListArray : public TestBuilder {
47 public:
48 void SetUp() {
49 TestBuilder::SetUp();
50
51 value_type_ = int32();
52 type_ = list(value_type_);
53
54 std::unique_ptr<ArrayBuilder> tmp;
55 ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
56 builder_.reset(checked_cast<ListBuilder*>(tmp.release()));
57 }
58
59 void Done() {
60 std::shared_ptr<Array> out;
61 FinishAndCheckPadding(builder_.get(), &out);
62 result_ = std::dynamic_pointer_cast<ListArray>(out);
63 }
64
65 protected:
66 std::shared_ptr<DataType> value_type_;
67
68 std::shared_ptr<ListBuilder> builder_;
69 std::shared_ptr<ListArray> result_;
70};
71
72TEST_F(TestListArray, Equality) {
73 Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder());
74
75 std::shared_ptr<Array> array, equal_array, unequal_array;
76 vector<int32_t> equal_offsets = {0, 1, 2, 5, 6, 7, 8, 10};
77 vector<int32_t> equal_values = {1, 2, 3, 4, 5, 2, 2, 2, 5, 6};
78 vector<int32_t> unequal_offsets = {0, 1, 4, 7};
79 vector<int32_t> unequal_values = {1, 2, 2, 2, 3, 4, 5};
80
81 // setup two equal arrays
82 ASSERT_OK(builder_->AppendValues(equal_offsets.data(), equal_offsets.size()));
83 ASSERT_OK(vb->AppendValues(equal_values.data(), equal_values.size()));
84
85 ASSERT_OK(builder_->Finish(&array));
86 ASSERT_OK(builder_->AppendValues(equal_offsets.data(), equal_offsets.size()));
87 ASSERT_OK(vb->AppendValues(equal_values.data(), equal_values.size()));
88
89 ASSERT_OK(builder_->Finish(&equal_array));
90 // now an unequal one
91 ASSERT_OK(builder_->AppendValues(unequal_offsets.data(), unequal_offsets.size()));
92 ASSERT_OK(vb->AppendValues(unequal_values.data(), unequal_values.size()));
93
94 ASSERT_OK(builder_->Finish(&unequal_array));
95
96 // Test array equality
97 EXPECT_TRUE(array->Equals(array));
98 EXPECT_TRUE(array->Equals(equal_array));
99 EXPECT_TRUE(equal_array->Equals(array));
100 EXPECT_FALSE(equal_array->Equals(unequal_array));
101 EXPECT_FALSE(unequal_array->Equals(equal_array));
102
103 // Test range equality
104 EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_array));
105 EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_array));
106 EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array));
107 EXPECT_TRUE(array->RangeEquals(2, 3, 2, unequal_array));
108
109 // Check with slices, ARROW-33
110 std::shared_ptr<Array> slice, slice2;
111
112 slice = array->Slice(2);
113 slice2 = array->Slice(2);
114 ASSERT_EQ(array->length() - 2, slice->length());
115
116 ASSERT_TRUE(slice->Equals(slice2));
117 ASSERT_TRUE(array->RangeEquals(2, slice->length(), 0, slice));
118
119 // Chained slices
120 slice2 = array->Slice(1)->Slice(1);
121 ASSERT_TRUE(slice->Equals(slice2));
122
123 slice = array->Slice(1, 4);
124 slice2 = array->Slice(1, 4);
125 ASSERT_EQ(4, slice->length());
126
127 ASSERT_TRUE(slice->Equals(slice2));
128 ASSERT_TRUE(array->RangeEquals(1, 5, 0, slice));
129}
130
131TEST_F(TestListArray, TestResize) {}
132
133TEST_F(TestListArray, TestFromArrays) {
134 std::shared_ptr<Array> offsets1, offsets2, offsets3, offsets4, values;
135
136 std::vector<bool> offsets_is_valid3 = {true, false, true, true};
137 std::vector<bool> offsets_is_valid4 = {true, true, false, true};
138
139 std::vector<bool> values_is_valid = {true, false, true, true, true, true};
140
141 std::vector<int32_t> offset1_values = {0, 2, 2, 6};
142 std::vector<int32_t> offset2_values = {0, 2, 6, 6};
143
144 std::vector<int8_t> values_values = {0, 1, 2, 3, 4, 5};
145 const int length = 3;
146
147 ArrayFromVector<Int32Type, int32_t>(offset1_values, &offsets1);
148 ArrayFromVector<Int32Type, int32_t>(offset2_values, &offsets2);
149
150 ArrayFromVector<Int32Type, int32_t>(offsets_is_valid3, offset1_values, &offsets3);
151 ArrayFromVector<Int32Type, int32_t>(offsets_is_valid4, offset2_values, &offsets4);
152
153 ArrayFromVector<Int8Type, int8_t>(values_is_valid, values_values, &values);
154
155 auto list_type = list(int8());
156
157 std::shared_ptr<Array> list1, list3, list4;
158 ASSERT_OK(ListArray::FromArrays(*offsets1, *values, pool_, &list1));
159 ASSERT_OK(ListArray::FromArrays(*offsets3, *values, pool_, &list3));
160 ASSERT_OK(ListArray::FromArrays(*offsets4, *values, pool_, &list4));
161
162 ListArray expected1(list_type, length, offsets1->data()->buffers[1], values,
163 offsets1->data()->buffers[0], 0);
164 AssertArraysEqual(expected1, *list1);
165
166 // Use null bitmap from offsets3, but clean offsets from non-null version
167 ListArray expected3(list_type, length, offsets1->data()->buffers[1], values,
168 offsets3->data()->buffers[0], 1);
169 AssertArraysEqual(expected3, *list3);
170
171 // Check that the last offset bit is zero
172 ASSERT_FALSE(BitUtil::GetBit(list3->null_bitmap()->data(), length + 1));
173
174 ListArray expected4(list_type, length, offsets2->data()->buffers[1], values,
175 offsets4->data()->buffers[0], 1);
176 AssertArraysEqual(expected4, *list4);
177
178 // Test failure modes
179
180 std::shared_ptr<Array> tmp;
181
182 // Zero-length offsets
183 ASSERT_RAISES(Invalid,
184 ListArray::FromArrays(*offsets1->Slice(0, 0), *values, pool_, &tmp));
185
186 // Offsets not int32
187 ASSERT_RAISES(Invalid, ListArray::FromArrays(*values, *offsets1, pool_, &tmp));
188}
189
190TEST_F(TestListArray, TestAppendNull) {
191 ASSERT_OK(builder_->AppendNull());
192 ASSERT_OK(builder_->AppendNull());
193
194 Done();
195
196 ASSERT_OK(ValidateArray(*result_));
197 ASSERT_TRUE(result_->IsNull(0));
198 ASSERT_TRUE(result_->IsNull(1));
199
200 ASSERT_EQ(0, result_->raw_value_offsets()[0]);
201 ASSERT_EQ(0, result_->value_offset(1));
202 ASSERT_EQ(0, result_->value_offset(2));
203
204 auto values = result_->values();
205 ASSERT_EQ(0, values->length());
206 // Values buffer should be non-null
207 ASSERT_NE(nullptr, values->data()->buffers[1]);
208}
209
210void ValidateBasicListArray(const ListArray* result, const vector<int32_t>& values,
211 const vector<uint8_t>& is_valid) {
212 ASSERT_OK(ValidateArray(*result));
213 ASSERT_EQ(1, result->null_count());
214 ASSERT_EQ(0, result->values()->null_count());
215
216 ASSERT_EQ(3, result->length());
217 vector<int32_t> ex_offsets = {0, 3, 3, 7};
218 for (size_t i = 0; i < ex_offsets.size(); ++i) {
219 ASSERT_EQ(ex_offsets[i], result->value_offset(i));
220 }
221
222 for (int i = 0; i < result->length(); ++i) {
223 ASSERT_EQ(is_valid[i] == 0, result->IsNull(i));
224 }
225
226 ASSERT_EQ(7, result->values()->length());
227 auto varr = std::dynamic_pointer_cast<Int32Array>(result->values());
228
229 for (size_t i = 0; i < values.size(); ++i) {
230 ASSERT_EQ(values[i], varr->Value(i));
231 }
232}
233
234TEST_F(TestListArray, TestBasics) {
235 vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
236 vector<int> lengths = {3, 0, 4};
237 vector<uint8_t> is_valid = {1, 0, 1};
238
239 Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder());
240
241 ASSERT_OK(builder_->Reserve(lengths.size()));
242 ASSERT_OK(vb->Reserve(values.size()));
243
244 int pos = 0;
245 for (size_t i = 0; i < lengths.size(); ++i) {
246 ASSERT_OK(builder_->Append(is_valid[i] > 0));
247 for (int j = 0; j < lengths[i]; ++j) {
248 ASSERT_OK(vb->Append(values[pos++]));
249 }
250 }
251
252 Done();
253 ValidateBasicListArray(result_.get(), values, is_valid);
254}
255
256TEST_F(TestListArray, BulkAppend) {
257 vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
258 vector<int> lengths = {3, 0, 4};
259 vector<uint8_t> is_valid = {1, 0, 1};
260 vector<int32_t> offsets = {0, 3, 3};
261
262 Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder());
263 ASSERT_OK(vb->Reserve(values.size()));
264
265 ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size(), is_valid.data()));
266 for (int32_t value : values) {
267 ASSERT_OK(vb->Append(value));
268 }
269 Done();
270 ValidateBasicListArray(result_.get(), values, is_valid);
271}
272
273TEST_F(TestListArray, BulkAppendInvalid) {
274 vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
275 vector<int> lengths = {3, 0, 4};
276 vector<uint8_t> is_null = {0, 1, 0};
277 vector<uint8_t> is_valid = {1, 0, 1};
278 vector<int32_t> offsets = {0, 2, 4}; // should be 0, 3, 3 given the is_null array
279
280 Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder());
281 ASSERT_OK(vb->Reserve(values.size()));
282
283 ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size(), is_valid.data()));
284 ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size(), is_valid.data()));
285 for (int32_t value : values) {
286 ASSERT_OK(vb->Append(value));
287 }
288
289 Done();
290 ASSERT_RAISES(Invalid, ValidateArray(*result_));
291}
292
293TEST_F(TestListArray, TestZeroLength) {
294 // All buffers are null
295 Done();
296 ASSERT_OK(ValidateArray(*result_));
297}
298
299} // namespace arrow
300