1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <cstdint> |
19 | #include <cstring> |
20 | #include <memory> |
21 | #include <string> |
22 | #include <vector> |
23 | |
24 | #include <gtest/gtest.h> |
25 | |
26 | #include "arrow/array.h" |
27 | #include "arrow/buffer.h" |
28 | #include "arrow/builder.h" |
29 | #include "arrow/status.h" |
30 | #include "arrow/test-common.h" |
31 | #include "arrow/test-util.h" |
32 | #include "arrow/type.h" |
33 | #include "arrow/util/bit-util.h" |
34 | #include "arrow/util/checked_cast.h" |
35 | |
36 | namespace arrow { |
37 | |
38 | using std::string; |
39 | using std::vector; |
40 | |
41 | using internal::checked_cast; |
42 | |
43 | // ---------------------------------------------------------------------- |
44 | // List tests |
45 | |
46 | class TestListArray : public TestBuilder { |
47 | public: |
48 | void SetUp() { |
49 | TestBuilder::SetUp(); |
50 | |
51 | value_type_ = int32(); |
52 | type_ = list(value_type_); |
53 | |
54 | std::unique_ptr<ArrayBuilder> tmp; |
55 | ASSERT_OK(MakeBuilder(pool_, type_, &tmp)); |
56 | builder_.reset(checked_cast<ListBuilder*>(tmp.release())); |
57 | } |
58 | |
59 | void Done() { |
60 | std::shared_ptr<Array> out; |
61 | FinishAndCheckPadding(builder_.get(), &out); |
62 | result_ = std::dynamic_pointer_cast<ListArray>(out); |
63 | } |
64 | |
65 | protected: |
66 | std::shared_ptr<DataType> value_type_; |
67 | |
68 | std::shared_ptr<ListBuilder> builder_; |
69 | std::shared_ptr<ListArray> result_; |
70 | }; |
71 | |
72 | TEST_F(TestListArray, Equality) { |
73 | Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder()); |
74 | |
75 | std::shared_ptr<Array> array, equal_array, unequal_array; |
76 | vector<int32_t> equal_offsets = {0, 1, 2, 5, 6, 7, 8, 10}; |
77 | vector<int32_t> equal_values = {1, 2, 3, 4, 5, 2, 2, 2, 5, 6}; |
78 | vector<int32_t> unequal_offsets = {0, 1, 4, 7}; |
79 | vector<int32_t> unequal_values = {1, 2, 2, 2, 3, 4, 5}; |
80 | |
81 | // setup two equal arrays |
82 | ASSERT_OK(builder_->AppendValues(equal_offsets.data(), equal_offsets.size())); |
83 | ASSERT_OK(vb->AppendValues(equal_values.data(), equal_values.size())); |
84 | |
85 | ASSERT_OK(builder_->Finish(&array)); |
86 | ASSERT_OK(builder_->AppendValues(equal_offsets.data(), equal_offsets.size())); |
87 | ASSERT_OK(vb->AppendValues(equal_values.data(), equal_values.size())); |
88 | |
89 | ASSERT_OK(builder_->Finish(&equal_array)); |
90 | // now an unequal one |
91 | ASSERT_OK(builder_->AppendValues(unequal_offsets.data(), unequal_offsets.size())); |
92 | ASSERT_OK(vb->AppendValues(unequal_values.data(), unequal_values.size())); |
93 | |
94 | ASSERT_OK(builder_->Finish(&unequal_array)); |
95 | |
96 | // Test array equality |
97 | EXPECT_TRUE(array->Equals(array)); |
98 | EXPECT_TRUE(array->Equals(equal_array)); |
99 | EXPECT_TRUE(equal_array->Equals(array)); |
100 | EXPECT_FALSE(equal_array->Equals(unequal_array)); |
101 | EXPECT_FALSE(unequal_array->Equals(equal_array)); |
102 | |
103 | // Test range equality |
104 | EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_array)); |
105 | EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_array)); |
106 | EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array)); |
107 | EXPECT_TRUE(array->RangeEquals(2, 3, 2, unequal_array)); |
108 | |
109 | // Check with slices, ARROW-33 |
110 | std::shared_ptr<Array> slice, slice2; |
111 | |
112 | slice = array->Slice(2); |
113 | slice2 = array->Slice(2); |
114 | ASSERT_EQ(array->length() - 2, slice->length()); |
115 | |
116 | ASSERT_TRUE(slice->Equals(slice2)); |
117 | ASSERT_TRUE(array->RangeEquals(2, slice->length(), 0, slice)); |
118 | |
119 | // Chained slices |
120 | slice2 = array->Slice(1)->Slice(1); |
121 | ASSERT_TRUE(slice->Equals(slice2)); |
122 | |
123 | slice = array->Slice(1, 4); |
124 | slice2 = array->Slice(1, 4); |
125 | ASSERT_EQ(4, slice->length()); |
126 | |
127 | ASSERT_TRUE(slice->Equals(slice2)); |
128 | ASSERT_TRUE(array->RangeEquals(1, 5, 0, slice)); |
129 | } |
130 | |
// Placeholder: the body is empty, so this test asserts nothing and always passes.
// TODO(review): add resize coverage for the list builder or drop this test.
TEST_F(TestListArray, TestResize) {}
132 | |
133 | TEST_F(TestListArray, TestFromArrays) { |
134 | std::shared_ptr<Array> offsets1, offsets2, offsets3, offsets4, values; |
135 | |
136 | std::vector<bool> offsets_is_valid3 = {true, false, true, true}; |
137 | std::vector<bool> offsets_is_valid4 = {true, true, false, true}; |
138 | |
139 | std::vector<bool> values_is_valid = {true, false, true, true, true, true}; |
140 | |
141 | std::vector<int32_t> offset1_values = {0, 2, 2, 6}; |
142 | std::vector<int32_t> offset2_values = {0, 2, 6, 6}; |
143 | |
144 | std::vector<int8_t> values_values = {0, 1, 2, 3, 4, 5}; |
145 | const int length = 3; |
146 | |
147 | ArrayFromVector<Int32Type, int32_t>(offset1_values, &offsets1); |
148 | ArrayFromVector<Int32Type, int32_t>(offset2_values, &offsets2); |
149 | |
150 | ArrayFromVector<Int32Type, int32_t>(offsets_is_valid3, offset1_values, &offsets3); |
151 | ArrayFromVector<Int32Type, int32_t>(offsets_is_valid4, offset2_values, &offsets4); |
152 | |
153 | ArrayFromVector<Int8Type, int8_t>(values_is_valid, values_values, &values); |
154 | |
155 | auto list_type = list(int8()); |
156 | |
157 | std::shared_ptr<Array> list1, list3, list4; |
158 | ASSERT_OK(ListArray::FromArrays(*offsets1, *values, pool_, &list1)); |
159 | ASSERT_OK(ListArray::FromArrays(*offsets3, *values, pool_, &list3)); |
160 | ASSERT_OK(ListArray::FromArrays(*offsets4, *values, pool_, &list4)); |
161 | |
162 | ListArray expected1(list_type, length, offsets1->data()->buffers[1], values, |
163 | offsets1->data()->buffers[0], 0); |
164 | AssertArraysEqual(expected1, *list1); |
165 | |
166 | // Use null bitmap from offsets3, but clean offsets from non-null version |
167 | ListArray expected3(list_type, length, offsets1->data()->buffers[1], values, |
168 | offsets3->data()->buffers[0], 1); |
169 | AssertArraysEqual(expected3, *list3); |
170 | |
171 | // Check that the last offset bit is zero |
172 | ASSERT_FALSE(BitUtil::GetBit(list3->null_bitmap()->data(), length + 1)); |
173 | |
174 | ListArray expected4(list_type, length, offsets2->data()->buffers[1], values, |
175 | offsets4->data()->buffers[0], 1); |
176 | AssertArraysEqual(expected4, *list4); |
177 | |
178 | // Test failure modes |
179 | |
180 | std::shared_ptr<Array> tmp; |
181 | |
182 | // Zero-length offsets |
183 | ASSERT_RAISES(Invalid, |
184 | ListArray::FromArrays(*offsets1->Slice(0, 0), *values, pool_, &tmp)); |
185 | |
186 | // Offsets not int32 |
187 | ASSERT_RAISES(Invalid, ListArray::FromArrays(*values, *offsets1, pool_, &tmp)); |
188 | } |
189 | |
190 | TEST_F(TestListArray, TestAppendNull) { |
191 | ASSERT_OK(builder_->AppendNull()); |
192 | ASSERT_OK(builder_->AppendNull()); |
193 | |
194 | Done(); |
195 | |
196 | ASSERT_OK(ValidateArray(*result_)); |
197 | ASSERT_TRUE(result_->IsNull(0)); |
198 | ASSERT_TRUE(result_->IsNull(1)); |
199 | |
200 | ASSERT_EQ(0, result_->raw_value_offsets()[0]); |
201 | ASSERT_EQ(0, result_->value_offset(1)); |
202 | ASSERT_EQ(0, result_->value_offset(2)); |
203 | |
204 | auto values = result_->values(); |
205 | ASSERT_EQ(0, values->length()); |
206 | // Values buffer should be non-null |
207 | ASSERT_NE(nullptr, values->data()->buffers[1]); |
208 | } |
209 | |
210 | void ValidateBasicListArray(const ListArray* result, const vector<int32_t>& values, |
211 | const vector<uint8_t>& is_valid) { |
212 | ASSERT_OK(ValidateArray(*result)); |
213 | ASSERT_EQ(1, result->null_count()); |
214 | ASSERT_EQ(0, result->values()->null_count()); |
215 | |
216 | ASSERT_EQ(3, result->length()); |
217 | vector<int32_t> ex_offsets = {0, 3, 3, 7}; |
218 | for (size_t i = 0; i < ex_offsets.size(); ++i) { |
219 | ASSERT_EQ(ex_offsets[i], result->value_offset(i)); |
220 | } |
221 | |
222 | for (int i = 0; i < result->length(); ++i) { |
223 | ASSERT_EQ(is_valid[i] == 0, result->IsNull(i)); |
224 | } |
225 | |
226 | ASSERT_EQ(7, result->values()->length()); |
227 | auto varr = std::dynamic_pointer_cast<Int32Array>(result->values()); |
228 | |
229 | for (size_t i = 0; i < values.size(); ++i) { |
230 | ASSERT_EQ(values[i], varr->Value(i)); |
231 | } |
232 | } |
233 | |
234 | TEST_F(TestListArray, TestBasics) { |
235 | vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6}; |
236 | vector<int> lengths = {3, 0, 4}; |
237 | vector<uint8_t> is_valid = {1, 0, 1}; |
238 | |
239 | Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder()); |
240 | |
241 | ASSERT_OK(builder_->Reserve(lengths.size())); |
242 | ASSERT_OK(vb->Reserve(values.size())); |
243 | |
244 | int pos = 0; |
245 | for (size_t i = 0; i < lengths.size(); ++i) { |
246 | ASSERT_OK(builder_->Append(is_valid[i] > 0)); |
247 | for (int j = 0; j < lengths[i]; ++j) { |
248 | ASSERT_OK(vb->Append(values[pos++])); |
249 | } |
250 | } |
251 | |
252 | Done(); |
253 | ValidateBasicListArray(result_.get(), values, is_valid); |
254 | } |
255 | |
256 | TEST_F(TestListArray, BulkAppend) { |
257 | vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6}; |
258 | vector<int> lengths = {3, 0, 4}; |
259 | vector<uint8_t> is_valid = {1, 0, 1}; |
260 | vector<int32_t> offsets = {0, 3, 3}; |
261 | |
262 | Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder()); |
263 | ASSERT_OK(vb->Reserve(values.size())); |
264 | |
265 | ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size(), is_valid.data())); |
266 | for (int32_t value : values) { |
267 | ASSERT_OK(vb->Append(value)); |
268 | } |
269 | Done(); |
270 | ValidateBasicListArray(result_.get(), values, is_valid); |
271 | } |
272 | |
273 | TEST_F(TestListArray, BulkAppendInvalid) { |
274 | vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6}; |
275 | vector<int> lengths = {3, 0, 4}; |
276 | vector<uint8_t> is_null = {0, 1, 0}; |
277 | vector<uint8_t> is_valid = {1, 0, 1}; |
278 | vector<int32_t> offsets = {0, 2, 4}; // should be 0, 3, 3 given the is_null array |
279 | |
280 | Int32Builder* vb = checked_cast<Int32Builder*>(builder_->value_builder()); |
281 | ASSERT_OK(vb->Reserve(values.size())); |
282 | |
283 | ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size(), is_valid.data())); |
284 | ASSERT_OK(builder_->AppendValues(offsets.data(), offsets.size(), is_valid.data())); |
285 | for (int32_t value : values) { |
286 | ASSERT_OK(vb->Append(value)); |
287 | } |
288 | |
289 | Done(); |
290 | ASSERT_RAISES(Invalid, ValidateArray(*result_)); |
291 | } |
292 | |
// Finishes the fixture's builder without appending anything and checks that
// the resulting array still passes ValidateArray.
TEST_F(TestListArray, TestZeroLength) {
  // All buffers are null
  // NOTE(review): the line above is the original author's note; nothing in
  // this test asserts the state of the buffers - confirm against the builder.
  Done();
  ASSERT_OK(ValidateArray(*result_));
}
298 | |
299 | } // namespace arrow |
300 | |