1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <cmath> |
19 | #include <cstddef> |
20 | #include <cstdint> |
21 | #include <cstring> |
22 | #include <limits> |
23 | #include <memory> |
24 | #include <sstream> |
25 | #include <string> |
26 | #include <type_traits> |
27 | #include <vector> |
28 | |
29 | #include <gtest/gtest.h> |
30 | |
31 | #include "arrow/array.h" |
32 | #include "arrow/ipc/json-simple.h" |
33 | #include "arrow/test-util.h" |
34 | #include "arrow/type.h" |
35 | #include "arrow/type_traits.h" |
36 | #include "arrow/util/checked_cast.h" |
37 | #include "arrow/util/decimal.h" |
38 | |
39 | #if defined(_MSC_VER) |
40 | // "warning C4307: '+': integral constant overflow" |
41 | #pragma warning(disable : 4307) |
42 | #endif |
43 | |
44 | namespace arrow { |
45 | namespace ipc { |
46 | namespace internal { |
47 | namespace json { |
48 | |
49 | using ::arrow::internal::checked_cast; |
50 | |
// Add two signed integers without invoking undefined behaviour on overflow:
// the operands are converted to the unsigned counterpart type, summed, and
// the result is converted back to the signed type.
template <typename Signed>
Signed SafeSignedAdd(Signed u, Signed v) {
  typedef typename std::make_unsigned<Signed>::type Unsigned;
  const Unsigned lhs = static_cast<Unsigned>(u);
  const Unsigned rhs = static_cast<Unsigned>(v);
  const Unsigned sum = static_cast<Unsigned>(lhs + rhs);
  return static_cast<Signed>(sum);
}
57 | |
// Helpers writing one or more values to `ss` as comma-separated JSON array
// items (the enclosing brackets are not written here).

// Special case for 8-bit ints (must output their decimal value, not the
// corresponding ASCII character)
void JSONArrayInternal(std::ostream* ss, int8_t value) {
  *ss << static_cast<int16_t>(value);
}

void JSONArrayInternal(std::ostream* ss, uint8_t value) {
  *ss << static_cast<int16_t>(value);
}

// Single value of any other type: written with the stream's operator<<.
template <typename Value>
void JSONArrayInternal(std::ostream* ss, const Value& value) {
  *ss << value;
}

// Two or more values: write the first one, a ", " separator, then recurse on
// the remaining ones.  The pack is taken by const reference and expanded
// directly; std::forward is only meaningful on forwarding references.
template <typename Value, typename... Tail>
void JSONArrayInternal(std::ostream* ss, const Value& value, const Tail&... tail) {
  JSONArrayInternal(ss, value);
  *ss << ", ";
  JSONArrayInternal(ss, tail...);
}
79 | |
// Format the given values as a JSON array string, e.g. JSONArray(1, 2) gives
// "[1, 2]".  At least one value must be given.
// The arguments are taken by const reference and expanded directly;
// std::forward is only meaningful on forwarding references.
template <typename... Args>
std::string JSONArray(const Args&... args) {
  std::stringstream ss;
  ss << "[";
  JSONArrayInternal(&ss, args...);
  ss << "]";
  return ss.str();
}
88 | |
89 | template <typename T, typename C_TYPE = typename T::c_type> |
90 | void AssertJSONArray(const std::shared_ptr<DataType>& type, const std::string& json, |
91 | const std::vector<C_TYPE>& values) { |
92 | std::shared_ptr<Array> actual, expected; |
93 | |
94 | ASSERT_OK(ArrayFromJSON(type, json, &actual)); |
95 | ASSERT_OK(ValidateArray(*actual)); |
96 | ArrayFromVector<T, C_TYPE>(type, values, &expected); |
97 | AssertArraysEqual(*expected, *actual); |
98 | } |
99 | |
100 | template <typename T, typename C_TYPE = typename T::c_type> |
101 | void AssertJSONArray(const std::shared_ptr<DataType>& type, const std::string& json, |
102 | const std::vector<bool>& is_valid, |
103 | const std::vector<C_TYPE>& values) { |
104 | std::shared_ptr<Array> actual, expected; |
105 | |
106 | ASSERT_OK(ArrayFromJSON(type, json, &actual)); |
107 | ASSERT_OK(ValidateArray(*actual)); |
108 | ArrayFromVector<T, C_TYPE>(type, is_valid, values, &expected); |
109 | AssertArraysEqual(*expected, *actual); |
110 | } |
111 | |
112 | TEST(TestHelper, JSONArray) { |
113 | // Test the JSONArray helper func |
114 | std::string s = |
115 | JSONArray(123, -4.5, static_cast<int8_t>(-12), static_cast<uint8_t>(34)); |
116 | ASSERT_EQ(s, "[123, -4.5, -12, 34]" ); |
117 | s = JSONArray(9223372036854775807LL, 9223372036854775808ULL, -9223372036854775807LL - 1, |
118 | 18446744073709551615ULL); |
119 | ASSERT_EQ(s, |
120 | "[9223372036854775807, 9223372036854775808, -9223372036854775808, " |
121 | "18446744073709551615]" ); |
122 | } |
123 | |
124 | TEST(TestHelper, SafeSignedAdd) { |
125 | ASSERT_EQ(0, SafeSignedAdd<int8_t>(-128, -128)); |
126 | ASSERT_EQ(1, SafeSignedAdd<int8_t>(-128, -127)); |
127 | ASSERT_EQ(-128, SafeSignedAdd<int8_t>(1, 127)); |
128 | ASSERT_EQ(-2147483648LL, SafeSignedAdd<int32_t>(1, 2147483647)); |
129 | } |
130 | |
// Type-parameterized fixture for the integer tests below.  T is the Arrow
// type class under test (the tests use T::c_type and TypeTraits<T>).
template <typename T>
class TestIntegers : public ::testing::Test {};

// Declare TestIntegers as a type-parameterized test case.
TYPED_TEST_CASE_P(TestIntegers);
135 | |
136 | TYPED_TEST_P(TestIntegers, Basics) { |
137 | using T = TypeParam; |
138 | using c_type = typename T::c_type; |
139 | |
140 | std::shared_ptr<Array> expected, actual; |
141 | std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton(); |
142 | |
143 | AssertJSONArray<T>(type, "[]" , {}); |
144 | AssertJSONArray<T>(type, "[4, 0, 5]" , {4, 0, 5}); |
145 | AssertJSONArray<T>(type, "[4, null, 5]" , {true, false, true}, {4, 0, 5}); |
146 | |
147 | // Test limits |
148 | const auto min_val = std::numeric_limits<c_type>::min(); |
149 | const auto max_val = std::numeric_limits<c_type>::max(); |
150 | std::string json_string = JSONArray(0, 1, min_val); |
151 | AssertJSONArray<T>(type, json_string, {0, 1, min_val}); |
152 | json_string = JSONArray(0, 1, max_val); |
153 | AssertJSONArray<T>(type, json_string, {0, 1, max_val}); |
154 | } |
155 | |
156 | TYPED_TEST_P(TestIntegers, Errors) { |
157 | using T = TypeParam; |
158 | |
159 | std::shared_ptr<Array> array; |
160 | std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton(); |
161 | |
162 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "" , &array)); |
163 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[" , &array)); |
164 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "0" , &array)); |
165 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "{}" , &array)); |
166 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0.0]" , &array)); |
167 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"0\"]" , &array)); |
168 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]" , &array)); |
169 | } |
170 | |
171 | TYPED_TEST_P(TestIntegers, OutOfBounds) { |
172 | using T = TypeParam; |
173 | using c_type = typename T::c_type; |
174 | |
175 | std::shared_ptr<Array> array; |
176 | std::shared_ptr<DataType> type = TypeTraits<T>::type_singleton(); |
177 | |
178 | if (type->id() == Type::UINT64) { |
179 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[18446744073709551616]" , &array)); |
180 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]" , &array)); |
181 | } else if (type->id() == Type::INT64) { |
182 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[9223372036854775808]" , &array)); |
183 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-9223372036854775809]" , &array)); |
184 | } else if (std::is_signed<c_type>::value) { |
185 | const auto lower = SafeSignedAdd<int64_t>(std::numeric_limits<c_type>::min(), -1); |
186 | const auto upper = SafeSignedAdd<int64_t>(std::numeric_limits<c_type>::max(), +1); |
187 | auto json_string = JSONArray(lower); |
188 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_string, &array)); |
189 | json_string = JSONArray(upper); |
190 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_string, &array)); |
191 | } else { |
192 | const auto upper = static_cast<uint64_t>(std::numeric_limits<c_type>::max()) + 1; |
193 | auto json_string = JSONArray(upper); |
194 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_string, &array)); |
195 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]" , &array)); |
196 | } |
197 | } |
198 | |
// Register the three TestIntegers tests defined above ...
REGISTER_TYPED_TEST_CASE_P(TestIntegers, Basics, Errors, OutOfBounds);

// ... and instantiate them once per signed and unsigned integer type.
INSTANTIATE_TYPED_TEST_CASE_P(TestInt8, TestIntegers, Int8Type);
INSTANTIATE_TYPED_TEST_CASE_P(TestInt16, TestIntegers, Int16Type);
INSTANTIATE_TYPED_TEST_CASE_P(TestInt32, TestIntegers, Int32Type);
INSTANTIATE_TYPED_TEST_CASE_P(TestInt64, TestIntegers, Int64Type);
INSTANTIATE_TYPED_TEST_CASE_P(TestUInt8, TestIntegers, UInt8Type);
INSTANTIATE_TYPED_TEST_CASE_P(TestUInt16, TestIntegers, UInt16Type);
INSTANTIATE_TYPED_TEST_CASE_P(TestUInt32, TestIntegers, UInt32Type);
INSTANTIATE_TYPED_TEST_CASE_P(TestUInt64, TestIntegers, UInt64Type);
209 | |
210 | TEST(TestNull, Basics) { |
211 | std::shared_ptr<DataType> type = null(); |
212 | std::shared_ptr<Array> expected, actual; |
213 | |
214 | AssertJSONArray<NullType, std::nullptr_t>(type, "[]" , {}); |
215 | AssertJSONArray<NullType, std::nullptr_t>(type, "[null, null]" , {nullptr, nullptr}); |
216 | } |
217 | |
218 | TEST(TestNull, Errors) { |
219 | std::shared_ptr<DataType> type = null(); |
220 | std::shared_ptr<Array> array; |
221 | |
222 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]" , &array)); |
223 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]" , &array)); |
224 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[NaN]" , &array)); |
225 | } |
226 | |
227 | TEST(TestBoolean, Basics) { |
228 | std::shared_ptr<DataType> type = boolean(); |
229 | std::shared_ptr<Array> expected, actual; |
230 | |
231 | AssertJSONArray<BooleanType, bool>(type, "[]" , {}); |
232 | AssertJSONArray<BooleanType, bool>(type, "[false, true, false]" , {false, true, false}); |
233 | AssertJSONArray<BooleanType, bool>(type, "[false, true, null]" , {true, true, false}, |
234 | {false, true, false}); |
235 | } |
236 | |
237 | TEST(TestBoolean, Errors) { |
238 | std::shared_ptr<DataType> type = boolean(); |
239 | std::shared_ptr<Array> array; |
240 | |
241 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]" , &array)); |
242 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"true\"]" , &array)); |
243 | } |
244 | |
245 | TEST(TestFloat, Basics) { |
246 | std::shared_ptr<DataType> type = float32(); |
247 | std::shared_ptr<Array> expected, actual; |
248 | |
249 | AssertJSONArray<FloatType>(type, "[]" , {}); |
250 | AssertJSONArray<FloatType>(type, "[1, 2.5, -3e4]" , {1.0f, 2.5f, -3.0e4f}); |
251 | AssertJSONArray<FloatType>(type, "[-0.0, Inf, -Inf, null]" , {true, true, true, false}, |
252 | {-0.0f, INFINITY, -INFINITY, 0.0f}); |
253 | |
254 | // Check NaN separately as AssertArraysEqual simply memcmp's array contents |
255 | // and NaNs can have many bit representations. |
256 | ASSERT_OK(ArrayFromJSON(type, "[NaN]" , &actual)); |
257 | ASSERT_OK(ValidateArray(*actual)); |
258 | float value = checked_cast<FloatArray&>(*actual).Value(0); |
259 | ASSERT_TRUE(std::isnan(value)); |
260 | } |
261 | |
262 | TEST(TestFloat, Errors) { |
263 | std::shared_ptr<DataType> type = float32(); |
264 | std::shared_ptr<Array> array; |
265 | |
266 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]" , &array)); |
267 | } |
268 | |
269 | TEST(TestDouble, Basics) { |
270 | std::shared_ptr<DataType> type = float64(); |
271 | std::shared_ptr<Array> expected, actual; |
272 | |
273 | AssertJSONArray<DoubleType>(type, "[]" , {}); |
274 | AssertJSONArray<DoubleType>(type, "[1, 2.5, -3e4]" , {1.0, 2.5, -3.0e4}); |
275 | AssertJSONArray<DoubleType>(type, "[-0.0, Inf, -Inf, null]" , {true, true, true, false}, |
276 | {-0.0, INFINITY, -INFINITY, 0.0}); |
277 | |
278 | ASSERT_OK(ArrayFromJSON(type, "[NaN]" , &actual)); |
279 | ASSERT_OK(ValidateArray(*actual)); |
280 | double value = checked_cast<DoubleArray&>(*actual).Value(0); |
281 | ASSERT_TRUE(std::isnan(value)); |
282 | } |
283 | |
284 | TEST(TestDouble, Errors) { |
285 | std::shared_ptr<DataType> type = float64(); |
286 | std::shared_ptr<Array> array; |
287 | |
288 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]" , &array)); |
289 | } |
290 | |
291 | TEST(TestString, Basics) { |
292 | // String type |
293 | std::shared_ptr<DataType> type = utf8(); |
294 | std::shared_ptr<Array> expected, actual; |
295 | |
296 | AssertJSONArray<StringType, std::string>(type, "[]" , {}); |
297 | AssertJSONArray<StringType, std::string>(type, "[\"\", \"foo\"]" , {"" , "foo" }); |
298 | AssertJSONArray<StringType, std::string>(type, "[\"\", null]" , {true, false}, {"" , "" }); |
299 | // NUL character in string |
300 | std::string s = "some" ; |
301 | s += '\x00'; |
302 | s += "char" ; |
303 | AssertJSONArray<StringType, std::string>(type, "[\"\", \"some\\u0000char\"]" , {"" , s}); |
304 | // UTF8 sequence in string |
305 | AssertJSONArray<StringType, std::string>(type, "[\"\xc3\xa9\"]" , {"\xc3\xa9" }); |
306 | |
307 | // Binary type |
308 | type = binary(); |
309 | AssertJSONArray<BinaryType, std::string>(type, "[\"\", \"foo\", null]" , |
310 | {true, true, false}, {"" , "foo" , "" }); |
311 | // Arbitrary binary (non-UTF8) sequence in string |
312 | s = "\xff\x9f" ; |
313 | AssertJSONArray<BinaryType, std::string>(type, "[\"" + s + "\"]" , {s}); |
314 | // Bytes < 0x20 can be represented as JSON unicode escapes |
315 | s = '\x00'; |
316 | s += "\x1f" ; |
317 | AssertJSONArray<BinaryType, std::string>(type, "[\"\\u0000\\u001f\"]" , {s}); |
318 | } |
319 | |
320 | TEST(TestString, Errors) { |
321 | std::shared_ptr<DataType> type = utf8(); |
322 | std::shared_ptr<Array> array; |
323 | |
324 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]" , &array)); |
325 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]" , &array)); |
326 | } |
327 | |
328 | TEST(TestFixedSizeBinary, Basics) { |
329 | std::shared_ptr<DataType> type = fixed_size_binary(3); |
330 | std::shared_ptr<Array> expected, actual; |
331 | |
332 | AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[]" , {}); |
333 | AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[\"foo\", \"bar\"]" , |
334 | {"foo" , "bar" }); |
335 | AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[null, \"foo\"]" , |
336 | {false, true}, {"" , "foo" }); |
337 | // Arbitrary binary (non-UTF8) sequence in string |
338 | std::string s = "\xff\x9f\xcc" ; |
339 | AssertJSONArray<FixedSizeBinaryType, std::string>(type, "[\"" + s + "\"]" , {s}); |
340 | } |
341 | |
342 | TEST(TestFixedSizeBinary, Errors) { |
343 | std::shared_ptr<DataType> type = fixed_size_binary(3); |
344 | std::shared_ptr<Array> array; |
345 | |
346 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]" , &array)); |
347 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]" , &array)); |
348 | // Invalid length |
349 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"\"]" , &array)); |
350 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"abcd\"]" , &array)); |
351 | } |
352 | |
353 | TEST(TestDecimal, Basics) { |
354 | std::shared_ptr<DataType> type = decimal(10, 4); |
355 | std::shared_ptr<Array> expected, actual; |
356 | |
357 | ASSERT_OK(ArrayFromJSON(type, "[]" , &actual)); |
358 | ASSERT_OK(ValidateArray(*actual)); |
359 | { |
360 | Decimal128Builder builder(type); |
361 | ASSERT_OK(builder.Finish(&expected)); |
362 | } |
363 | AssertArraysEqual(*expected, *actual); |
364 | |
365 | ASSERT_OK(ArrayFromJSON(type, "[\"123.4567\", \"-78.9000\"]" , &actual)); |
366 | ASSERT_OK(ValidateArray(*actual)); |
367 | { |
368 | Decimal128Builder builder(type); |
369 | ASSERT_OK(builder.Append(Decimal128(1234567))); |
370 | ASSERT_OK(builder.Append(Decimal128(-789000))); |
371 | ASSERT_OK(builder.Finish(&expected)); |
372 | } |
373 | AssertArraysEqual(*expected, *actual); |
374 | |
375 | ASSERT_OK(ArrayFromJSON(type, "[\"123.4567\", null]" , &actual)); |
376 | ASSERT_OK(ValidateArray(*actual)); |
377 | { |
378 | Decimal128Builder builder(type); |
379 | ASSERT_OK(builder.Append(Decimal128(1234567))); |
380 | ASSERT_OK(builder.AppendNull()); |
381 | ASSERT_OK(builder.Finish(&expected)); |
382 | } |
383 | AssertArraysEqual(*expected, *actual); |
384 | } |
385 | |
386 | TEST(TestDecimal, Errors) { |
387 | std::shared_ptr<DataType> type = decimal(10, 4); |
388 | std::shared_ptr<Array> array; |
389 | |
390 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]" , &array)); |
391 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[12.3456]" , &array)); |
392 | // Bad scale |
393 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"12.345\"]" , &array)); |
394 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"12.34560\"]" , &array)); |
395 | } |
396 | |
397 | TEST(TestList, IntegerList) { |
398 | auto pool = default_memory_pool(); |
399 | std::shared_ptr<DataType> type = list(int64()); |
400 | std::shared_ptr<Array> offsets, values, expected, actual; |
401 | |
402 | ASSERT_OK(ArrayFromJSON(type, "[]" , &actual)); |
403 | ASSERT_OK(ValidateArray(*actual)); |
404 | ArrayFromVector<Int32Type>({0}, &offsets); |
405 | ArrayFromVector<Int64Type>({}, &values); |
406 | ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); |
407 | AssertArraysEqual(*expected, *actual); |
408 | |
409 | ASSERT_OK(ArrayFromJSON(type, "[[4, 5], [], [6]]" , &actual)); |
410 | ASSERT_OK(ValidateArray(*actual)); |
411 | ArrayFromVector<Int32Type>({0, 2, 2, 3}, &offsets); |
412 | ArrayFromVector<Int64Type>({4, 5, 6}, &values); |
413 | ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); |
414 | AssertArraysEqual(*expected, *actual); |
415 | |
416 | ASSERT_OK(ArrayFromJSON(type, "[[], [null], [6, null]]" , &actual)); |
417 | ASSERT_OK(ValidateArray(*actual)); |
418 | ArrayFromVector<Int32Type>({0, 0, 1, 3}, &offsets); |
419 | auto is_valid = std::vector<bool>{false, true, false}; |
420 | ArrayFromVector<Int64Type>(is_valid, {0, 6, 0}, &values); |
421 | ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); |
422 | AssertArraysEqual(*expected, *actual); |
423 | |
424 | ASSERT_OK(ArrayFromJSON(type, "[null, [], null]" , &actual)); |
425 | ASSERT_OK(ValidateArray(*actual)); |
426 | { |
427 | std::unique_ptr<ArrayBuilder> builder; |
428 | ASSERT_OK(MakeBuilder(pool, type, &builder)); |
429 | auto& list_builder = checked_cast<ListBuilder&>(*builder); |
430 | ASSERT_OK(list_builder.AppendNull()); |
431 | ASSERT_OK(list_builder.Append()); |
432 | ASSERT_OK(list_builder.AppendNull()); |
433 | ASSERT_OK(list_builder.Finish(&expected)); |
434 | } |
435 | AssertArraysEqual(*expected, *actual); |
436 | } |
437 | |
438 | TEST(TestList, IntegerListErrors) { |
439 | std::shared_ptr<DataType> type = list(int64()); |
440 | std::shared_ptr<Array> array; |
441 | |
442 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]" , &array)); |
443 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0.0]]" , &array)); |
444 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[9223372036854775808]]" , &array)); |
445 | } |
446 | |
447 | TEST(TestList, NullList) { |
448 | auto pool = default_memory_pool(); |
449 | std::shared_ptr<DataType> type = list(null()); |
450 | std::shared_ptr<Array> offsets, values, expected, actual; |
451 | |
452 | ASSERT_OK(ArrayFromJSON(type, "[]" , &actual)); |
453 | ASSERT_OK(ValidateArray(*actual)); |
454 | ArrayFromVector<Int32Type>({0}, &offsets); |
455 | values = std::make_shared<NullArray>(0); |
456 | ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); |
457 | AssertArraysEqual(*expected, *actual); |
458 | |
459 | ASSERT_OK(ArrayFromJSON(type, "[[], [null], [null, null]]" , &actual)); |
460 | ASSERT_OK(ValidateArray(*actual)); |
461 | ArrayFromVector<Int32Type>({0, 0, 1, 3}, &offsets); |
462 | values = std::make_shared<NullArray>(3); |
463 | ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &expected)); |
464 | AssertArraysEqual(*expected, *actual); |
465 | |
466 | ASSERT_OK(ArrayFromJSON(type, "[null, [], null]" , &actual)); |
467 | ASSERT_OK(ValidateArray(*actual)); |
468 | { |
469 | std::unique_ptr<ArrayBuilder> builder; |
470 | ASSERT_OK(MakeBuilder(pool, type, &builder)); |
471 | auto& list_builder = checked_cast<ListBuilder&>(*builder); |
472 | ASSERT_OK(list_builder.AppendNull()); |
473 | ASSERT_OK(list_builder.Append()); |
474 | ASSERT_OK(list_builder.AppendNull()); |
475 | ASSERT_OK(list_builder.Finish(&expected)); |
476 | } |
477 | AssertArraysEqual(*expected, *actual); |
478 | } |
479 | |
480 | TEST(TestList, IntegerListList) { |
481 | auto pool = default_memory_pool(); |
482 | std::shared_ptr<DataType> type = list(list(uint8())); |
483 | std::shared_ptr<Array> offsets, values, nested, expected, actual; |
484 | |
485 | ASSERT_OK(ArrayFromJSON(type, "[[[4], [5, 6]], [[7, 8, 9]]]" , &actual)); |
486 | ASSERT_OK(ValidateArray(*actual)); |
487 | ArrayFromVector<Int32Type>({0, 1, 3, 6}, &offsets); |
488 | ArrayFromVector<UInt8Type>({4, 5, 6, 7, 8, 9}, &values); |
489 | ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &nested)); |
490 | ArrayFromVector<Int32Type>({0, 2, 3}, &offsets); |
491 | ASSERT_OK(ListArray::FromArrays(*offsets, *nested, pool, &expected)); |
492 | ASSERT_EQ(actual->length(), 2); |
493 | AssertArraysEqual(*expected, *actual); |
494 | |
495 | ASSERT_OK(ArrayFromJSON(type, "[[], [[]], [[4], [], [5, 6]], [[7, 8, 9]]]" , &actual)); |
496 | ASSERT_OK(ValidateArray(*actual)); |
497 | ArrayFromVector<Int32Type>({0, 0, 1, 1, 3, 6}, &offsets); |
498 | ArrayFromVector<UInt8Type>({4, 5, 6, 7, 8, 9}, &values); |
499 | ASSERT_OK(ListArray::FromArrays(*offsets, *values, pool, &nested)); |
500 | ArrayFromVector<Int32Type>({0, 0, 1, 4, 5}, &offsets); |
501 | ASSERT_OK(ListArray::FromArrays(*offsets, *nested, pool, &expected)); |
502 | ASSERT_EQ(actual->length(), 4); |
503 | AssertArraysEqual(*expected, *actual); |
504 | |
505 | ASSERT_OK(ArrayFromJSON(type, "[null, [null], [[null]]]" , &actual)); |
506 | ASSERT_OK(ValidateArray(*actual)); |
507 | { |
508 | std::unique_ptr<ArrayBuilder> builder; |
509 | ASSERT_OK(MakeBuilder(pool, type, &builder)); |
510 | auto& list_builder = checked_cast<ListBuilder&>(*builder); |
511 | auto& child_builder = checked_cast<ListBuilder&>(*list_builder.value_builder()); |
512 | ASSERT_OK(list_builder.AppendNull()); |
513 | ASSERT_OK(list_builder.Append()); |
514 | ASSERT_OK(child_builder.AppendNull()); |
515 | ASSERT_OK(list_builder.Append()); |
516 | ASSERT_OK(child_builder.Append()); |
517 | ASSERT_OK(list_builder.Finish(&expected)); |
518 | } |
519 | } |
520 | |
521 | TEST(TestStruct, SimpleStruct) { |
522 | auto field_a = field("a" , int8()); |
523 | auto field_b = field("b" , boolean()); |
524 | std::shared_ptr<DataType> type = struct_({field_a, field_b}); |
525 | std::shared_ptr<Array> a, b, expected, actual; |
526 | std::shared_ptr<Buffer> null_bitmap; |
527 | std::vector<bool> is_valid; |
528 | std::vector<std::shared_ptr<Array>> children; |
529 | |
530 | // Trivial |
531 | ASSERT_OK(ArrayFromJSON(type, "[]" , &actual)); |
532 | ASSERT_OK(ValidateArray(*actual)); |
533 | ArrayFromVector<Int8Type>({}, &a); |
534 | ArrayFromVector<BooleanType, bool>({}, &b); |
535 | children.assign({a, b}); |
536 | expected = std::make_shared<StructArray>(type, 0, children); |
537 | AssertArraysEqual(*expected, *actual); |
538 | |
539 | // Non-empty |
540 | ArrayFromVector<Int8Type>({5, 6}, &a); |
541 | ArrayFromVector<BooleanType, bool>({true, false}, &b); |
542 | children.assign({a, b}); |
543 | expected = std::make_shared<StructArray>(type, 2, children); |
544 | |
545 | ASSERT_OK(ArrayFromJSON(type, "[[5, true], [6, false]]" , &actual)); |
546 | ASSERT_OK(ValidateArray(*actual)); |
547 | AssertArraysEqual(*expected, *actual); |
548 | ASSERT_OK(ArrayFromJSON(type, "[{\"a\": 5, \"b\": true}, {\"b\": false, \"a\": 6}]" , |
549 | &actual)); |
550 | ASSERT_OK(ValidateArray(*actual)); |
551 | AssertArraysEqual(*expected, *actual); |
552 | |
553 | // With nulls |
554 | is_valid = {false, true, false, false}; |
555 | ArrayFromVector<Int8Type>(is_valid, {0, 5, 6, 0}, &a); |
556 | is_valid = {false, false, true, false}; |
557 | ArrayFromVector<BooleanType, bool>(is_valid, {false, true, false, false}, &b); |
558 | children.assign({a, b}); |
559 | BitmapFromVector<bool>({false, true, true, true}, &null_bitmap); |
560 | expected = std::make_shared<StructArray>(type, 4, children, null_bitmap, 1); |
561 | |
562 | ASSERT_OK( |
563 | ArrayFromJSON(type, "[null, [5, null], [null, false], [null, null]]" , &actual)); |
564 | ASSERT_OK(ValidateArray(*actual)); |
565 | AssertArraysEqual(*expected, *actual); |
566 | // When using object notation, null members can be omitted |
567 | ASSERT_OK(ArrayFromJSON(type, "[null, {\"a\": 5, \"b\": null}, {\"b\": false}, {}]" , |
568 | &actual)); |
569 | ASSERT_OK(ValidateArray(*actual)); |
570 | AssertArraysEqual(*expected, *actual); |
571 | } |
572 | |
573 | TEST(TestStruct, NestedStruct) { |
574 | auto field_a = field("a" , int8()); |
575 | auto field_b = field("b" , boolean()); |
576 | auto field_c = field("c" , float64()); |
577 | std::shared_ptr<DataType> nested_type = struct_({field_a, field_b}); |
578 | auto field_nested = field("nested" , nested_type); |
579 | std::shared_ptr<DataType> type = struct_({field_nested, field_c}); |
580 | std::shared_ptr<Array> expected, actual; |
581 | std::shared_ptr<Buffer> null_bitmap; |
582 | std::vector<bool> is_valid; |
583 | std::vector<std::shared_ptr<Array>> children(2); |
584 | |
585 | ASSERT_OK(ArrayFromJSON(type, "[]" , &actual)); |
586 | ASSERT_OK(ValidateArray(*actual)); |
587 | ArrayFromVector<Int8Type>({}, &children[0]); |
588 | ArrayFromVector<BooleanType, bool>({}, &children[1]); |
589 | children[0] = std::make_shared<StructArray>(nested_type, 0, children); |
590 | ArrayFromVector<DoubleType>({}, &children[1]); |
591 | expected = std::make_shared<StructArray>(type, 0, children); |
592 | AssertArraysEqual(*expected, *actual); |
593 | |
594 | ASSERT_OK(ArrayFromJSON(type, "[[[5, true], 1.5], [[6, false], -3e2]]" , &actual)); |
595 | ASSERT_OK(ValidateArray(*actual)); |
596 | ArrayFromVector<Int8Type>({5, 6}, &children[0]); |
597 | ArrayFromVector<BooleanType, bool>({true, false}, &children[1]); |
598 | children[0] = std::make_shared<StructArray>(nested_type, 2, children); |
599 | ArrayFromVector<DoubleType>({1.5, -300.0}, &children[1]); |
600 | expected = std::make_shared<StructArray>(type, 2, children); |
601 | AssertArraysEqual(*expected, *actual); |
602 | |
603 | ASSERT_OK(ArrayFromJSON(type, "[null, [[5, null], null], [null, -3e2]]" , &actual)); |
604 | ASSERT_OK(ValidateArray(*actual)); |
605 | is_valid = {false, true, false}; |
606 | ArrayFromVector<Int8Type>(is_valid, {0, 5, 0}, &children[0]); |
607 | is_valid = {false, false, false}; |
608 | ArrayFromVector<BooleanType, bool>(is_valid, {false, false, false}, &children[1]); |
609 | BitmapFromVector<bool>({false, true, false}, &null_bitmap); |
610 | children[0] = std::make_shared<StructArray>(nested_type, 3, children, null_bitmap, 2); |
611 | is_valid = {false, false, true}; |
612 | ArrayFromVector<DoubleType>(is_valid, {0.0, 0.0, -300.0}, &children[1]); |
613 | BitmapFromVector<bool>({false, true, true}, &null_bitmap); |
614 | expected = std::make_shared<StructArray>(type, 3, children, null_bitmap, 1); |
615 | AssertArraysEqual(*expected, *actual); |
616 | } |
617 | |
618 | TEST(TestStruct, Errors) { |
619 | auto field_a = field("a" , int8()); |
620 | auto field_b = field("b" , boolean()); |
621 | std::shared_ptr<DataType> type = struct_({field_a, field_b}); |
622 | std::shared_ptr<Array> array; |
623 | |
624 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0, true]" , &array)); |
625 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]" , &array)); |
626 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, true, 1]]" , &array)); |
627 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[true, 0]]" , &array)); |
628 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[{\"b\": 0, \"a\": true}]" , &array)); |
629 | ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[{\"c\": 0}]" , &array)); |
630 | } |
631 | |
632 | } // namespace json |
633 | } // namespace internal |
634 | } // namespace ipc |
635 | } // namespace arrow |
636 | |