1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <cstdint> |
19 | #include <cstring> |
20 | #include <memory> |
21 | #include <sstream> |
22 | #include <string> |
23 | #include <vector> |
24 | |
25 | #include <gtest/gtest.h> |
26 | |
27 | #include "arrow/array.h" |
28 | #include "arrow/builder.h" |
29 | #include "arrow/pretty_print.h" |
30 | #include "arrow/table.h" |
31 | #include "arrow/test-util.h" |
32 | #include "arrow/type.h" |
33 | |
34 | namespace arrow { |
35 | |
36 | class TestPrettyPrint : public ::testing::Test { |
37 | public: |
38 | void SetUp() {} |
39 | |
40 | void Print(const Array& array) {} |
41 | |
42 | private: |
43 | std::ostringstream sink_; |
44 | }; |
45 | |
46 | template <typename T> |
47 | void CheckStream(const T& obj, const PrettyPrintOptions& options, const char* expected) { |
48 | std::ostringstream sink; |
49 | ASSERT_OK(PrettyPrint(obj, options, &sink)); |
50 | std::string result = sink.str(); |
51 | ASSERT_EQ(std::string(expected, strlen(expected)), result); |
52 | } |
53 | |
54 | void CheckArray(const Array& arr, const PrettyPrintOptions& options, const char* expected, |
55 | bool check_operator = true) { |
56 | CheckStream(arr, options, expected); |
57 | |
58 | if (options.indent == 0 && check_operator) { |
59 | std::stringstream ss; |
60 | ss << arr; |
61 | std::string result = std::string(expected, strlen(expected)); |
62 | ASSERT_EQ(result, ss.str()); |
63 | } |
64 | } |
65 | |
66 | template <typename T> |
67 | void Check(const T& obj, const PrettyPrintOptions& options, const char* expected) { |
68 | std::string result; |
69 | ASSERT_OK(PrettyPrint(obj, options, &result)); |
70 | ASSERT_EQ(std::string(expected, strlen(expected)), result); |
71 | } |
72 | |
73 | template <typename TYPE, typename C_TYPE> |
74 | void CheckPrimitive(const PrettyPrintOptions& options, const std::vector<bool>& is_valid, |
75 | const std::vector<C_TYPE>& values, const char* expected, |
76 | bool check_operator = true) { |
77 | std::shared_ptr<Array> array; |
78 | ArrayFromVector<TYPE, C_TYPE>(is_valid, values, &array); |
79 | CheckArray(*array, options, expected, check_operator); |
80 | } |
81 | |
// Pretty-prints int32, double and string arrays that share one validity mask
// and compares the text with literal expectations for several
// PrettyPrintOptions initializer lists.
// NOTE(review): the first two initializers look like {indent, window}
// (CheckArray reads options.indent) -- confirm against PrettyPrintOptions.
TEST_F(TestPrettyPrint, PrimitiveType) {
  // Positions 2 and 4 are masked out; the expectations show them as nulls.
  std::vector<bool> is_valid = {true, true, false, true, false};

  std::vector<int32_t> values = {0, 1, 2, 3, 4};
  static const char* expected = R"expected([
0,
1,
null,
3,
null
])expected" ;
  CheckPrimitive<Int32Type, int32_t>({0, 10}, is_valid, values, expected);

  // With "NA" as the fourth initializer the expectation shows NA where the
  // previous one showed null. check_operator is false here, so CheckArray
  // skips the operator<< comparison (presumably because operator<< does not
  // see these options -- verify).
  static const char* expected_na = R"expected([
0,
1,
NA,
3,
NA
])expected" ;
  CheckPrimitive<Int32Type, int32_t>({0, 10, 2, "NA" }, is_valid, values, expected_na,
                                     false);

  // First initializer 2: the expectation starts with leading whitespace.
  static const char* ex_in2 = R"expected( [
0,
1,
null,
3,
null
])expected" ;
  CheckPrimitive<Int32Type, int32_t>({2, 10}, is_valid, values, ex_in2);
  // Second initializer lowered to 2: the expectation replaces the middle
  // element with "...".
  static const char* ex_in2_w2 = R"expected( [
0,
1,
...
3,
null
])expected" ;
  CheckPrimitive<Int32Type, int32_t>({2, 2}, is_valid, values, ex_in2_w2);

  // Same checks for doubles; the expectations show the values without a
  // fractional part.
  std::vector<double> values2 = {0., 1., 2., 3., 4.};
  static const char* ex2 = R"expected([
0,
1,
null,
3,
null
])expected" ;
  CheckPrimitive<DoubleType, double>({0, 10}, is_valid, values2, ex2);
  static const char* ex2_in2 = R"expected( [
0,
1,
null,
3,
null
])expected" ;
  CheckPrimitive<DoubleType, double>({2, 10}, is_valid, values2, ex2_in2);

  // Same checks for strings; the expectations show valid values in double
  // quotes.
  std::vector<std::string> values3 = {"foo" , "bar" , "" , "baz" , "" };
  static const char* ex3 = R"expected([
"foo",
"bar",
null,
"baz",
null
])expected" ;
  CheckPrimitive<StringType, std::string>({0, 10}, is_valid, values3, ex3);
  static const char* ex3_in2 = R"expected( [
"foo",
"bar",
null,
"baz",
null
])expected" ;
  CheckPrimitive<StringType, std::string>({2, 10}, is_valid, values3, ex3_in2);
}
158 | |
// Pretty-prints a one-row struct array with two int32 fields and no nulls,
// once with first option 0 and once with first option 2.
TEST_F(TestPrettyPrint, StructTypeBasic) {
  auto simple_1 = field("one" , int32());
  auto simple_2 = field("two" , int32());
  auto simple_struct = struct_({simple_1, simple_2});

  auto array = ArrayFromJSON(simple_struct, "[[11, 22]]" );

  // The expectation has a validity summary line followed by one section per
  // child.
  static const char* ex = R"expected(-- is_valid: all not null
-- child 0 type: int32
[
11
]
-- child 1 type: int32
[
22
])expected" ;
  CheckStream(*array, {0, 10}, ex);

  // Same content; this expectation starts with leading whitespace.
  static const char* ex_2 = R"expected( -- is_valid: all not null
-- child 0 type: int32
[
11
]
-- child 1 type: int32
[
22
])expected" ;
  CheckStream(*array, {2, 10}, ex_2);
}
188 | |
// Pretty-prints a three-row struct array that contains a null row and a row
// whose first field is null.
TEST_F(TestPrettyPrint, StructTypeAdvanced) {
  auto simple_1 = field("one" , int32());
  auto simple_2 = field("two" , int32());
  auto simple_struct = struct_({simple_1, simple_2});

  auto array = ArrayFromJSON(simple_struct, "[[11, 22], null, [null, 33]]" );

  // With a null row present, the expectation lists is_valid per row
  // (true, false, true). Both children show null for the null row.
  static const char* ex = R"expected(-- is_valid:
[
true,
false,
true
]
-- child 0 type: int32
[
11,
null,
null
]
-- child 1 type: int32
[
22,
null,
33
])expected" ;
  CheckStream(*array, {0, 10}, ex);
}
216 | |
// Pretty-prints a binary array. The expectations show each valid value as
// uppercase hex of its bytes (666F6F is "foo") and masked entries as null.
TEST_F(TestPrettyPrint, BinaryType) {
  std::vector<bool> is_valid = {true, true, false, true, false};
  std::vector<std::string> values = {"foo" , "bar" , "" , "baz" , "" };
  static const char* ex = "[\n 666F6F,\n 626172,\n null,\n 62617A,\n null\n]" ;
  CheckPrimitive<BinaryType, std::string>({0}, is_valid, values, ex);
  // Single initializer 2: the expectation starts with leading whitespace.
  static const char* ex_in2 =
      " [\n 666F6F,\n 626172,\n null,\n 62617A,\n null\n ]" ;
  CheckPrimitive<BinaryType, std::string>({2}, is_valid, values, ex_in2);
}
226 | |
// Pretty-prints a list<int64> array holding a list with a null element, an
// empty list, a null list and two populated lists.
TEST_F(TestPrettyPrint, ListType) {
  auto list_type = list(int64());
  auto array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]" );

  // The expectation shows the empty list as "[]" and the null list as "null".
  static const char* ex = R"expected([
[
null
],
[],
null,
[
4,
6,
7
],
[
2,
3
]
])expected" ;
  CheckArray(*array, {0, 10}, ex);
  // First initializer 2: the expectation starts with leading whitespace.
  static const char* ex_2 = R"expected( [
[
null
],
[],
null,
[
4,
6,
7
],
[
2,
3
]
])expected" ;
  CheckArray(*array, {2, 10}, ex_2);
  // Second initializer 1: only the first and last lists appear, with "..."
  // between them. Checked through CheckStream only, so operator<< is not
  // compared.
  static const char* ex_3 = R"expected([
[
null
],
...
[
2,
3
]
])expected" ;
  CheckStream(*array, {0, 1}, ex_3);
}
277 | |
278 | TEST_F(TestPrettyPrint, FixedSizeBinaryType) { |
279 | std::vector<bool> is_valid = {true, true, false, true, false}; |
280 | |
281 | auto type = fixed_size_binary(3); |
282 | auto array = ArrayFromJSON(type, "[\"foo\", \"bar\", null, \"baz\"]" ); |
283 | |
284 | static const char* ex = "[\n 666F6F,\n 626172,\n null,\n 62617A\n]" ; |
285 | CheckArray(*array, {0, 10}, ex); |
286 | static const char* ex_2 = " [\n 666F6F,\n ...\n 62617A\n ]" ; |
287 | CheckArray(*array, {2, 1}, ex_2); |
288 | } |
289 | |
290 | TEST_F(TestPrettyPrint, Decimal128Type) { |
291 | int32_t p = 19; |
292 | int32_t s = 4; |
293 | |
294 | auto type = decimal(p, s); |
295 | auto array = ArrayFromJSON(type, "[\"123.4567\", \"456.7891\", null]" ); |
296 | |
297 | static const char* ex = "[\n 123.4567,\n 456.7891,\n null\n]" ; |
298 | CheckArray(*array, {0}, ex); |
299 | } |
300 | |
// Pretty-prints a DictionaryArray made of a three-entry string dictionary and
// int16 indices with one masked position.
TEST_F(TestPrettyPrint, DictionaryType) {
  // Position 2 is masked out; its index value (-1) is expected to print as
  // null.
  std::vector<bool> is_valid = {true, true, false, true, true, true};

  std::shared_ptr<Array> dict;
  std::vector<std::string> dict_values = {"foo" , "bar" , "baz" };
  ArrayFromVector<StringType, std::string>(dict_values, &dict);
  std::shared_ptr<DataType> dict_type = dictionary(int16(), dict);

  std::shared_ptr<Array> indices;
  std::vector<int16_t> indices_values = {1, 2, -1, 0, 2, 0};
  ArrayFromVector<Int16Type, int16_t>(is_valid, indices_values, &indices);
  auto arr = std::make_shared<DictionaryArray>(dict_type, indices);

  // The expected text begins with a line break right after the opening
  // delimiter, then lists the dictionary followed by the indices.
  static const char* expected = R"expected(
-- dictionary:
[
"foo",
"bar",
"baz"
]
-- indices:
[
1,
2,
null,
0,
2,
0
])expected" ;

  CheckArray(*arr, {0}, expected);
}
333 | |
// Pretty-prints a ChunkedArray of int32 values, first with one chunk and then
// with the same chunk repeated twice.
TEST_F(TestPrettyPrint, ChunkedArrayPrimitiveType) {
  auto array = ArrayFromJSON(int32(), "[0, 1, null, 3, null]" );
  ChunkedArray chunked_array(array);

  // One chunk: the expectation wraps the chunk's list in an outer list.
  static const char* expected = R"expected([
[
0,
1,
null,
3,
null
]
])expected" ;
  CheckStream(chunked_array, {0}, expected);

  ChunkedArray chunked_array_2({array, array});

  // Two chunks: both lists appear inside the outer list, separated by a comma.
  static const char* expected_2 = R"expected([
[
0,
1,
null,
3,
null
],
[
0,
1,
null,
3,
null
]
])expected" ;

  CheckStream(chunked_array_2, {0}, expected_2);
}
370 | |
// Pretty-prints a Column built from an int32 field named "column", first with
// one chunk and then with two identical chunks.
TEST_F(TestPrettyPrint, ColumnPrimitiveType) {
  std::shared_ptr<Field> int_field = field("column" , int32());
  auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]" );
  Column column(int_field, ArrayVector({array}));

  // The expectation has a "name: type" header line followed by the chunks.
  static const char* expected = R"expected(column: int32
[
[
0,
1,
null,
3,
null
]
])expected" ;
  CheckStream(column, {0}, expected);

  Column column_2(int_field, {array, array});

  // Two chunks: same header line, both chunk lists inside the outer list.
  static const char* expected_2 = R"expected(column: int32
[
[
0,
1,
null,
3,
null
],
[
0,
1,
null,
3,
null
]
])expected" ;

  CheckStream(column_2, {0}, expected_2);
}
410 | |
// Pretty-prints a single-column Table made of one int32 column named "column".
TEST_F(TestPrettyPrint, TablePrimitive) {
  std::shared_ptr<Field> int_field = field("column" , int32());
  auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]" );
  std::shared_ptr<Column> column =
      std::make_shared<Column>(int_field, ArrayVector({array}));
  std::shared_ptr<Schema> table_schema = schema({int_field});
  std::shared_ptr<Table> table = Table::Make(table_schema, {column});

  // The expectation has a "name: type" line, a "----" separator and then the
  // column's data. Unlike the other expectations in this file it ends with a
  // line break before the closing delimiter.
  static const char* expected = R"expected(column: int32
----
column:
[
[
0,
1,
null,
3,
null
]
]
)expected" ;
  CheckStream(*table, {0}, expected);
}
434 | |
435 | TEST_F(TestPrettyPrint, SchemaWithDictionary) { |
436 | std::vector<bool> is_valid = {true, true, false, true, true, true}; |
437 | |
438 | std::shared_ptr<Array> dict; |
439 | std::vector<std::string> dict_values = {"foo" , "bar" , "baz" }; |
440 | ArrayFromVector<StringType, std::string>(dict_values, &dict); |
441 | |
442 | auto simple = field("one" , int32()); |
443 | auto simple_dict = field("two" , dictionary(int16(), dict)); |
444 | auto list_of_dict = field("three" , list(simple_dict)); |
445 | |
446 | auto struct_with_dict = field("four" , struct_({simple, simple_dict})); |
447 | |
448 | auto sch = schema({simple, simple_dict, list_of_dict, struct_with_dict}); |
449 | |
450 | static const char* expected = R"expected(one: int32 |
451 | two: dictionary<values=string, indices=int16, ordered=0> |
452 | dictionary: |
453 | [ |
454 | "foo", |
455 | "bar", |
456 | "baz" |
457 | ] |
458 | three: list<two: dictionary<values=string, indices=int16, ordered=0>> |
459 | child 0, two: dictionary<values=string, indices=int16, ordered=0> |
460 | dictionary: |
461 | [ |
462 | "foo", |
463 | "bar", |
464 | "baz" |
465 | ] |
466 | four: struct<one: int32, two: dictionary<values=string, indices=int16, ordered=0>> |
467 | child 0, one: int32 |
468 | child 1, two: dictionary<values=string, indices=int16, ordered=0> |
469 | dictionary: |
470 | [ |
471 | "foo", |
472 | "bar", |
473 | "baz" |
474 | ])expected" ; |
475 | |
476 | PrettyPrintOptions options{0}; |
477 | |
478 | Check(*sch, options, expected); |
479 | } |
480 | |
481 | } // namespace arrow |
482 | |