1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | // Unit tests for DataType (and subclasses), Field, and Schema |
19 | |
20 | #include <cstdint> |
21 | #include <memory> |
22 | #include <string> |
23 | #include <vector> |
24 | |
25 | #include <gtest/gtest.h> |
26 | |
27 | #include "arrow/memory_pool.h" |
28 | #include "arrow/test-util.h" |
29 | #include "arrow/type.h" |
30 | #include "arrow/util/checked_cast.h" |
31 | |
32 | using std::shared_ptr; |
33 | using std::vector; |
34 | |
35 | namespace arrow { |
36 | |
37 | using internal::checked_cast; |
38 | |
39 | TEST(TestField, Basics) { |
40 | Field f0("f0" , int32()); |
41 | Field f0_nn("f0" , int32(), false); |
42 | |
43 | ASSERT_EQ(f0.name(), "f0" ); |
44 | ASSERT_EQ(f0.type()->ToString(), int32()->ToString()); |
45 | |
46 | ASSERT_TRUE(f0.nullable()); |
47 | ASSERT_FALSE(f0_nn.nullable()); |
48 | } |
49 | |
50 | TEST(TestField, Equals) { |
51 | auto meta = key_value_metadata({{"a" , "1" }, {"b" , "2" }}); |
52 | |
53 | Field f0("f0" , int32()); |
54 | Field f0_nn("f0" , int32(), false); |
55 | Field f0_other("f0" , int32()); |
56 | Field f0_with_meta("f0" , int32(), true, meta); |
57 | |
58 | ASSERT_TRUE(f0.Equals(f0_other)); |
59 | ASSERT_FALSE(f0.Equals(f0_nn)); |
60 | ASSERT_FALSE(f0.Equals(f0_with_meta)); |
61 | ASSERT_TRUE(f0.Equals(f0_with_meta, false)); |
62 | } |
63 | |
64 | TEST(TestField, TestMetadataConstruction) { |
65 | auto metadata = std::shared_ptr<KeyValueMetadata>( |
66 | new KeyValueMetadata({"foo" , "bar" }, {"bizz" , "buzz" })); |
67 | auto metadata2 = metadata->Copy(); |
68 | auto f0 = field("f0" , int32(), true, metadata); |
69 | auto f1 = field("f0" , int32(), true, metadata2); |
70 | ASSERT_TRUE(metadata->Equals(*f0->metadata())); |
71 | ASSERT_TRUE(f0->Equals(*f1)); |
72 | } |
73 | |
74 | TEST(TestField, TestAddMetadata) { |
75 | auto metadata = std::shared_ptr<KeyValueMetadata>( |
76 | new KeyValueMetadata({"foo" , "bar" }, {"bizz" , "buzz" })); |
77 | auto f0 = field("f0" , int32()); |
78 | auto f1 = field("f0" , int32(), true, metadata); |
79 | std::shared_ptr<Field> f2 = f0->AddMetadata(metadata); |
80 | |
81 | ASSERT_FALSE(f2->Equals(*f0)); |
82 | ASSERT_TRUE(f2->Equals(*f1)); |
83 | |
84 | // Not copied |
85 | ASSERT_TRUE(metadata.get() == f1->metadata().get()); |
86 | } |
87 | |
88 | TEST(TestField, TestRemoveMetadata) { |
89 | auto metadata = std::shared_ptr<KeyValueMetadata>( |
90 | new KeyValueMetadata({"foo" , "bar" }, {"bizz" , "buzz" })); |
91 | auto f0 = field("f0" , int32()); |
92 | auto f1 = field("f0" , int32(), true, metadata); |
93 | std::shared_ptr<Field> f2 = f1->RemoveMetadata(); |
94 | ASSERT_TRUE(f2->metadata() == nullptr); |
95 | } |
96 | |
97 | TEST(TestField, TestFlatten) { |
98 | auto metadata = std::shared_ptr<KeyValueMetadata>( |
99 | new KeyValueMetadata({"foo" , "bar" }, {"bizz" , "buzz" })); |
100 | auto f0 = field("f0" , int32(), true /* nullable */, metadata); |
101 | auto vec = f0->Flatten(); |
102 | ASSERT_EQ(vec.size(), 1); |
103 | ASSERT_TRUE(vec[0]->Equals(*f0)); |
104 | |
105 | auto f1 = field("f1" , float64(), false /* nullable */); |
106 | auto ff = field("nest" , struct_({f0, f1})); |
107 | vec = ff->Flatten(); |
108 | ASSERT_EQ(vec.size(), 2); |
109 | auto expected0 = field("nest.f0" , int32(), true /* nullable */, metadata); |
110 | // nullable parent implies nullable flattened child |
111 | auto expected1 = field("nest.f1" , float64(), true /* nullable */); |
112 | ASSERT_TRUE(vec[0]->Equals(*expected0)); |
113 | ASSERT_TRUE(vec[1]->Equals(*expected1)); |
114 | |
115 | ff = field("nest" , struct_({f0, f1}), false /* nullable */); |
116 | vec = ff->Flatten(); |
117 | ASSERT_EQ(vec.size(), 2); |
118 | expected0 = field("nest.f0" , int32(), true /* nullable */, metadata); |
119 | expected1 = field("nest.f1" , float64(), false /* nullable */); |
120 | ASSERT_TRUE(vec[0]->Equals(*expected0)); |
121 | ASSERT_TRUE(vec[1]->Equals(*expected1)); |
122 | } |
123 | |
124 | class TestSchema : public ::testing::Test { |
125 | public: |
126 | void SetUp() {} |
127 | }; |
128 | |
129 | TEST_F(TestSchema, Basics) { |
130 | auto f0 = field("f0" , int32()); |
131 | auto f1 = field("f1" , uint8(), false); |
132 | auto f1_optional = field("f1" , uint8()); |
133 | |
134 | auto f2 = field("f2" , utf8()); |
135 | |
136 | auto schema = ::arrow::schema({f0, f1, f2}); |
137 | |
138 | ASSERT_EQ(3, schema->num_fields()); |
139 | ASSERT_TRUE(f0->Equals(schema->field(0))); |
140 | ASSERT_TRUE(f1->Equals(schema->field(1))); |
141 | ASSERT_TRUE(f2->Equals(schema->field(2))); |
142 | |
143 | auto schema2 = ::arrow::schema({f0, f1, f2}); |
144 | |
145 | vector<shared_ptr<Field>> fields3 = {f0, f1_optional, f2}; |
146 | auto schema3 = std::make_shared<Schema>(fields3); |
147 | ASSERT_TRUE(schema->Equals(*schema2)); |
148 | ASSERT_FALSE(schema->Equals(*schema3)); |
149 | } |
150 | |
151 | TEST_F(TestSchema, ToString) { |
152 | auto f0 = field("f0" , int32()); |
153 | auto f1 = field("f1" , uint8(), false); |
154 | auto f2 = field("f2" , utf8()); |
155 | auto f3 = field("f3" , list(int16())); |
156 | |
157 | auto schema = ::arrow::schema({f0, f1, f2, f3}); |
158 | |
159 | std::string result = schema->ToString(); |
160 | std::string expected = R"(f0: int32 |
161 | f1: uint8 not null |
162 | f2: string |
163 | f3: list<item: int16>)" ; |
164 | |
165 | ASSERT_EQ(expected, result); |
166 | } |
167 | |
168 | TEST_F(TestSchema, GetFieldByName) { |
169 | auto f0 = field("f0" , int32()); |
170 | auto f1 = field("f1" , uint8(), false); |
171 | auto f2 = field("f2" , utf8()); |
172 | auto f3 = field("f3" , list(int16())); |
173 | |
174 | auto schema = ::arrow::schema({f0, f1, f2, f3}); |
175 | |
176 | std::shared_ptr<Field> result; |
177 | |
178 | result = schema->GetFieldByName("f1" ); |
179 | ASSERT_TRUE(f1->Equals(result)); |
180 | |
181 | result = schema->GetFieldByName("f3" ); |
182 | ASSERT_TRUE(f3->Equals(result)); |
183 | |
184 | result = schema->GetFieldByName("not-found" ); |
185 | ASSERT_TRUE(result == nullptr); |
186 | } |
187 | |
188 | TEST_F(TestSchema, GetFieldIndex) { |
189 | auto f0 = field("f0" , int32()); |
190 | auto f1 = field("f1" , uint8(), false); |
191 | auto f2 = field("f2" , utf8()); |
192 | auto f3 = field("f3" , list(int16())); |
193 | |
194 | auto schema = ::arrow::schema({f0, f1, f2, f3}); |
195 | |
196 | ASSERT_EQ(0, schema->GetFieldIndex(f0->name())); |
197 | ASSERT_EQ(1, schema->GetFieldIndex(f1->name())); |
198 | ASSERT_EQ(2, schema->GetFieldIndex(f2->name())); |
199 | ASSERT_EQ(3, schema->GetFieldIndex(f3->name())); |
200 | ASSERT_EQ(-1, schema->GetFieldIndex("not-found" )); |
201 | } |
202 | |
203 | TEST_F(TestSchema, TestMetadataConstruction) { |
204 | auto metadata0 = key_value_metadata({{"foo" , "bar" }, {"bizz" , "buzz" }}); |
205 | auto metadata1 = key_value_metadata({{"foo" , "baz" }}); |
206 | |
207 | auto f0 = field("f0" , int32()); |
208 | auto f1 = field("f1" , uint8(), false); |
209 | auto f2 = field("f2" , utf8(), true); |
210 | auto f3 = field("f2" , utf8(), true, metadata1->Copy()); |
211 | |
212 | auto schema0 = ::arrow::schema({f0, f1, f2}, metadata0); |
213 | auto schema1 = ::arrow::schema({f0, f1, f2}, metadata1); |
214 | auto schema2 = ::arrow::schema({f0, f1, f2}, metadata0->Copy()); |
215 | auto schema3 = ::arrow::schema({f0, f1, f3}, metadata0->Copy()); |
216 | |
217 | ASSERT_TRUE(metadata0->Equals(*schema0->metadata())); |
218 | ASSERT_TRUE(metadata1->Equals(*schema1->metadata())); |
219 | ASSERT_TRUE(metadata0->Equals(*schema2->metadata())); |
220 | ASSERT_TRUE(schema0->Equals(*schema2)); |
221 | ASSERT_FALSE(schema0->Equals(*schema1)); |
222 | ASSERT_FALSE(schema2->Equals(*schema1)); |
223 | ASSERT_FALSE(schema2->Equals(*schema3)); |
224 | |
225 | // don't check metadata |
226 | ASSERT_TRUE(schema0->Equals(*schema1, false)); |
227 | ASSERT_TRUE(schema2->Equals(*schema1, false)); |
228 | ASSERT_TRUE(schema2->Equals(*schema3, false)); |
229 | } |
230 | |
231 | TEST_F(TestSchema, TestAddMetadata) { |
232 | auto f0 = field("f0" , int32()); |
233 | auto f1 = field("f1" , uint8(), false); |
234 | auto f2 = field("f2" , utf8()); |
235 | vector<shared_ptr<Field>> fields = {f0, f1, f2}; |
236 | auto metadata = std::shared_ptr<KeyValueMetadata>( |
237 | new KeyValueMetadata({"foo" , "bar" }, {"bizz" , "buzz" })); |
238 | auto schema = std::make_shared<Schema>(fields); |
239 | std::shared_ptr<Schema> new_schema = schema->AddMetadata(metadata); |
240 | ASSERT_TRUE(metadata->Equals(*new_schema->metadata())); |
241 | |
242 | // Not copied |
243 | ASSERT_TRUE(metadata.get() == new_schema->metadata().get()); |
244 | } |
245 | |
246 | TEST_F(TestSchema, TestRemoveMetadata) { |
247 | auto f0 = field("f0" , int32()); |
248 | auto f1 = field("f1" , uint8(), false); |
249 | auto f2 = field("f2" , utf8()); |
250 | vector<shared_ptr<Field>> fields = {f0, f1, f2}; |
251 | KeyValueMetadata metadata({"foo" , "bar" }, {"bizz" , "buzz" }); |
252 | auto schema = std::make_shared<Schema>(fields); |
253 | std::shared_ptr<Schema> new_schema = schema->RemoveMetadata(); |
254 | ASSERT_TRUE(new_schema->metadata() == nullptr); |
255 | } |
256 | |
257 | #define PRIMITIVE_TEST(KLASS, ENUM, NAME) \ |
258 | TEST(TypesTest, TestPrimitive_##ENUM) { \ |
259 | KLASS tp; \ |
260 | \ |
261 | ASSERT_EQ(tp.id(), Type::ENUM); \ |
262 | ASSERT_EQ(tp.ToString(), std::string(NAME)); \ |
263 | } |
264 | |
265 | PRIMITIVE_TEST(Int8Type, INT8, "int8" ) |
266 | PRIMITIVE_TEST(Int16Type, INT16, "int16" ) |
267 | PRIMITIVE_TEST(Int32Type, INT32, "int32" ) |
268 | PRIMITIVE_TEST(Int64Type, INT64, "int64" ) |
269 | PRIMITIVE_TEST(UInt8Type, UINT8, "uint8" ) |
270 | PRIMITIVE_TEST(UInt16Type, UINT16, "uint16" ) |
271 | PRIMITIVE_TEST(UInt32Type, UINT32, "uint32" ) |
272 | PRIMITIVE_TEST(UInt64Type, UINT64, "uint64" ) |
273 | |
274 | PRIMITIVE_TEST(FloatType, FLOAT, "float" ) |
275 | PRIMITIVE_TEST(DoubleType, DOUBLE, "double" ) |
276 | |
277 | PRIMITIVE_TEST(BooleanType, BOOL, "bool" ) |
278 | |
279 | TEST(TestBinaryType, ToString) { |
280 | BinaryType t1; |
281 | BinaryType e1; |
282 | StringType t2; |
283 | EXPECT_TRUE(t1.Equals(e1)); |
284 | EXPECT_FALSE(t1.Equals(t2)); |
285 | ASSERT_EQ(t1.id(), Type::BINARY); |
286 | ASSERT_EQ(t1.ToString(), std::string("binary" )); |
287 | } |
288 | |
289 | TEST(TestStringType, ToString) { |
290 | StringType str; |
291 | ASSERT_EQ(str.id(), Type::STRING); |
292 | ASSERT_EQ(str.ToString(), std::string("string" )); |
293 | } |
294 | |
295 | TEST(TestFixedSizeBinaryType, ToString) { |
296 | auto t = fixed_size_binary(10); |
297 | ASSERT_EQ(t->id(), Type::FIXED_SIZE_BINARY); |
298 | ASSERT_EQ("fixed_size_binary[10]" , t->ToString()); |
299 | } |
300 | |
301 | TEST(TestFixedSizeBinaryType, Equals) { |
302 | auto t1 = fixed_size_binary(10); |
303 | auto t2 = fixed_size_binary(10); |
304 | auto t3 = fixed_size_binary(3); |
305 | |
306 | ASSERT_TRUE(t1->Equals(t1)); |
307 | ASSERT_TRUE(t1->Equals(t2)); |
308 | ASSERT_FALSE(t1->Equals(t3)); |
309 | } |
310 | |
311 | TEST(TestListType, Basics) { |
312 | std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>(); |
313 | |
314 | ListType list_type(vt); |
315 | ASSERT_EQ(list_type.id(), Type::LIST); |
316 | |
317 | ASSERT_EQ("list" , list_type.name()); |
318 | ASSERT_EQ("list<item: uint8>" , list_type.ToString()); |
319 | |
320 | ASSERT_EQ(list_type.value_type()->id(), vt->id()); |
321 | ASSERT_EQ(list_type.value_type()->id(), vt->id()); |
322 | |
323 | std::shared_ptr<DataType> st = std::make_shared<StringType>(); |
324 | std::shared_ptr<DataType> lt = std::make_shared<ListType>(st); |
325 | ASSERT_EQ("list<item: string>" , lt->ToString()); |
326 | |
327 | ListType lt2(lt); |
328 | ASSERT_EQ("list<item: list<item: string>>" , lt2.ToString()); |
329 | } |
330 | |
331 | TEST(TestDateTypes, Attrs) { |
332 | auto t1 = date32(); |
333 | auto t2 = date64(); |
334 | |
335 | ASSERT_EQ("date32[day]" , t1->ToString()); |
336 | ASSERT_EQ("date64[ms]" , t2->ToString()); |
337 | |
338 | ASSERT_EQ(32, checked_cast<const FixedWidthType&>(*t1).bit_width()); |
339 | ASSERT_EQ(64, checked_cast<const FixedWidthType&>(*t2).bit_width()); |
340 | } |
341 | |
342 | TEST(TestTimeType, Equals) { |
343 | Time32Type t0; |
344 | Time32Type t1(TimeUnit::SECOND); |
345 | Time32Type t2(TimeUnit::MILLI); |
346 | Time64Type t3(TimeUnit::MICRO); |
347 | Time64Type t4(TimeUnit::NANO); |
348 | Time64Type t5(TimeUnit::MICRO); |
349 | |
350 | ASSERT_EQ(32, t0.bit_width()); |
351 | ASSERT_EQ(64, t3.bit_width()); |
352 | |
353 | ASSERT_TRUE(t0.Equals(t2)); |
354 | ASSERT_TRUE(t1.Equals(t1)); |
355 | ASSERT_FALSE(t1.Equals(t3)); |
356 | ASSERT_FALSE(t3.Equals(t4)); |
357 | ASSERT_TRUE(t3.Equals(t5)); |
358 | } |
359 | |
360 | TEST(TestTimeType, ToString) { |
361 | auto t1 = time32(TimeUnit::MILLI); |
362 | auto t2 = time64(TimeUnit::NANO); |
363 | auto t3 = time32(TimeUnit::SECOND); |
364 | auto t4 = time64(TimeUnit::MICRO); |
365 | |
366 | ASSERT_EQ("time32[ms]" , t1->ToString()); |
367 | ASSERT_EQ("time64[ns]" , t2->ToString()); |
368 | ASSERT_EQ("time32[s]" , t3->ToString()); |
369 | ASSERT_EQ("time64[us]" , t4->ToString()); |
370 | } |
371 | |
372 | TEST(TestTimestampType, Equals) { |
373 | TimestampType t1; |
374 | TimestampType t2; |
375 | TimestampType t3(TimeUnit::NANO); |
376 | TimestampType t4(TimeUnit::NANO); |
377 | |
378 | ASSERT_TRUE(t1.Equals(t2)); |
379 | ASSERT_FALSE(t1.Equals(t3)); |
380 | ASSERT_TRUE(t3.Equals(t4)); |
381 | } |
382 | |
383 | TEST(TestTimestampType, ToString) { |
384 | auto t1 = timestamp(TimeUnit::MILLI); |
385 | auto t2 = timestamp(TimeUnit::NANO, "US/Eastern" ); |
386 | auto t3 = timestamp(TimeUnit::SECOND); |
387 | auto t4 = timestamp(TimeUnit::MICRO); |
388 | |
389 | ASSERT_EQ("timestamp[ms]" , t1->ToString()); |
390 | ASSERT_EQ("timestamp[ns, tz=US/Eastern]" , t2->ToString()); |
391 | ASSERT_EQ("timestamp[s]" , t3->ToString()); |
392 | ASSERT_EQ("timestamp[us]" , t4->ToString()); |
393 | } |
394 | |
395 | TEST(TestNestedType, Equals) { |
396 | auto create_struct = [](std::string inner_name, |
397 | std::string struct_name) -> shared_ptr<Field> { |
398 | auto f_type = field(inner_name, int32()); |
399 | vector<shared_ptr<Field>> fields = {f_type}; |
400 | auto s_type = std::make_shared<StructType>(fields); |
401 | return field(struct_name, s_type); |
402 | }; |
403 | |
404 | auto create_union = [](std::string inner_name, |
405 | std::string union_name) -> shared_ptr<Field> { |
406 | auto f_type = field(inner_name, int32()); |
407 | vector<shared_ptr<Field>> fields = {f_type}; |
408 | vector<uint8_t> codes = {Type::INT32}; |
409 | auto u_type = std::make_shared<UnionType>(fields, codes, UnionMode::SPARSE); |
410 | return field(union_name, u_type); |
411 | }; |
412 | |
413 | auto s0 = create_struct("f0" , "s0" ); |
414 | auto s0_other = create_struct("f0" , "s0" ); |
415 | auto s0_bad = create_struct("f1" , "s0" ); |
416 | auto s1 = create_struct("f1" , "s1" ); |
417 | |
418 | ASSERT_TRUE(s0->Equals(s0_other)); |
419 | ASSERT_FALSE(s0->Equals(s1)); |
420 | ASSERT_FALSE(s0->Equals(s0_bad)); |
421 | |
422 | auto u0 = create_union("f0" , "u0" ); |
423 | auto u0_other = create_union("f0" , "u0" ); |
424 | auto u0_bad = create_union("f1" , "u0" ); |
425 | auto u1 = create_union("f1" , "u1" ); |
426 | |
427 | ASSERT_TRUE(u0->Equals(u0_other)); |
428 | ASSERT_FALSE(u0->Equals(u1)); |
429 | ASSERT_FALSE(u0->Equals(u0_bad)); |
430 | } |
431 | |
432 | TEST(TestStructType, Basics) { |
433 | auto f0_type = int32(); |
434 | auto f0 = field("f0" , f0_type); |
435 | |
436 | auto f1_type = utf8(); |
437 | auto f1 = field("f1" , f1_type); |
438 | |
439 | auto f2_type = uint8(); |
440 | auto f2 = field("f2" , f2_type); |
441 | |
442 | vector<std::shared_ptr<Field>> fields = {f0, f1, f2}; |
443 | |
444 | StructType struct_type(fields); |
445 | |
446 | ASSERT_TRUE(struct_type.child(0)->Equals(f0)); |
447 | ASSERT_TRUE(struct_type.child(1)->Equals(f1)); |
448 | ASSERT_TRUE(struct_type.child(2)->Equals(f2)); |
449 | |
450 | ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>" ); |
451 | |
452 | // TODO(wesm): out of bounds for field(...) |
453 | } |
454 | |
455 | TEST(TestStructType, GetFieldByName) { |
456 | auto f0 = field("f0" , int32()); |
457 | auto f1 = field("f1" , uint8(), false); |
458 | auto f2 = field("f2" , utf8()); |
459 | auto f3 = field("f3" , list(int16())); |
460 | |
461 | StructType struct_type({f0, f1, f2, f3}); |
462 | std::shared_ptr<Field> result; |
463 | |
464 | result = struct_type.GetFieldByName("f1" ); |
465 | ASSERT_EQ(f1, result); |
466 | |
467 | result = struct_type.GetFieldByName("f3" ); |
468 | ASSERT_EQ(f3, result); |
469 | |
470 | result = struct_type.GetFieldByName("not-found" ); |
471 | ASSERT_EQ(result, nullptr); |
472 | } |
473 | |
474 | TEST(TestStructType, GetFieldIndex) { |
475 | auto f0 = field("f0" , int32()); |
476 | auto f1 = field("f1" , uint8(), false); |
477 | auto f2 = field("f2" , utf8()); |
478 | auto f3 = field("f3" , list(int16())); |
479 | |
480 | StructType struct_type({f0, f1, f2, f3}); |
481 | |
482 | ASSERT_EQ(0, struct_type.GetFieldIndex(f0->name())); |
483 | ASSERT_EQ(1, struct_type.GetFieldIndex(f1->name())); |
484 | ASSERT_EQ(2, struct_type.GetFieldIndex(f2->name())); |
485 | ASSERT_EQ(3, struct_type.GetFieldIndex(f3->name())); |
486 | ASSERT_EQ(-1, struct_type.GetFieldIndex("not-found" )); |
487 | } |
488 | |
489 | TEST(TestStructType, GetFieldIndexDuplicates) { |
490 | auto f0 = field("f0" , int32()); |
491 | auto f1 = field("f1" , int64()); |
492 | auto f2 = field("f1" , utf8()); |
493 | StructType struct_type({f0, f1, f2}); |
494 | |
495 | ASSERT_EQ(0, struct_type.GetFieldIndex("f0" )); |
496 | ASSERT_EQ(-1, struct_type.GetFieldIndex("f1" )); |
497 | } |
498 | |
499 | TEST(TestDictionaryType, Equals) { |
500 | auto t1 = dictionary(int8(), ArrayFromJSON(int32(), "[3, 4, 5, 6]" )); |
501 | auto t2 = dictionary(int8(), ArrayFromJSON(int32(), "[3, 4, 5, 6]" )); |
502 | auto t3 = dictionary(int16(), ArrayFromJSON(int32(), "[3, 4, 5, 6]" )); |
503 | auto t4 = dictionary(int8(), ArrayFromJSON(int16(), "[3, 4, 5, 6]" )); |
504 | auto t5 = dictionary(int8(), ArrayFromJSON(int32(), "[3, 4, 7, 6]" )); |
505 | |
506 | ASSERT_TRUE(t1->Equals(t2)); |
507 | // Different index type |
508 | ASSERT_FALSE(t1->Equals(t3)); |
509 | // Different value type |
510 | ASSERT_FALSE(t1->Equals(t4)); |
511 | // Different values |
512 | ASSERT_FALSE(t1->Equals(t5)); |
513 | } |
514 | |
515 | TEST(TestDictionaryType, UnifyNumeric) { |
516 | auto t1 = dictionary(int8(), ArrayFromJSON(int64(), "[3, 4, 7]" )); |
517 | auto t2 = dictionary(int8(), ArrayFromJSON(int64(), "[1, 7, 4, 8]" )); |
518 | auto t3 = dictionary(int8(), ArrayFromJSON(int64(), "[1, -200]" )); |
519 | |
520 | auto expected = dictionary(int8(), ArrayFromJSON(int64(), "[3, 4, 7, 1, 8, -200]" )); |
521 | |
522 | std::shared_ptr<DataType> dict_type; |
523 | ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get(), t3.get()}, |
524 | &dict_type)); |
525 | ASSERT_TRUE(dict_type->Equals(expected)); |
526 | |
527 | std::vector<std::vector<int32_t>> transpose_maps; |
528 | ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get(), t3.get()}, |
529 | &dict_type, &transpose_maps)); |
530 | ASSERT_TRUE(dict_type->Equals(expected)); |
531 | ASSERT_EQ(transpose_maps.size(), 3); |
532 | ASSERT_EQ(transpose_maps[0], std::vector<int32_t>({0, 1, 2})); |
533 | ASSERT_EQ(transpose_maps[1], std::vector<int32_t>({3, 2, 1, 4})); |
534 | ASSERT_EQ(transpose_maps[2], std::vector<int32_t>({3, 5})); |
535 | } |
536 | |
537 | TEST(TestDictionaryType, UnifyString) { |
538 | auto t1 = dictionary(int16(), ArrayFromJSON(utf8(), "[\"foo\", \"bar\"]" )); |
539 | auto t2 = dictionary(int32(), ArrayFromJSON(utf8(), "[\"quux\", \"foo\"]" )); |
540 | |
541 | auto expected = |
542 | dictionary(int8(), ArrayFromJSON(utf8(), "[\"foo\", \"bar\", \"quux\"]" )); |
543 | |
544 | std::shared_ptr<DataType> dict_type; |
545 | ASSERT_OK( |
546 | DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type)); |
547 | ASSERT_TRUE(dict_type->Equals(expected)); |
548 | |
549 | std::vector<std::vector<int32_t>> transpose_maps; |
550 | ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type, |
551 | &transpose_maps)); |
552 | ASSERT_TRUE(dict_type->Equals(expected)); |
553 | |
554 | ASSERT_EQ(transpose_maps.size(), 2); |
555 | ASSERT_EQ(transpose_maps[0], std::vector<int32_t>({0, 1})); |
556 | ASSERT_EQ(transpose_maps[1], std::vector<int32_t>({2, 0})); |
557 | } |
558 | |
559 | TEST(TestDictionaryType, UnifyFixedSizeBinary) { |
560 | auto type = fixed_size_binary(3); |
561 | |
562 | std::string data = "foobarbazqux" ; |
563 | auto buf = std::make_shared<Buffer>(data); |
564 | // ["foo", "bar"] |
565 | auto dict1 = std::make_shared<FixedSizeBinaryArray>(type, 2, SliceBuffer(buf, 0, 6)); |
566 | auto t1 = dictionary(int16(), dict1); |
567 | // ["bar", "baz", "qux"] |
568 | auto dict2 = std::make_shared<FixedSizeBinaryArray>(type, 3, SliceBuffer(buf, 3, 9)); |
569 | auto t2 = dictionary(int16(), dict2); |
570 | |
571 | // ["foo", "bar", "baz", "qux"] |
572 | auto expected_dict = std::make_shared<FixedSizeBinaryArray>(type, 4, buf); |
573 | auto expected = dictionary(int8(), expected_dict); |
574 | |
575 | std::shared_ptr<DataType> dict_type; |
576 | ASSERT_OK( |
577 | DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type)); |
578 | ASSERT_TRUE(dict_type->Equals(expected)); |
579 | |
580 | std::vector<std::vector<int32_t>> transpose_maps; |
581 | ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type, |
582 | &transpose_maps)); |
583 | ASSERT_TRUE(dict_type->Equals(expected)); |
584 | ASSERT_EQ(transpose_maps.size(), 2); |
585 | ASSERT_EQ(transpose_maps[0], std::vector<int32_t>({0, 1})); |
586 | ASSERT_EQ(transpose_maps[1], std::vector<int32_t>({1, 2, 3})); |
587 | } |
588 | |
589 | TEST(TestDictionaryType, UnifyLarge) { |
590 | // Unifying "large" dictionary types should choose the right index type |
591 | std::shared_ptr<Array> dict1, dict2, expected_dict; |
592 | |
593 | Int32Builder builder; |
594 | ASSERT_OK(builder.Reserve(120)); |
595 | for (int32_t i = 0; i < 120; ++i) { |
596 | builder.UnsafeAppend(i); |
597 | } |
598 | ASSERT_OK(builder.Finish(&dict1)); |
599 | ASSERT_EQ(dict1->length(), 120); |
600 | auto t1 = dictionary(int8(), dict1); |
601 | |
602 | ASSERT_OK(builder.Reserve(30)); |
603 | for (int32_t i = 110; i < 140; ++i) { |
604 | builder.UnsafeAppend(i); |
605 | } |
606 | ASSERT_OK(builder.Finish(&dict2)); |
607 | ASSERT_EQ(dict2->length(), 30); |
608 | auto t2 = dictionary(int8(), dict2); |
609 | |
610 | ASSERT_OK(builder.Reserve(140)); |
611 | for (int32_t i = 0; i < 140; ++i) { |
612 | builder.UnsafeAppend(i); |
613 | } |
614 | ASSERT_OK(builder.Finish(&expected_dict)); |
615 | ASSERT_EQ(expected_dict->length(), 140); |
616 | // int8 would be too narrow to hold all possible index values |
617 | auto expected = dictionary(int16(), expected_dict); |
618 | |
619 | std::shared_ptr<DataType> dict_type; |
620 | ASSERT_OK( |
621 | DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type)); |
622 | ASSERT_TRUE(dict_type->Equals(expected)); |
623 | } |
624 | |
625 | TEST(TypesTest, TestDecimal128Small) { |
626 | Decimal128Type t1(8, 4); |
627 | |
628 | ASSERT_EQ(t1.id(), Type::DECIMAL); |
629 | ASSERT_EQ(t1.precision(), 8); |
630 | ASSERT_EQ(t1.scale(), 4); |
631 | |
632 | ASSERT_EQ(t1.ToString(), std::string("decimal(8, 4)" )); |
633 | |
634 | // Test properties |
635 | ASSERT_EQ(t1.byte_width(), 16); |
636 | ASSERT_EQ(t1.bit_width(), 128); |
637 | } |
638 | |
639 | TEST(TypesTest, TestDecimal128Medium) { |
640 | Decimal128Type t1(12, 5); |
641 | |
642 | ASSERT_EQ(t1.id(), Type::DECIMAL); |
643 | ASSERT_EQ(t1.precision(), 12); |
644 | ASSERT_EQ(t1.scale(), 5); |
645 | |
646 | ASSERT_EQ(t1.ToString(), std::string("decimal(12, 5)" )); |
647 | |
648 | // Test properties |
649 | ASSERT_EQ(t1.byte_width(), 16); |
650 | ASSERT_EQ(t1.bit_width(), 128); |
651 | } |
652 | |
653 | TEST(TypesTest, TestDecimal128Large) { |
654 | Decimal128Type t1(27, 7); |
655 | |
656 | ASSERT_EQ(t1.id(), Type::DECIMAL); |
657 | ASSERT_EQ(t1.precision(), 27); |
658 | ASSERT_EQ(t1.scale(), 7); |
659 | |
660 | ASSERT_EQ(t1.ToString(), std::string("decimal(27, 7)" )); |
661 | |
662 | // Test properties |
663 | ASSERT_EQ(t1.byte_width(), 16); |
664 | ASSERT_EQ(t1.bit_width(), 128); |
665 | } |
666 | |
667 | } // namespace arrow |
668 | |