1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// Unit tests for DataType (and subclasses), Field, and Schema
19
20#include <cstdint>
21#include <memory>
22#include <string>
23#include <vector>
24
25#include <gtest/gtest.h>
26
27#include "arrow/memory_pool.h"
28#include "arrow/test-util.h"
29#include "arrow/type.h"
30#include "arrow/util/checked_cast.h"
31
32using std::shared_ptr;
33using std::vector;
34
35namespace arrow {
36
37using internal::checked_cast;
38
39TEST(TestField, Basics) {
40 Field f0("f0", int32());
41 Field f0_nn("f0", int32(), false);
42
43 ASSERT_EQ(f0.name(), "f0");
44 ASSERT_EQ(f0.type()->ToString(), int32()->ToString());
45
46 ASSERT_TRUE(f0.nullable());
47 ASSERT_FALSE(f0_nn.nullable());
48}
49
50TEST(TestField, Equals) {
51 auto meta = key_value_metadata({{"a", "1"}, {"b", "2"}});
52
53 Field f0("f0", int32());
54 Field f0_nn("f0", int32(), false);
55 Field f0_other("f0", int32());
56 Field f0_with_meta("f0", int32(), true, meta);
57
58 ASSERT_TRUE(f0.Equals(f0_other));
59 ASSERT_FALSE(f0.Equals(f0_nn));
60 ASSERT_FALSE(f0.Equals(f0_with_meta));
61 ASSERT_TRUE(f0.Equals(f0_with_meta, false));
62}
63
64TEST(TestField, TestMetadataConstruction) {
65 auto metadata = std::shared_ptr<KeyValueMetadata>(
66 new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
67 auto metadata2 = metadata->Copy();
68 auto f0 = field("f0", int32(), true, metadata);
69 auto f1 = field("f0", int32(), true, metadata2);
70 ASSERT_TRUE(metadata->Equals(*f0->metadata()));
71 ASSERT_TRUE(f0->Equals(*f1));
72}
73
74TEST(TestField, TestAddMetadata) {
75 auto metadata = std::shared_ptr<KeyValueMetadata>(
76 new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
77 auto f0 = field("f0", int32());
78 auto f1 = field("f0", int32(), true, metadata);
79 std::shared_ptr<Field> f2 = f0->AddMetadata(metadata);
80
81 ASSERT_FALSE(f2->Equals(*f0));
82 ASSERT_TRUE(f2->Equals(*f1));
83
84 // Not copied
85 ASSERT_TRUE(metadata.get() == f1->metadata().get());
86}
87
88TEST(TestField, TestRemoveMetadata) {
89 auto metadata = std::shared_ptr<KeyValueMetadata>(
90 new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
91 auto f0 = field("f0", int32());
92 auto f1 = field("f0", int32(), true, metadata);
93 std::shared_ptr<Field> f2 = f1->RemoveMetadata();
94 ASSERT_TRUE(f2->metadata() == nullptr);
95}
96
97TEST(TestField, TestFlatten) {
98 auto metadata = std::shared_ptr<KeyValueMetadata>(
99 new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
100 auto f0 = field("f0", int32(), true /* nullable */, metadata);
101 auto vec = f0->Flatten();
102 ASSERT_EQ(vec.size(), 1);
103 ASSERT_TRUE(vec[0]->Equals(*f0));
104
105 auto f1 = field("f1", float64(), false /* nullable */);
106 auto ff = field("nest", struct_({f0, f1}));
107 vec = ff->Flatten();
108 ASSERT_EQ(vec.size(), 2);
109 auto expected0 = field("nest.f0", int32(), true /* nullable */, metadata);
110 // nullable parent implies nullable flattened child
111 auto expected1 = field("nest.f1", float64(), true /* nullable */);
112 ASSERT_TRUE(vec[0]->Equals(*expected0));
113 ASSERT_TRUE(vec[1]->Equals(*expected1));
114
115 ff = field("nest", struct_({f0, f1}), false /* nullable */);
116 vec = ff->Flatten();
117 ASSERT_EQ(vec.size(), 2);
118 expected0 = field("nest.f0", int32(), true /* nullable */, metadata);
119 expected1 = field("nest.f1", float64(), false /* nullable */);
120 ASSERT_TRUE(vec[0]->Equals(*expected0));
121 ASSERT_TRUE(vec[1]->Equals(*expected1));
122}
123
124class TestSchema : public ::testing::Test {
125 public:
126 void SetUp() {}
127};
128
129TEST_F(TestSchema, Basics) {
130 auto f0 = field("f0", int32());
131 auto f1 = field("f1", uint8(), false);
132 auto f1_optional = field("f1", uint8());
133
134 auto f2 = field("f2", utf8());
135
136 auto schema = ::arrow::schema({f0, f1, f2});
137
138 ASSERT_EQ(3, schema->num_fields());
139 ASSERT_TRUE(f0->Equals(schema->field(0)));
140 ASSERT_TRUE(f1->Equals(schema->field(1)));
141 ASSERT_TRUE(f2->Equals(schema->field(2)));
142
143 auto schema2 = ::arrow::schema({f0, f1, f2});
144
145 vector<shared_ptr<Field>> fields3 = {f0, f1_optional, f2};
146 auto schema3 = std::make_shared<Schema>(fields3);
147 ASSERT_TRUE(schema->Equals(*schema2));
148 ASSERT_FALSE(schema->Equals(*schema3));
149}
150
151TEST_F(TestSchema, ToString) {
152 auto f0 = field("f0", int32());
153 auto f1 = field("f1", uint8(), false);
154 auto f2 = field("f2", utf8());
155 auto f3 = field("f3", list(int16()));
156
157 auto schema = ::arrow::schema({f0, f1, f2, f3});
158
159 std::string result = schema->ToString();
160 std::string expected = R"(f0: int32
161f1: uint8 not null
162f2: string
163f3: list<item: int16>)";
164
165 ASSERT_EQ(expected, result);
166}
167
168TEST_F(TestSchema, GetFieldByName) {
169 auto f0 = field("f0", int32());
170 auto f1 = field("f1", uint8(), false);
171 auto f2 = field("f2", utf8());
172 auto f3 = field("f3", list(int16()));
173
174 auto schema = ::arrow::schema({f0, f1, f2, f3});
175
176 std::shared_ptr<Field> result;
177
178 result = schema->GetFieldByName("f1");
179 ASSERT_TRUE(f1->Equals(result));
180
181 result = schema->GetFieldByName("f3");
182 ASSERT_TRUE(f3->Equals(result));
183
184 result = schema->GetFieldByName("not-found");
185 ASSERT_TRUE(result == nullptr);
186}
187
188TEST_F(TestSchema, GetFieldIndex) {
189 auto f0 = field("f0", int32());
190 auto f1 = field("f1", uint8(), false);
191 auto f2 = field("f2", utf8());
192 auto f3 = field("f3", list(int16()));
193
194 auto schema = ::arrow::schema({f0, f1, f2, f3});
195
196 ASSERT_EQ(0, schema->GetFieldIndex(f0->name()));
197 ASSERT_EQ(1, schema->GetFieldIndex(f1->name()));
198 ASSERT_EQ(2, schema->GetFieldIndex(f2->name()));
199 ASSERT_EQ(3, schema->GetFieldIndex(f3->name()));
200 ASSERT_EQ(-1, schema->GetFieldIndex("not-found"));
201}
202
203TEST_F(TestSchema, TestMetadataConstruction) {
204 auto metadata0 = key_value_metadata({{"foo", "bar"}, {"bizz", "buzz"}});
205 auto metadata1 = key_value_metadata({{"foo", "baz"}});
206
207 auto f0 = field("f0", int32());
208 auto f1 = field("f1", uint8(), false);
209 auto f2 = field("f2", utf8(), true);
210 auto f3 = field("f2", utf8(), true, metadata1->Copy());
211
212 auto schema0 = ::arrow::schema({f0, f1, f2}, metadata0);
213 auto schema1 = ::arrow::schema({f0, f1, f2}, metadata1);
214 auto schema2 = ::arrow::schema({f0, f1, f2}, metadata0->Copy());
215 auto schema3 = ::arrow::schema({f0, f1, f3}, metadata0->Copy());
216
217 ASSERT_TRUE(metadata0->Equals(*schema0->metadata()));
218 ASSERT_TRUE(metadata1->Equals(*schema1->metadata()));
219 ASSERT_TRUE(metadata0->Equals(*schema2->metadata()));
220 ASSERT_TRUE(schema0->Equals(*schema2));
221 ASSERT_FALSE(schema0->Equals(*schema1));
222 ASSERT_FALSE(schema2->Equals(*schema1));
223 ASSERT_FALSE(schema2->Equals(*schema3));
224
225 // don't check metadata
226 ASSERT_TRUE(schema0->Equals(*schema1, false));
227 ASSERT_TRUE(schema2->Equals(*schema1, false));
228 ASSERT_TRUE(schema2->Equals(*schema3, false));
229}
230
231TEST_F(TestSchema, TestAddMetadata) {
232 auto f0 = field("f0", int32());
233 auto f1 = field("f1", uint8(), false);
234 auto f2 = field("f2", utf8());
235 vector<shared_ptr<Field>> fields = {f0, f1, f2};
236 auto metadata = std::shared_ptr<KeyValueMetadata>(
237 new KeyValueMetadata({"foo", "bar"}, {"bizz", "buzz"}));
238 auto schema = std::make_shared<Schema>(fields);
239 std::shared_ptr<Schema> new_schema = schema->AddMetadata(metadata);
240 ASSERT_TRUE(metadata->Equals(*new_schema->metadata()));
241
242 // Not copied
243 ASSERT_TRUE(metadata.get() == new_schema->metadata().get());
244}
245
246TEST_F(TestSchema, TestRemoveMetadata) {
247 auto f0 = field("f0", int32());
248 auto f1 = field("f1", uint8(), false);
249 auto f2 = field("f2", utf8());
250 vector<shared_ptr<Field>> fields = {f0, f1, f2};
251 KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
252 auto schema = std::make_shared<Schema>(fields);
253 std::shared_ptr<Schema> new_schema = schema->RemoveMetadata();
254 ASSERT_TRUE(new_schema->metadata() == nullptr);
255}
256
257#define PRIMITIVE_TEST(KLASS, ENUM, NAME) \
258 TEST(TypesTest, TestPrimitive_##ENUM) { \
259 KLASS tp; \
260 \
261 ASSERT_EQ(tp.id(), Type::ENUM); \
262 ASSERT_EQ(tp.ToString(), std::string(NAME)); \
263 }
264
265PRIMITIVE_TEST(Int8Type, INT8, "int8")
266PRIMITIVE_TEST(Int16Type, INT16, "int16")
267PRIMITIVE_TEST(Int32Type, INT32, "int32")
268PRIMITIVE_TEST(Int64Type, INT64, "int64")
269PRIMITIVE_TEST(UInt8Type, UINT8, "uint8")
270PRIMITIVE_TEST(UInt16Type, UINT16, "uint16")
271PRIMITIVE_TEST(UInt32Type, UINT32, "uint32")
272PRIMITIVE_TEST(UInt64Type, UINT64, "uint64")
273
274PRIMITIVE_TEST(FloatType, FLOAT, "float")
275PRIMITIVE_TEST(DoubleType, DOUBLE, "double")
276
277PRIMITIVE_TEST(BooleanType, BOOL, "bool")
278
279TEST(TestBinaryType, ToString) {
280 BinaryType t1;
281 BinaryType e1;
282 StringType t2;
283 EXPECT_TRUE(t1.Equals(e1));
284 EXPECT_FALSE(t1.Equals(t2));
285 ASSERT_EQ(t1.id(), Type::BINARY);
286 ASSERT_EQ(t1.ToString(), std::string("binary"));
287}
288
289TEST(TestStringType, ToString) {
290 StringType str;
291 ASSERT_EQ(str.id(), Type::STRING);
292 ASSERT_EQ(str.ToString(), std::string("string"));
293}
294
295TEST(TestFixedSizeBinaryType, ToString) {
296 auto t = fixed_size_binary(10);
297 ASSERT_EQ(t->id(), Type::FIXED_SIZE_BINARY);
298 ASSERT_EQ("fixed_size_binary[10]", t->ToString());
299}
300
301TEST(TestFixedSizeBinaryType, Equals) {
302 auto t1 = fixed_size_binary(10);
303 auto t2 = fixed_size_binary(10);
304 auto t3 = fixed_size_binary(3);
305
306 ASSERT_TRUE(t1->Equals(t1));
307 ASSERT_TRUE(t1->Equals(t2));
308 ASSERT_FALSE(t1->Equals(t3));
309}
310
311TEST(TestListType, Basics) {
312 std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
313
314 ListType list_type(vt);
315 ASSERT_EQ(list_type.id(), Type::LIST);
316
317 ASSERT_EQ("list", list_type.name());
318 ASSERT_EQ("list<item: uint8>", list_type.ToString());
319
320 ASSERT_EQ(list_type.value_type()->id(), vt->id());
321 ASSERT_EQ(list_type.value_type()->id(), vt->id());
322
323 std::shared_ptr<DataType> st = std::make_shared<StringType>();
324 std::shared_ptr<DataType> lt = std::make_shared<ListType>(st);
325 ASSERT_EQ("list<item: string>", lt->ToString());
326
327 ListType lt2(lt);
328 ASSERT_EQ("list<item: list<item: string>>", lt2.ToString());
329}
330
331TEST(TestDateTypes, Attrs) {
332 auto t1 = date32();
333 auto t2 = date64();
334
335 ASSERT_EQ("date32[day]", t1->ToString());
336 ASSERT_EQ("date64[ms]", t2->ToString());
337
338 ASSERT_EQ(32, checked_cast<const FixedWidthType&>(*t1).bit_width());
339 ASSERT_EQ(64, checked_cast<const FixedWidthType&>(*t2).bit_width());
340}
341
342TEST(TestTimeType, Equals) {
343 Time32Type t0;
344 Time32Type t1(TimeUnit::SECOND);
345 Time32Type t2(TimeUnit::MILLI);
346 Time64Type t3(TimeUnit::MICRO);
347 Time64Type t4(TimeUnit::NANO);
348 Time64Type t5(TimeUnit::MICRO);
349
350 ASSERT_EQ(32, t0.bit_width());
351 ASSERT_EQ(64, t3.bit_width());
352
353 ASSERT_TRUE(t0.Equals(t2));
354 ASSERT_TRUE(t1.Equals(t1));
355 ASSERT_FALSE(t1.Equals(t3));
356 ASSERT_FALSE(t3.Equals(t4));
357 ASSERT_TRUE(t3.Equals(t5));
358}
359
360TEST(TestTimeType, ToString) {
361 auto t1 = time32(TimeUnit::MILLI);
362 auto t2 = time64(TimeUnit::NANO);
363 auto t3 = time32(TimeUnit::SECOND);
364 auto t4 = time64(TimeUnit::MICRO);
365
366 ASSERT_EQ("time32[ms]", t1->ToString());
367 ASSERT_EQ("time64[ns]", t2->ToString());
368 ASSERT_EQ("time32[s]", t3->ToString());
369 ASSERT_EQ("time64[us]", t4->ToString());
370}
371
372TEST(TestTimestampType, Equals) {
373 TimestampType t1;
374 TimestampType t2;
375 TimestampType t3(TimeUnit::NANO);
376 TimestampType t4(TimeUnit::NANO);
377
378 ASSERT_TRUE(t1.Equals(t2));
379 ASSERT_FALSE(t1.Equals(t3));
380 ASSERT_TRUE(t3.Equals(t4));
381}
382
383TEST(TestTimestampType, ToString) {
384 auto t1 = timestamp(TimeUnit::MILLI);
385 auto t2 = timestamp(TimeUnit::NANO, "US/Eastern");
386 auto t3 = timestamp(TimeUnit::SECOND);
387 auto t4 = timestamp(TimeUnit::MICRO);
388
389 ASSERT_EQ("timestamp[ms]", t1->ToString());
390 ASSERT_EQ("timestamp[ns, tz=US/Eastern]", t2->ToString());
391 ASSERT_EQ("timestamp[s]", t3->ToString());
392 ASSERT_EQ("timestamp[us]", t4->ToString());
393}
394
395TEST(TestNestedType, Equals) {
396 auto create_struct = [](std::string inner_name,
397 std::string struct_name) -> shared_ptr<Field> {
398 auto f_type = field(inner_name, int32());
399 vector<shared_ptr<Field>> fields = {f_type};
400 auto s_type = std::make_shared<StructType>(fields);
401 return field(struct_name, s_type);
402 };
403
404 auto create_union = [](std::string inner_name,
405 std::string union_name) -> shared_ptr<Field> {
406 auto f_type = field(inner_name, int32());
407 vector<shared_ptr<Field>> fields = {f_type};
408 vector<uint8_t> codes = {Type::INT32};
409 auto u_type = std::make_shared<UnionType>(fields, codes, UnionMode::SPARSE);
410 return field(union_name, u_type);
411 };
412
413 auto s0 = create_struct("f0", "s0");
414 auto s0_other = create_struct("f0", "s0");
415 auto s0_bad = create_struct("f1", "s0");
416 auto s1 = create_struct("f1", "s1");
417
418 ASSERT_TRUE(s0->Equals(s0_other));
419 ASSERT_FALSE(s0->Equals(s1));
420 ASSERT_FALSE(s0->Equals(s0_bad));
421
422 auto u0 = create_union("f0", "u0");
423 auto u0_other = create_union("f0", "u0");
424 auto u0_bad = create_union("f1", "u0");
425 auto u1 = create_union("f1", "u1");
426
427 ASSERT_TRUE(u0->Equals(u0_other));
428 ASSERT_FALSE(u0->Equals(u1));
429 ASSERT_FALSE(u0->Equals(u0_bad));
430}
431
432TEST(TestStructType, Basics) {
433 auto f0_type = int32();
434 auto f0 = field("f0", f0_type);
435
436 auto f1_type = utf8();
437 auto f1 = field("f1", f1_type);
438
439 auto f2_type = uint8();
440 auto f2 = field("f2", f2_type);
441
442 vector<std::shared_ptr<Field>> fields = {f0, f1, f2};
443
444 StructType struct_type(fields);
445
446 ASSERT_TRUE(struct_type.child(0)->Equals(f0));
447 ASSERT_TRUE(struct_type.child(1)->Equals(f1));
448 ASSERT_TRUE(struct_type.child(2)->Equals(f2));
449
450 ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>");
451
452 // TODO(wesm): out of bounds for field(...)
453}
454
455TEST(TestStructType, GetFieldByName) {
456 auto f0 = field("f0", int32());
457 auto f1 = field("f1", uint8(), false);
458 auto f2 = field("f2", utf8());
459 auto f3 = field("f3", list(int16()));
460
461 StructType struct_type({f0, f1, f2, f3});
462 std::shared_ptr<Field> result;
463
464 result = struct_type.GetFieldByName("f1");
465 ASSERT_EQ(f1, result);
466
467 result = struct_type.GetFieldByName("f3");
468 ASSERT_EQ(f3, result);
469
470 result = struct_type.GetFieldByName("not-found");
471 ASSERT_EQ(result, nullptr);
472}
473
474TEST(TestStructType, GetFieldIndex) {
475 auto f0 = field("f0", int32());
476 auto f1 = field("f1", uint8(), false);
477 auto f2 = field("f2", utf8());
478 auto f3 = field("f3", list(int16()));
479
480 StructType struct_type({f0, f1, f2, f3});
481
482 ASSERT_EQ(0, struct_type.GetFieldIndex(f0->name()));
483 ASSERT_EQ(1, struct_type.GetFieldIndex(f1->name()));
484 ASSERT_EQ(2, struct_type.GetFieldIndex(f2->name()));
485 ASSERT_EQ(3, struct_type.GetFieldIndex(f3->name()));
486 ASSERT_EQ(-1, struct_type.GetFieldIndex("not-found"));
487}
488
489TEST(TestStructType, GetFieldIndexDuplicates) {
490 auto f0 = field("f0", int32());
491 auto f1 = field("f1", int64());
492 auto f2 = field("f1", utf8());
493 StructType struct_type({f0, f1, f2});
494
495 ASSERT_EQ(0, struct_type.GetFieldIndex("f0"));
496 ASSERT_EQ(-1, struct_type.GetFieldIndex("f1"));
497}
498
499TEST(TestDictionaryType, Equals) {
500 auto t1 = dictionary(int8(), ArrayFromJSON(int32(), "[3, 4, 5, 6]"));
501 auto t2 = dictionary(int8(), ArrayFromJSON(int32(), "[3, 4, 5, 6]"));
502 auto t3 = dictionary(int16(), ArrayFromJSON(int32(), "[3, 4, 5, 6]"));
503 auto t4 = dictionary(int8(), ArrayFromJSON(int16(), "[3, 4, 5, 6]"));
504 auto t5 = dictionary(int8(), ArrayFromJSON(int32(), "[3, 4, 7, 6]"));
505
506 ASSERT_TRUE(t1->Equals(t2));
507 // Different index type
508 ASSERT_FALSE(t1->Equals(t3));
509 // Different value type
510 ASSERT_FALSE(t1->Equals(t4));
511 // Different values
512 ASSERT_FALSE(t1->Equals(t5));
513}
514
515TEST(TestDictionaryType, UnifyNumeric) {
516 auto t1 = dictionary(int8(), ArrayFromJSON(int64(), "[3, 4, 7]"));
517 auto t2 = dictionary(int8(), ArrayFromJSON(int64(), "[1, 7, 4, 8]"));
518 auto t3 = dictionary(int8(), ArrayFromJSON(int64(), "[1, -200]"));
519
520 auto expected = dictionary(int8(), ArrayFromJSON(int64(), "[3, 4, 7, 1, 8, -200]"));
521
522 std::shared_ptr<DataType> dict_type;
523 ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get(), t3.get()},
524 &dict_type));
525 ASSERT_TRUE(dict_type->Equals(expected));
526
527 std::vector<std::vector<int32_t>> transpose_maps;
528 ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get(), t3.get()},
529 &dict_type, &transpose_maps));
530 ASSERT_TRUE(dict_type->Equals(expected));
531 ASSERT_EQ(transpose_maps.size(), 3);
532 ASSERT_EQ(transpose_maps[0], std::vector<int32_t>({0, 1, 2}));
533 ASSERT_EQ(transpose_maps[1], std::vector<int32_t>({3, 2, 1, 4}));
534 ASSERT_EQ(transpose_maps[2], std::vector<int32_t>({3, 5}));
535}
536
537TEST(TestDictionaryType, UnifyString) {
538 auto t1 = dictionary(int16(), ArrayFromJSON(utf8(), "[\"foo\", \"bar\"]"));
539 auto t2 = dictionary(int32(), ArrayFromJSON(utf8(), "[\"quux\", \"foo\"]"));
540
541 auto expected =
542 dictionary(int8(), ArrayFromJSON(utf8(), "[\"foo\", \"bar\", \"quux\"]"));
543
544 std::shared_ptr<DataType> dict_type;
545 ASSERT_OK(
546 DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type));
547 ASSERT_TRUE(dict_type->Equals(expected));
548
549 std::vector<std::vector<int32_t>> transpose_maps;
550 ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type,
551 &transpose_maps));
552 ASSERT_TRUE(dict_type->Equals(expected));
553
554 ASSERT_EQ(transpose_maps.size(), 2);
555 ASSERT_EQ(transpose_maps[0], std::vector<int32_t>({0, 1}));
556 ASSERT_EQ(transpose_maps[1], std::vector<int32_t>({2, 0}));
557}
558
559TEST(TestDictionaryType, UnifyFixedSizeBinary) {
560 auto type = fixed_size_binary(3);
561
562 std::string data = "foobarbazqux";
563 auto buf = std::make_shared<Buffer>(data);
564 // ["foo", "bar"]
565 auto dict1 = std::make_shared<FixedSizeBinaryArray>(type, 2, SliceBuffer(buf, 0, 6));
566 auto t1 = dictionary(int16(), dict1);
567 // ["bar", "baz", "qux"]
568 auto dict2 = std::make_shared<FixedSizeBinaryArray>(type, 3, SliceBuffer(buf, 3, 9));
569 auto t2 = dictionary(int16(), dict2);
570
571 // ["foo", "bar", "baz", "qux"]
572 auto expected_dict = std::make_shared<FixedSizeBinaryArray>(type, 4, buf);
573 auto expected = dictionary(int8(), expected_dict);
574
575 std::shared_ptr<DataType> dict_type;
576 ASSERT_OK(
577 DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type));
578 ASSERT_TRUE(dict_type->Equals(expected));
579
580 std::vector<std::vector<int32_t>> transpose_maps;
581 ASSERT_OK(DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type,
582 &transpose_maps));
583 ASSERT_TRUE(dict_type->Equals(expected));
584 ASSERT_EQ(transpose_maps.size(), 2);
585 ASSERT_EQ(transpose_maps[0], std::vector<int32_t>({0, 1}));
586 ASSERT_EQ(transpose_maps[1], std::vector<int32_t>({1, 2, 3}));
587}
588
589TEST(TestDictionaryType, UnifyLarge) {
590 // Unifying "large" dictionary types should choose the right index type
591 std::shared_ptr<Array> dict1, dict2, expected_dict;
592
593 Int32Builder builder;
594 ASSERT_OK(builder.Reserve(120));
595 for (int32_t i = 0; i < 120; ++i) {
596 builder.UnsafeAppend(i);
597 }
598 ASSERT_OK(builder.Finish(&dict1));
599 ASSERT_EQ(dict1->length(), 120);
600 auto t1 = dictionary(int8(), dict1);
601
602 ASSERT_OK(builder.Reserve(30));
603 for (int32_t i = 110; i < 140; ++i) {
604 builder.UnsafeAppend(i);
605 }
606 ASSERT_OK(builder.Finish(&dict2));
607 ASSERT_EQ(dict2->length(), 30);
608 auto t2 = dictionary(int8(), dict2);
609
610 ASSERT_OK(builder.Reserve(140));
611 for (int32_t i = 0; i < 140; ++i) {
612 builder.UnsafeAppend(i);
613 }
614 ASSERT_OK(builder.Finish(&expected_dict));
615 ASSERT_EQ(expected_dict->length(), 140);
616 // int8 would be too narrow to hold all possible index values
617 auto expected = dictionary(int16(), expected_dict);
618
619 std::shared_ptr<DataType> dict_type;
620 ASSERT_OK(
621 DictionaryType::Unify(default_memory_pool(), {t1.get(), t2.get()}, &dict_type));
622 ASSERT_TRUE(dict_type->Equals(expected));
623}
624
625TEST(TypesTest, TestDecimal128Small) {
626 Decimal128Type t1(8, 4);
627
628 ASSERT_EQ(t1.id(), Type::DECIMAL);
629 ASSERT_EQ(t1.precision(), 8);
630 ASSERT_EQ(t1.scale(), 4);
631
632 ASSERT_EQ(t1.ToString(), std::string("decimal(8, 4)"));
633
634 // Test properties
635 ASSERT_EQ(t1.byte_width(), 16);
636 ASSERT_EQ(t1.bit_width(), 128);
637}
638
639TEST(TypesTest, TestDecimal128Medium) {
640 Decimal128Type t1(12, 5);
641
642 ASSERT_EQ(t1.id(), Type::DECIMAL);
643 ASSERT_EQ(t1.precision(), 12);
644 ASSERT_EQ(t1.scale(), 5);
645
646 ASSERT_EQ(t1.ToString(), std::string("decimal(12, 5)"));
647
648 // Test properties
649 ASSERT_EQ(t1.byte_width(), 16);
650 ASSERT_EQ(t1.bit_width(), 128);
651}
652
653TEST(TypesTest, TestDecimal128Large) {
654 Decimal128Type t1(27, 7);
655
656 ASSERT_EQ(t1.id(), Type::DECIMAL);
657 ASSERT_EQ(t1.precision(), 27);
658 ASSERT_EQ(t1.scale(), 7);
659
660 ASSERT_EQ(t1.ToString(), std::string("decimal(27, 7)"));
661
662 // Test properties
663 ASSERT_EQ(t1.byte_width(), 16);
664 ASSERT_EQ(t1.bit_width(), 128);
665}
666
667} // namespace arrow
668