1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
#include <gtest/gtest.h>

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include "parquet/schema.h"
#include "parquet/types.h"
#include "parquet/util/comparison.h"
27 | |
28 | namespace parquet { |
29 | |
30 | namespace test { |
31 | |
32 | using parquet::schema::NodePtr; |
33 | using parquet::schema::PrimitiveNode; |
34 | |
35 | static ByteArray ByteArrayFromString(const std::string& s) { |
36 | auto ptr = reinterpret_cast<const uint8_t*>(s.data()); |
37 | return ByteArray(static_cast<uint32_t>(s.size()), ptr); |
38 | } |
39 | |
40 | static FLBA FLBAFromString(const std::string& s) { |
41 | auto ptr = reinterpret_cast<const uint8_t*>(s.data()); |
42 | return FLBA(ptr); |
43 | } |
44 | |
45 | TEST(Comparison, signedByteArray) { |
46 | NodePtr node = |
47 | PrimitiveNode::Make("SignedByteArray" , Repetition::REQUIRED, Type::BYTE_ARRAY); |
48 | ColumnDescriptor descr(node, 0, 0); |
49 | |
50 | CompareDefaultByteArray less; |
51 | |
52 | std::string s1 = "12345" ; |
53 | std::string s2 = "12345678" ; |
54 | ByteArray s1ba = ByteArrayFromString(s1); |
55 | ByteArray s2ba = ByteArrayFromString(s2); |
56 | ASSERT_TRUE(less(s1ba, s2ba)); |
57 | |
58 | // This is case where signed comparision UTF-8 (PARQUET-686) is incorrect |
59 | // This example is to only check signed comparison and not UTF-8. |
60 | s1 = u8"bügeln" ; |
61 | s2 = u8"braten" ; |
62 | s1ba = ByteArrayFromString(s1); |
63 | s2ba = ByteArrayFromString(s2); |
64 | ASSERT_TRUE(less(s1ba, s2ba)); |
65 | } |
66 | |
67 | TEST(Comparison, UnsignedByteArray) { |
68 | NodePtr node = PrimitiveNode::Make("UnsignedByteArray" , Repetition::REQUIRED, |
69 | Type::BYTE_ARRAY, LogicalType::UTF8); |
70 | ColumnDescriptor descr(node, 0, 0); |
71 | |
72 | // Check if UTF-8 is compared using unsigned correctly |
73 | CompareUnsignedByteArray uless; |
74 | |
75 | std::string s1 = "arrange" ; |
76 | std::string s2 = "arrangement" ; |
77 | ByteArray s1ba = ByteArrayFromString(s1); |
78 | ByteArray s2ba = ByteArrayFromString(s2); |
79 | ASSERT_TRUE(uless(s1ba, s2ba)); |
80 | |
81 | // Multi-byte UTF-8 characters |
82 | s1 = u8"braten" ; |
83 | s2 = u8"bügeln" ; |
84 | s1ba = ByteArrayFromString(s1); |
85 | s2ba = ByteArrayFromString(s2); |
86 | ASSERT_TRUE(uless(s1ba, s2ba)); |
87 | |
88 | s1 = u8"ünk123456" ; // ü = 252 |
89 | s2 = u8"ănk123456" ; // ă = 259 |
90 | s1ba = ByteArrayFromString(s1); |
91 | s2ba = ByteArrayFromString(s2); |
92 | ASSERT_TRUE(uless(s1ba, s2ba)); |
93 | } |
94 | |
95 | TEST(Comparison, SignedFLBA) { |
96 | int size = 10; |
97 | NodePtr node = PrimitiveNode::Make("SignedFLBA" , Repetition::REQUIRED, |
98 | Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, size); |
99 | ColumnDescriptor descr(node, 0, 0); |
100 | |
101 | CompareDefaultFLBA less(descr.type_length()); |
102 | |
103 | std::string s1 = "Anti123456" ; |
104 | std::string s2 = "Bunkd123456" ; |
105 | FLBA s1flba = FLBAFromString(s1); |
106 | FLBA s2flba = FLBAFromString(s2); |
107 | ASSERT_TRUE(less(s1flba, s2flba)); |
108 | |
109 | s1 = "Bünk123456" ; |
110 | s2 = "Bunk123456" ; |
111 | s1flba = FLBAFromString(s1); |
112 | s2flba = FLBAFromString(s2); |
113 | ASSERT_TRUE(less(s1flba, s2flba)); |
114 | } |
115 | |
116 | TEST(Comparison, UnsignedFLBA) { |
117 | int size = 10; |
118 | NodePtr node = PrimitiveNode::Make("UnsignedFLBA" , Repetition::REQUIRED, |
119 | Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, size); |
120 | ColumnDescriptor descr(node, 0, 0); |
121 | |
122 | CompareUnsignedFLBA uless(descr.type_length()); |
123 | |
124 | std::string s1 = "Anti123456" ; |
125 | std::string s2 = "Bunkd123456" ; |
126 | FLBA s1flba = FLBAFromString(s1); |
127 | FLBA s2flba = FLBAFromString(s2); |
128 | ASSERT_TRUE(uless(s1flba, s2flba)); |
129 | |
130 | s1 = "Bunk123456" ; |
131 | s2 = "Bünk123456" ; |
132 | s1flba = FLBAFromString(s1); |
133 | s2flba = FLBAFromString(s2); |
134 | ASSERT_TRUE(uless(s1flba, s2flba)); |
135 | } |
136 | |
137 | TEST(Comparison, SignedInt96) { |
138 | parquet::Int96 a{{1, 41, 14}}, b{{1, 41, 42}}; |
139 | parquet::Int96 aa{{1, 41, 14}}, bb{{1, 41, 14}}; |
140 | parquet::Int96 aaa{{1, 41, static_cast<uint32_t>(-14)}}, bbb{{1, 41, 42}}; |
141 | |
142 | NodePtr node = PrimitiveNode::Make("SignedInt96" , Repetition::REQUIRED, Type::INT96); |
143 | ColumnDescriptor descr(node, 0, 0); |
144 | |
145 | CompareDefaultInt96 less; |
146 | |
147 | ASSERT_TRUE(less(a, b)); |
148 | ASSERT_TRUE(!less(aa, bb) && !less(bb, aa)); |
149 | ASSERT_TRUE(less(aaa, bbb)); |
150 | } |
151 | |
152 | TEST(Comparison, UnsignedInt96) { |
153 | parquet::Int96 a{{1, 41, 14}}, b{{1, static_cast<uint32_t>(-41), 42}}; |
154 | parquet::Int96 aa{{1, 41, 14}}, bb{{1, 41, static_cast<uint32_t>(-14)}}; |
155 | parquet::Int96 aaa, bbb; |
156 | |
157 | NodePtr node = PrimitiveNode::Make("UnsignedInt96" , Repetition::REQUIRED, Type::INT96); |
158 | ColumnDescriptor descr(node, 0, 0); |
159 | |
160 | CompareUnsignedInt96 uless; |
161 | |
162 | ASSERT_TRUE(uless(a, b)); |
163 | ASSERT_TRUE(uless(aa, bb)); |
164 | |
165 | // INT96 Timestamp |
166 | aaa.value[2] = 2451545; // 2000-01-01 |
167 | bbb.value[2] = 2451546; // 2000-01-02 |
168 | // 12 hours + 34 minutes + 56 seconds. |
169 | Int96SetNanoSeconds(aaa, 45296000000000); |
170 | // 12 hours + 34 minutes + 50 seconds. |
171 | Int96SetNanoSeconds(bbb, 45290000000000); |
172 | ASSERT_TRUE(uless(aaa, bbb)); |
173 | |
174 | aaa.value[2] = 2451545; // 2000-01-01 |
175 | bbb.value[2] = 2451545; // 2000-01-01 |
176 | // 11 hours + 34 minutes + 56 seconds. |
177 | Int96SetNanoSeconds(aaa, 41696000000000); |
178 | // 12 hours + 34 minutes + 50 seconds. |
179 | Int96SetNanoSeconds(bbb, 45290000000000); |
180 | ASSERT_TRUE(uless(aaa, bbb)); |
181 | |
182 | aaa.value[2] = 2451545; // 2000-01-01 |
183 | bbb.value[2] = 2451545; // 2000-01-01 |
184 | // 12 hours + 34 minutes + 55 seconds. |
185 | Int96SetNanoSeconds(aaa, 45295000000000); |
186 | // 12 hours + 34 minutes + 56 seconds. |
187 | Int96SetNanoSeconds(bbb, 45296000000000); |
188 | ASSERT_TRUE(uless(aaa, bbb)); |
189 | } |
190 | |
191 | TEST(Comparison, SignedInt64) { |
192 | int64_t a = 1, b = 4; |
193 | int64_t aa = 1, bb = 1; |
194 | int64_t aaa = -1, bbb = 1; |
195 | |
196 | NodePtr node = PrimitiveNode::Make("SignedInt64" , Repetition::REQUIRED, Type::INT64); |
197 | ColumnDescriptor descr(node, 0, 0); |
198 | |
199 | CompareDefaultInt64 less; |
200 | |
201 | ASSERT_TRUE(less(a, b)); |
202 | ASSERT_TRUE(!less(aa, bb) && !less(bb, aa)); |
203 | ASSERT_TRUE(less(aaa, bbb)); |
204 | } |
205 | |
206 | TEST(Comparison, UnsignedInt64) { |
207 | uint64_t a = 1, b = 4; |
208 | uint64_t aa = 1, bb = 1; |
209 | uint64_t aaa = 1, bbb = -1; |
210 | |
211 | NodePtr node = PrimitiveNode::Make("UnsignedInt64" , Repetition::REQUIRED, Type::INT64); |
212 | ColumnDescriptor descr(node, 0, 0); |
213 | |
214 | CompareUnsignedInt64 less; |
215 | |
216 | ASSERT_TRUE(less(a, b)); |
217 | ASSERT_TRUE(!less(aa, bb) && !less(bb, aa)); |
218 | ASSERT_TRUE(less(aaa, bbb)); |
219 | } |
220 | |
221 | TEST(Comparison, UnsignedInt32) { |
222 | uint32_t a = 1, b = 4; |
223 | uint32_t aa = 1, bb = 1; |
224 | uint32_t aaa = 1, bbb = -1; |
225 | |
226 | NodePtr node = PrimitiveNode::Make("UnsignedInt32" , Repetition::REQUIRED, Type::INT32); |
227 | ColumnDescriptor descr(node, 0, 0); |
228 | |
229 | CompareUnsignedInt32 less; |
230 | |
231 | ASSERT_TRUE(less(a, b)); |
232 | ASSERT_TRUE(!less(aa, bb) && !less(bb, aa)); |
233 | ASSERT_TRUE(less(aaa, bbb)); |
234 | } |
235 | |
236 | TEST(Comparison, UnknownSortOrder) { |
237 | NodePtr node = |
238 | PrimitiveNode::Make("Unknown" , Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY, |
239 | LogicalType::INTERVAL, 12); |
240 | ColumnDescriptor descr(node, 0, 0); |
241 | |
242 | ASSERT_THROW(Comparator::Make(&descr), ParquetException); |
243 | } |
244 | |
245 | } // namespace test |
246 | |
247 | } // namespace parquet |
248 | |