1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <algorithm> |
19 | #include <memory> |
20 | |
21 | #include "parquet/exception.h" |
22 | #include "parquet/schema.h" |
23 | #include "parquet/types.h" |
24 | #include "parquet/util/comparison.h" |
25 | |
26 | namespace parquet { |
27 | |
28 | std::shared_ptr<Comparator> Comparator::Make(const ColumnDescriptor* descr) { |
29 | if (SortOrder::SIGNED == descr->sort_order()) { |
30 | switch (descr->physical_type()) { |
31 | case Type::BOOLEAN: |
32 | return std::make_shared<CompareDefaultBoolean>(); |
33 | case Type::INT32: |
34 | return std::make_shared<CompareDefaultInt32>(); |
35 | case Type::INT64: |
36 | return std::make_shared<CompareDefaultInt64>(); |
37 | case Type::FLOAT: |
38 | return std::make_shared<CompareDefaultFloat>(); |
39 | case Type::DOUBLE: |
40 | return std::make_shared<CompareDefaultDouble>(); |
41 | case Type::BYTE_ARRAY: |
42 | return std::make_shared<CompareDefaultByteArray>(); |
43 | case Type::FIXED_LEN_BYTE_ARRAY: |
44 | return std::make_shared<CompareDefaultFLBA>(descr->type_length()); |
45 | default: |
46 | ParquetException::NYI("Signed Compare not implemented" ); |
47 | } |
48 | } else if (SortOrder::UNSIGNED == descr->sort_order()) { |
49 | switch (descr->physical_type()) { |
50 | case Type::INT32: |
51 | return std::make_shared<CompareUnsignedInt32>(); |
52 | case Type::INT64: |
53 | return std::make_shared<CompareUnsignedInt64>(); |
54 | case Type::BYTE_ARRAY: |
55 | return std::make_shared<CompareUnsignedByteArray>(); |
56 | case Type::FIXED_LEN_BYTE_ARRAY: |
57 | return std::make_shared<CompareUnsignedFLBA>(descr->type_length()); |
58 | default: |
59 | ParquetException::NYI("Unsigned Compare not implemented" ); |
60 | } |
61 | } else { |
62 | throw ParquetException("UNKNOWN Sort Order" ); |
63 | } |
64 | return nullptr; |
65 | } |
66 | |
// Explicit instantiation definitions of CompareDefault for each type tag
// below, so the template's members are emitted from this translation unit.
// NOTE(review): PARQUET_TEMPLATE_EXPORT is presumably the symbol-visibility
// macro for exported templates -- confirm against its definition.
template class PARQUET_TEMPLATE_EXPORT CompareDefault<BooleanType>;
template class PARQUET_TEMPLATE_EXPORT CompareDefault<Int32Type>;
template class PARQUET_TEMPLATE_EXPORT CompareDefault<Int64Type>;
template class PARQUET_TEMPLATE_EXPORT CompareDefault<Int96Type>;
template class PARQUET_TEMPLATE_EXPORT CompareDefault<FloatType>;
template class PARQUET_TEMPLATE_EXPORT CompareDefault<DoubleType>;
template class PARQUET_TEMPLATE_EXPORT CompareDefault<ByteArrayType>;
template class PARQUET_TEMPLATE_EXPORT CompareDefault<FLBAType>;
75 | |
76 | bool CompareUnsignedInt32::operator()(const int32_t& a, const int32_t& b) { |
77 | const uint32_t ua = a; |
78 | const uint32_t ub = b; |
79 | return (ua < ub); |
80 | } |
81 | |
82 | bool CompareUnsignedInt64::operator()(const int64_t& a, const int64_t& b) { |
83 | const uint64_t ua = a; |
84 | const uint64_t ub = b; |
85 | return (ua < ub); |
86 | } |
87 | |
88 | bool CompareUnsignedInt96::operator()(const Int96& a, const Int96& b) { |
89 | if (a.value[2] != b.value[2]) { |
90 | return (a.value[2] < b.value[2]); |
91 | } else if (a.value[1] != b.value[1]) { |
92 | return (a.value[1] < b.value[1]); |
93 | } |
94 | return (a.value[0] < b.value[0]); |
95 | } |
96 | |
97 | bool CompareUnsignedByteArray::operator()(const ByteArray& a, const ByteArray& b) { |
98 | const uint8_t* aptr = reinterpret_cast<const uint8_t*>(a.ptr); |
99 | const uint8_t* bptr = reinterpret_cast<const uint8_t*>(b.ptr); |
100 | return std::lexicographical_compare(aptr, aptr + a.len, bptr, bptr + b.len); |
101 | } |
102 | |
103 | CompareUnsignedFLBA::CompareUnsignedFLBA(int length) : CompareDefaultFLBA(length) {} |
104 | |
105 | bool CompareUnsignedFLBA::operator()(const FLBA& a, const FLBA& b) { |
106 | const uint8_t* aptr = reinterpret_cast<const uint8_t*>(a.ptr); |
107 | const uint8_t* bptr = reinterpret_cast<const uint8_t*>(b.ptr); |
108 | return std::lexicographical_compare(aptr, aptr + type_length_, bptr, |
109 | bptr + type_length_); |
110 | } |
111 | |
112 | } // namespace parquet |
113 | |