1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cstdint>
19#include <cstring>
20#include <sstream>
21#include <string>
22
23#include "parquet/types.h"
24
25namespace parquet {
26
27std::string FormatStatValue(Type::type parquet_type, const std::string& val) {
28 std::stringstream result;
29 switch (parquet_type) {
30 case Type::BOOLEAN:
31 result << reinterpret_cast<const bool*>(val.c_str())[0];
32 break;
33 case Type::INT32:
34 result << reinterpret_cast<const int32_t*>(val.c_str())[0];
35 break;
36 case Type::INT64:
37 result << reinterpret_cast<const int64_t*>(val.c_str())[0];
38 break;
39 case Type::DOUBLE:
40 result << reinterpret_cast<const double*>(val.c_str())[0];
41 break;
42 case Type::FLOAT:
43 result << reinterpret_cast<const float*>(val.c_str())[0];
44 break;
45 case Type::INT96: {
46 auto const i32_val = reinterpret_cast<const int32_t*>(val.c_str());
47 result << i32_val[0] << " " << i32_val[1] << " " << i32_val[2];
48 break;
49 }
50 case Type::BYTE_ARRAY: {
51 return val;
52 }
53 case Type::FIXED_LEN_BYTE_ARRAY: {
54 return val;
55 }
56 default:
57 break;
58 }
59 return result.str();
60}
61
62std::string FormatStatValue(Type::type parquet_type, const char* val) {
63 std::stringstream result;
64 switch (parquet_type) {
65 case Type::BOOLEAN:
66 result << reinterpret_cast<const bool*>(val)[0];
67 break;
68 case Type::INT32:
69 result << reinterpret_cast<const int32_t*>(val)[0];
70 break;
71 case Type::INT64:
72 result << reinterpret_cast<const int64_t*>(val)[0];
73 break;
74 case Type::DOUBLE:
75 result << reinterpret_cast<const double*>(val)[0];
76 break;
77 case Type::FLOAT:
78 result << reinterpret_cast<const float*>(val)[0];
79 break;
80 case Type::INT96: {
81 auto const i32_val = reinterpret_cast<const int32_t*>(val);
82 result << i32_val[0] << " " << i32_val[1] << " " << i32_val[2];
83 break;
84 }
85 case Type::BYTE_ARRAY: {
86 result << val;
87 break;
88 }
89 case Type::FIXED_LEN_BYTE_ARRAY: {
90 result << val;
91 break;
92 }
93 default:
94 break;
95 }
96 return result.str();
97}
98
99std::string EncodingToString(Encoding::type t) {
100 switch (t) {
101 case Encoding::PLAIN:
102 return "PLAIN";
103 case Encoding::PLAIN_DICTIONARY:
104 return "PLAIN_DICTIONARY";
105 case Encoding::RLE:
106 return "RLE";
107 case Encoding::BIT_PACKED:
108 return "BIT_PACKED";
109 case Encoding::DELTA_BINARY_PACKED:
110 return "DELTA_BINARY_PACKED";
111 case Encoding::DELTA_LENGTH_BYTE_ARRAY:
112 return "DELTA_LENGTH_BYTE_ARRAY";
113 case Encoding::DELTA_BYTE_ARRAY:
114 return "DELTA_BYTE_ARRAY";
115 case Encoding::RLE_DICTIONARY:
116 return "RLE_DICTIONARY";
117 default:
118 return "UNKNOWN";
119 }
120}
121
122std::string CompressionToString(Compression::type t) {
123 switch (t) {
124 case Compression::UNCOMPRESSED:
125 return "UNCOMPRESSED";
126 case Compression::SNAPPY:
127 return "SNAPPY";
128 case Compression::GZIP:
129 return "GZIP";
130 case Compression::LZO:
131 return "LZO";
132 case Compression::BROTLI:
133 return "BROTLI";
134 case Compression::LZ4:
135 return "LZ4";
136 case Compression::ZSTD:
137 return "ZSTD";
138 default:
139 return "UNKNOWN";
140 }
141}
142
143std::string TypeToString(Type::type t) {
144 switch (t) {
145 case Type::BOOLEAN:
146 return "BOOLEAN";
147 case Type::INT32:
148 return "INT32";
149 case Type::INT64:
150 return "INT64";
151 case Type::INT96:
152 return "INT96";
153 case Type::FLOAT:
154 return "FLOAT";
155 case Type::DOUBLE:
156 return "DOUBLE";
157 case Type::BYTE_ARRAY:
158 return "BYTE_ARRAY";
159 case Type::FIXED_LEN_BYTE_ARRAY:
160 return "FIXED_LEN_BYTE_ARRAY";
161 default:
162 return "UNKNOWN";
163 }
164}
165
166std::string LogicalTypeToString(LogicalType::type t) {
167 switch (t) {
168 case LogicalType::NONE:
169 return "NONE";
170 case LogicalType::UTF8:
171 return "UTF8";
172 case LogicalType::MAP_KEY_VALUE:
173 return "MAP_KEY_VALUE";
174 case LogicalType::LIST:
175 return "LIST";
176 case LogicalType::ENUM:
177 return "ENUM";
178 case LogicalType::DECIMAL:
179 return "DECIMAL";
180 case LogicalType::DATE:
181 return "DATE";
182 case LogicalType::TIME_MILLIS:
183 return "TIME_MILLIS";
184 case LogicalType::TIME_MICROS:
185 return "TIME_MICROS";
186 case LogicalType::TIMESTAMP_MILLIS:
187 return "TIMESTAMP_MILLIS";
188 case LogicalType::TIMESTAMP_MICROS:
189 return "TIMESTAMP_MICROS";
190 case LogicalType::UINT_8:
191 return "UINT_8";
192 case LogicalType::UINT_16:
193 return "UINT_16";
194 case LogicalType::UINT_32:
195 return "UINT_32";
196 case LogicalType::UINT_64:
197 return "UINT_64";
198 case LogicalType::INT_8:
199 return "INT_8";
200 case LogicalType::INT_16:
201 return "INT_16";
202 case LogicalType::INT_32:
203 return "INT_32";
204 case LogicalType::INT_64:
205 return "INT_64";
206 case LogicalType::JSON:
207 return "JSON";
208 case LogicalType::BSON:
209 return "BSON";
210 case LogicalType::INTERVAL:
211 return "INTERVAL";
212 default:
213 return "UNKNOWN";
214 }
215}
216
217int GetTypeByteSize(Type::type parquet_type) {
218 switch (parquet_type) {
219 case Type::BOOLEAN:
220 return type_traits<BooleanType::type_num>::value_byte_size;
221 case Type::INT32:
222 return type_traits<Int32Type::type_num>::value_byte_size;
223 case Type::INT64:
224 return type_traits<Int64Type::type_num>::value_byte_size;
225 case Type::INT96:
226 return type_traits<Int96Type::type_num>::value_byte_size;
227 case Type::DOUBLE:
228 return type_traits<DoubleType::type_num>::value_byte_size;
229 case Type::FLOAT:
230 return type_traits<FloatType::type_num>::value_byte_size;
231 case Type::BYTE_ARRAY:
232 return type_traits<ByteArrayType::type_num>::value_byte_size;
233 case Type::FIXED_LEN_BYTE_ARRAY:
234 return type_traits<FLBAType::type_num>::value_byte_size;
235 default:
236 return 0;
237 }
238 return 0;
239}
240
241// Return the Sort Order of the Parquet Physical Types
242SortOrder::type DefaultSortOrder(Type::type primitive) {
243 switch (primitive) {
244 case Type::BOOLEAN:
245 case Type::INT32:
246 case Type::INT64:
247 case Type::FLOAT:
248 case Type::DOUBLE:
249 return SortOrder::SIGNED;
250 case Type::BYTE_ARRAY:
251 case Type::FIXED_LEN_BYTE_ARRAY:
252 return SortOrder::UNSIGNED;
253 case Type::INT96:
254 return SortOrder::UNKNOWN;
255 }
256 return SortOrder::UNKNOWN;
257}
258
259// Return the SortOrder of the Parquet Types using Logical or Physical Types
260SortOrder::type GetSortOrder(LogicalType::type converted, Type::type primitive) {
261 if (converted == LogicalType::NONE) return DefaultSortOrder(primitive);
262 switch (converted) {
263 case LogicalType::INT_8:
264 case LogicalType::INT_16:
265 case LogicalType::INT_32:
266 case LogicalType::INT_64:
267 case LogicalType::DATE:
268 case LogicalType::TIME_MICROS:
269 case LogicalType::TIME_MILLIS:
270 case LogicalType::TIMESTAMP_MICROS:
271 case LogicalType::TIMESTAMP_MILLIS:
272 return SortOrder::SIGNED;
273 case LogicalType::UINT_8:
274 case LogicalType::UINT_16:
275 case LogicalType::UINT_32:
276 case LogicalType::UINT_64:
277 case LogicalType::ENUM:
278 case LogicalType::UTF8:
279 case LogicalType::BSON:
280 case LogicalType::JSON:
281 return SortOrder::UNSIGNED;
282 case LogicalType::DECIMAL:
283 case LogicalType::LIST:
284 case LogicalType::MAP:
285 case LogicalType::MAP_KEY_VALUE:
286 case LogicalType::INTERVAL:
287 case LogicalType::NONE: // required instead of default
288 case LogicalType::NA: // required instead of default
289 return SortOrder::UNKNOWN;
290 }
291 return SortOrder::UNKNOWN;
292}
293
294ColumnOrder ColumnOrder::undefined_ = ColumnOrder(ColumnOrder::UNDEFINED);
295ColumnOrder ColumnOrder::type_defined_ = ColumnOrder(ColumnOrder::TYPE_DEFINED_ORDER);
296
297} // namespace parquet
298