1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #ifndef PARQUET_TYPES_H |
19 | #define PARQUET_TYPES_H |
20 | |
#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <memory>
#include <sstream>
#include <string>
27 | |
28 | #include "parquet/platform.h" |
29 | |
// Forward declaration of ::arrow::util::Codec so that this header can refer to
// the type without including its definition.
namespace arrow {
namespace util {

class Codec;

}  // namespace util
}  // namespace arrow
37 | |
38 | namespace parquet { |
39 | |
40 | // ---------------------------------------------------------------------- |
41 | // Metadata enums to match Thrift metadata |
42 | // |
43 | // The reason we maintain our own enums is to avoid transitive dependency on |
44 | // the compiled Thrift headers (and thus thrift/Thrift.h) for users of the |
45 | // public API. After building parquet-cpp, you should not need to include |
46 | // Thrift headers in your application. This means some boilerplate to convert |
47 | // between our types and Parquet's Thrift types. |
48 | // |
// We can also add special values like NONE to distinguish between metadata
// values being set and not set. As an example, consider ConvertedType and
// CompressionCodec.
52 | |
// Mirrors parquet::Type
//
// The enumerators are numbered consecutively from 0, so only the first value
// is spelled out; the remaining ones follow implicitly.
struct Type {
  enum type {
    BOOLEAN = 0,
    INT32,
    INT64,
    INT96,
    FLOAT,
    DOUBLE,
    BYTE_ARRAY,
    FIXED_LEN_BYTE_ARRAY,
    // Sentinel: must stay the final enumerator (value 8).
    UNDEFINED
  };
};
68 | |
// Mirrors parquet::ConvertedType
//
// Every enumerator value is written out explicitly so the gap between
// INTERVAL (22) and NA (25) is visible at a glance.
struct ConvertedType {
  enum type {
    NONE = 0,
    UTF8 = 1,
    MAP = 2,
    MAP_KEY_VALUE = 3,
    LIST = 4,
    ENUM = 5,
    DECIMAL = 6,
    DATE = 7,
    TIME_MILLIS = 8,
    TIME_MICROS = 9,
    TIMESTAMP_MILLIS = 10,
    TIMESTAMP_MICROS = 11,
    UINT_8 = 12,
    UINT_16 = 13,
    UINT_32 = 14,
    UINT_64 = 15,
    INT_8 = 16,
    INT_16 = 17,
    INT_32 = 18,
    INT_64 = 19,
    JSON = 20,
    BSON = 21,
    INTERVAL = 22,
    NA = 25,
    // Sentinel: must stay the final enumerator.
    UNDEFINED = 26
  };
};
100 | |
// Forward declaration of the Thrift-side parquet::format::LogicalType, which
// parquet::LogicalType::FromThrift() and ToThrift() refer to. Declaring it here
// avoids including the compiled Thrift headers from this public header.
namespace format {

class LogicalType;

}
107 | |
// Mirrors parquet::FieldRepetitionType
struct Repetition {
  enum type {
    REQUIRED,  // 0
    OPTIONAL,  // 1
    REPEATED,  // 2
    // Sentinel: must stay the final enumerator (value 3).
    UNDEFINED
  };
};
112 | |
// Sort order for page and column statistics.
//
// Types are associated with sort orders (e.g., UTF8 columns should use
// UNSIGNED) and column stats are aggregated using a sort order. As of
// parquet-format version 2.3.1, the order used to aggregate stats is always
// SIGNED and is not stored in the Parquet file. These stats are discarded for
// types that need unsigned. See PARQUET-686.
//
// Reference:
// parquet-mr/parquet-hadoop/src/main/java/org/apache/parquet/
// format/converter/ParquetMetadataConverter.java
struct SortOrder {
  enum type {
    SIGNED = 0,
    UNSIGNED = 1,
    UNKNOWN = 2
  };
};
125 | |
namespace schema {

// Decimal metadata that accompanies a legacy ConvertedType. It is a plain
// aggregate; elsewhere in this header it is brace-initialized as
// {false, -1, -1} in the order (isset, scale, precision).
struct DecimalMetadata {
  // NOTE(review): presumably true only when scale/precision carry meaningful
  // values -- confirm against the schema code.
  bool isset;
  int32_t scale;
  int32_t precision;
};

}  // namespace schema
135 | |
/// \brief Implementation of parquet.thrift LogicalType types.
///
/// Instances are handed out as std::shared_ptr<const LogicalType> by the static
/// factory functions below. The class is non-copyable and its default
/// constructor is protected.
class PARQUET_EXPORT LogicalType {
 public:
  /// \brief Enumerates the kinds of logical type; returned by type().
  struct Type {
    enum type {
      UNKNOWN = 0,
      STRING = 1,
      MAP,
      LIST,
      ENUM,
      DECIMAL,
      DATE,
      TIME,
      TIMESTAMP,
      INTERVAL,
      INT,
      NIL,  // Thrift NullType
      JSON,
      BSON,
      UUID,
      NONE
    };
  };

  /// \brief Resolution used by the Time and Timestamp logical types.
  struct TimeUnit {
    enum unit { UNKNOWN = 0, MILLIS = 1, MICROS, NANOS };
  };

  /// \brief If possible, return a logical type equivalent to the given legacy
  /// converted type (and decimal metadata if applicable).
  static std::shared_ptr<const LogicalType> FromConvertedType(
      const parquet::ConvertedType::type converted_type,
      const parquet::schema::DecimalMetadata converted_decimal_metadata = {false, -1,
                                                                           -1});

  /// \brief Return the logical type represented by the Thrift intermediary object.
  static std::shared_ptr<const LogicalType> FromThrift(
      const parquet::format::LogicalType& thrift_logical_type);

  /// \brief Return the explicitly requested logical type.
  static std::shared_ptr<const LogicalType> String();
  static std::shared_ptr<const LogicalType> Map();
  static std::shared_ptr<const LogicalType> List();
  static std::shared_ptr<const LogicalType> Enum();
  static std::shared_ptr<const LogicalType> Decimal(int32_t precision, int32_t scale = 0);
  static std::shared_ptr<const LogicalType> Date();
  static std::shared_ptr<const LogicalType> Time(bool is_adjusted_to_utc,
                                                 LogicalType::TimeUnit::unit time_unit);

  /// \brief Create a Timestamp logical type
  /// \param[in] is_adjusted_to_utc set true if the data is UTC-normalized
  /// \param[in] time_unit the resolution of the timestamp
  /// \param[in] is_from_converted_type if true, the timestamp was generated
  /// by translating a legacy converted type of TIMESTAMP_MILLIS or
  /// TIMESTAMP_MICROS. Default is false.
  /// \param[in] force_set_converted_type if true, always set the
  /// legacy ConvertedType TIMESTAMP_MICROS and TIMESTAMP_MILLIS
  /// metadata. Default is false
  static std::shared_ptr<const LogicalType> Timestamp(
      bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit,
      bool is_from_converted_type = false, bool force_set_converted_type = false);

  static std::shared_ptr<const LogicalType> Interval();
  static std::shared_ptr<const LogicalType> Int(int bit_width, bool is_signed);
  static std::shared_ptr<const LogicalType> Null();
  static std::shared_ptr<const LogicalType> JSON();
  static std::shared_ptr<const LogicalType> BSON();
  static std::shared_ptr<const LogicalType> UUID();
  static std::shared_ptr<const LogicalType> None();
  static std::shared_ptr<const LogicalType> Unknown();

  /// \brief Return true if this logical type is consistent with the given underlying
  /// physical type.
  // `parquet::` is spelled out because, inside this class, the unqualified name
  // `Type` refers to the nested LogicalType::Type.
  bool is_applicable(parquet::Type::type primitive_type,
                     int32_t primitive_length = -1) const;

  /// \brief Return true if this logical type is equivalent to the given legacy converted
  /// type (and decimal metadata if applicable).
  bool is_compatible(parquet::ConvertedType::type converted_type,
                     parquet::schema::DecimalMetadata converted_decimal_metadata = {
                         false, -1, -1}) const;

  /// \brief If possible, return the legacy converted type (and decimal metadata if
  /// applicable) equivalent to this logical type.
  parquet::ConvertedType::type ToConvertedType(
      parquet::schema::DecimalMetadata* out_decimal_metadata) const;

  /// \brief Return a printable representation of this logical type.
  std::string ToString() const;

  /// \brief Return a JSON representation of this logical type.
  std::string ToJSON() const;

  /// \brief Return a serializable Thrift object for this logical type.
  parquet::format::LogicalType ToThrift() const;

  /// \brief Return true if the given logical type is equivalent to this logical type.
  bool Equals(const LogicalType& other) const;

  /// \brief Return the enumerated type of this logical type.
  LogicalType::Type::type type() const;

  /// \brief Return the appropriate sort order for this logical type.
  SortOrder::type sort_order() const;

  // Type checks ...
  bool is_string() const;
  bool is_map() const;
  bool is_list() const;
  bool is_enum() const;
  bool is_decimal() const;
  bool is_date() const;
  bool is_time() const;
  bool is_timestamp() const;
  bool is_interval() const;
  bool is_int() const;
  bool is_null() const;
  bool is_JSON() const;
  bool is_BSON() const;
  bool is_UUID() const;
  bool is_none() const;
  /// \brief Return true if this logical type is of a known type.
  bool is_valid() const;
  // NOTE(review): presumably the negation of is_valid() -- confirm in the
  // implementation file.
  bool is_invalid() const;
  /// \brief Return true if this logical type is suitable for a schema GroupNode.
  bool is_nested() const;
  // NOTE(review): presumably the negation of is_nested() -- confirm in the
  // implementation file.
  bool is_nonnested() const;
  /// \brief Return true if this logical type is included in the Thrift output for its
  /// node.
  bool is_serialized() const;

  // Copying is disabled; instances are shared through the pointers returned by
  // the factory functions above.
  LogicalType(const LogicalType&) = delete;
  LogicalType& operator=(const LogicalType&) = delete;
  virtual ~LogicalType() noexcept;

 protected:
  LogicalType();

  // Impl is only declared here; its definition is not part of this header.
  class Impl;
  std::unique_ptr<const Impl> impl_;
};
277 | |
/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
class PARQUET_EXPORT StringLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  StringLogicalType() = default;
};
286 | |
/// \brief Allowed for group nodes only.
class PARQUET_EXPORT MapLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  MapLogicalType() = default;
};
295 | |
/// \brief Allowed for group nodes only.
class PARQUET_EXPORT ListLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  ListLogicalType() = default;
};
304 | |
/// \brief Allowed for physical type BYTE_ARRAY, must be encoded as UTF-8.
class PARQUET_EXPORT EnumLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  EnumLogicalType() = default;
};
313 | |
/// \brief Allowed for physical type INT32, INT64, FIXED_LEN_BYTE_ARRAY, or BYTE_ARRAY,
/// depending on the precision.
class PARQUET_EXPORT DecimalLogicalType : public LogicalType {
 public:
  /// \brief Factory function taking the decimal precision and scale.
  /// \param[in] precision the decimal precision
  /// \param[in] scale the decimal scale; defaults to 0
  static std::shared_ptr<const LogicalType> Make(int32_t precision, int32_t scale = 0);
  /// \brief Accessor for the decimal precision.
  int32_t precision() const;
  /// \brief Accessor for the decimal scale.
  int32_t scale() const;

 private:
  // Private, so client code cannot construct this type directly.
  DecimalLogicalType() = default;
};
325 | |
/// \brief Allowed for physical type INT32.
class PARQUET_EXPORT DateLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  DateLogicalType() = default;
};
334 | |
/// \brief Allowed for physical type INT32 (for MILLIS) or INT64 (for MICROS and NANOS).
class PARQUET_EXPORT TimeLogicalType : public LogicalType {
 public:
  /// \brief Factory function taking the UTC-adjustment flag and the time resolution.
  static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
                                                 LogicalType::TimeUnit::unit time_unit);
  /// \brief Accessor for the UTC-adjustment flag.
  bool is_adjusted_to_utc() const;
  /// \brief Accessor for the time resolution.
  LogicalType::TimeUnit::unit time_unit() const;

 private:
  // Private, so client code cannot construct this type directly.
  TimeLogicalType() = default;
};
346 | |
/// \brief Allowed for physical type INT64.
class PARQUET_EXPORT TimestampLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the parameters have the same names, order and
  /// defaults as those of LogicalType::Timestamp().
  static std::shared_ptr<const LogicalType> Make(bool is_adjusted_to_utc,
                                                 LogicalType::TimeUnit::unit time_unit,
                                                 bool is_from_converted_type = false,
                                                 bool force_set_converted_type = false);
  /// \brief Accessor for the UTC-adjustment flag.
  bool is_adjusted_to_utc() const;
  /// \brief Accessor for the timestamp resolution.
  LogicalType::TimeUnit::unit time_unit() const;

  /// \brief If true, will not set LogicalType in Thrift metadata
  bool is_from_converted_type() const;

  /// \brief If true, will set ConvertedType for micros and millis
  /// resolution in legacy ConvertedType Thrift metadata
  bool force_set_converted_type() const;

 private:
  // Private, so client code cannot construct this type directly.
  TimestampLogicalType() = default;
};
367 | |
/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 12
class PARQUET_EXPORT IntervalLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  IntervalLogicalType() = default;
};
376 | |
/// \brief Allowed for physical type INT32 (for bit widths 8, 16, and 32) and INT64
/// (for bit width 64).
class PARQUET_EXPORT IntLogicalType : public LogicalType {
 public:
  /// \brief Factory function taking the integer bit width and signedness.
  static std::shared_ptr<const LogicalType> Make(int bit_width, bool is_signed);
  /// \brief Accessor for the bit width.
  int bit_width() const;
  /// \brief Accessor for the signedness flag.
  bool is_signed() const;

 private:
  // Private, so client code cannot construct this type directly.
  IntLogicalType() = default;
};
388 | |
/// \brief Allowed for any physical type.
class PARQUET_EXPORT NullLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  NullLogicalType() = default;
};
397 | |
/// \brief Allowed for physical type BYTE_ARRAY.
class PARQUET_EXPORT JSONLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  JSONLogicalType() = default;
};
406 | |
/// \brief Allowed for physical type BYTE_ARRAY.
class PARQUET_EXPORT BSONLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  BSONLogicalType() = default;
};
415 | |
/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 16,
/// must encode raw UUID bytes.
class PARQUET_EXPORT UUIDLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  UUIDLogicalType() = default;
};
425 | |
/// \brief Allowed for any physical type.
class PARQUET_EXPORT NoLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  NoLogicalType() = default;
};
434 | |
/// \brief Allowed for any type.
class PARQUET_EXPORT UnknownLogicalType : public LogicalType {
 public:
  /// \brief Factory function; the result is typed as the const base class.
  static std::shared_ptr<const LogicalType> Make();

 private:
  // Private, so client code cannot construct this type directly.
  UnknownLogicalType() = default;
};
443 | |
// Data encodings. Mirrors parquet::Encoding
//
// Value 1 is not assigned here, so PLAIN_DICTIONARY is pinned to 2 and the
// following enumerators continue consecutively from it.
struct Encoding {
  enum type {
    PLAIN = 0,
    PLAIN_DICTIONARY = 2,
    RLE,                      // 3
    BIT_PACKED,               // 4
    DELTA_BINARY_PACKED,      // 5
    DELTA_LENGTH_BYTE_ARRAY,  // 6
    DELTA_BYTE_ARRAY,         // 7
    RLE_DICTIONARY,           // 8
    UNKNOWN = 999
  };
};
458 | |
/// \brief Return true if Parquet supports indicated compression type
PARQUET_EXPORT
bool IsCodecSupported(Compression::type codec);

// NOTE(review): `Codec` and `Compression` are not declared in this header;
// presumably they come from parquet/platform.h -- confirm.

/// \brief Obtain a Codec for the given compression type.
/// NOTE(review): behaviour for unsupported codecs is not visible from this
/// header -- check the implementation.
PARQUET_EXPORT
std::unique_ptr<Codec> GetCodec(Compression::type codec);

/// \brief Overload of GetCodec that additionally takes a compression level.
PARQUET_EXPORT
std::unique_ptr<Codec> GetCodec(Compression::type codec, int compression_level);
468 | |
// Identifiers for the encryption algorithms.
struct Encryption {
  enum type {
    AES_GCM_V1,     // 0
    AES_GCM_CTR_V1  // 1
  };
};
472 | |
// parquet::PageType
struct PageType {
  enum type {
    DATA_PAGE = 0,
    INDEX_PAGE = 1,
    DICTIONARY_PAGE = 2,
    DATA_PAGE_V2 = 3
  };
};
477 | |
/// \brief Small value wrapper around a column ordering enumerator.
class ColumnOrder {
 public:
  enum type { UNDEFINED, TYPE_DEFINED_ORDER };

  /// \brief Wrap the given ordering.
  explicit ColumnOrder(ColumnOrder::type column_order) : column_order_(column_order) {}
  // Default to Type Defined Order
  ColumnOrder() : column_order_(type::TYPE_DEFINED_ORDER) {}

  /// \brief Return the wrapped ordering.
  ///
  /// const-qualified so it can also be called on const objects and through
  /// const references; it only reads the member.
  ColumnOrder::type get_order() const { return column_order_; }

  // Shared instances; they are only declared here and must be defined in an
  // implementation file.
  static ColumnOrder undefined_;
  static ColumnOrder type_defined_;

 private:
  ColumnOrder::type column_order_;
};
492 | |
493 | // ---------------------------------------------------------------------- |
494 | |
495 | struct ByteArray { |
496 | ByteArray() : len(0), ptr(NULLPTR) {} |
497 | ByteArray(uint32_t len, const uint8_t* ptr) : len(len), ptr(ptr) {} |
498 | |
499 | ByteArray(::arrow::util::string_view view) // NOLINT implicit conversion |
500 | : ByteArray(static_cast<uint32_t>(view.size()), |
501 | reinterpret_cast<const uint8_t*>(view.data())) {} |
502 | uint32_t len; |
503 | const uint8_t* ptr; |
504 | }; |
505 | |
506 | inline bool operator==(const ByteArray& left, const ByteArray& right) { |
507 | return left.len == right.len && |
508 | (left.len == 0 || std::memcmp(left.ptr, right.ptr, left.len) == 0); |
509 | } |
510 | |
511 | inline bool operator!=(const ByteArray& left, const ByteArray& right) { |
512 | return !(left == right); |
513 | } |
514 | |
515 | struct FixedLenByteArray { |
516 | FixedLenByteArray() : ptr(NULLPTR) {} |
517 | explicit FixedLenByteArray(const uint8_t* ptr) : ptr(ptr) {} |
518 | const uint8_t* ptr; |
519 | }; |
520 | |
521 | using FLBA = FixedLenByteArray; |
522 | |
// Julian day at unix epoch.
//
// The Julian Day Number (JDN) is the integer assigned to a whole solar day in
// the Julian day count starting from noon Universal time, with Julian day
// number 0 assigned to the day starting at noon on Monday, January 1, 4713 BC,
// proleptic Julian calendar (November 24, 4714 BC, in the proleptic Gregorian
// calendar).
constexpr int64_t kJulianToUnixEpochDays = INT64_C(2440588);

// Length of one day in progressively finer units; each constant is derived
// from the previous one by a factor of 1000.
constexpr int64_t kSecondsPerDay = INT64_C(24) * 60 * 60;
constexpr int64_t kMillisecondsPerDay = INT64_C(1000) * kSecondsPerDay;
constexpr int64_t kMicrosecondsPerDay = INT64_C(1000) * kMillisecondsPerDay;
constexpr int64_t kNanosecondsPerDay = INT64_C(1000) * kMicrosecondsPerDay;
535 | |
// 96-bit value stored as three 32-bit words. Int96GetNanoSeconds() reads the
// first two words as a 64-bit nanosecond count and value[2] as a Julian day.
// NOTE(review): MANUALLY_ALIGNED_STRUCT / STRUCT_END are defined outside this
// header; presumably they request 1-byte alignment and check that the size is
// 12 bytes -- confirm in the platform header.
MANUALLY_ALIGNED_STRUCT(1) Int96 { uint32_t value[3]; };
STRUCT_END(Int96, 12);
538 | |
539 | inline bool operator==(const Int96& left, const Int96& right) { |
540 | return std::equal(left.value, left.value + 3, right.value); |
541 | } |
542 | |
543 | inline bool operator!=(const Int96& left, const Int96& right) { return !(left == right); } |
544 | |
545 | static inline std::string ByteArrayToString(const ByteArray& a) { |
546 | return std::string(reinterpret_cast<const char*>(a.ptr), a.len); |
547 | } |
548 | |
549 | static inline void Int96SetNanoSeconds(parquet::Int96& i96, int64_t nanoseconds) { |
550 | std::memcpy(&i96.value, &nanoseconds, sizeof(nanoseconds)); |
551 | } |
552 | |
553 | static inline int64_t Int96GetNanoSeconds(const parquet::Int96& i96) { |
554 | int64_t days_since_epoch = i96.value[2] - kJulianToUnixEpochDays; |
555 | int64_t nanoseconds = 0; |
556 | |
557 | memcpy(&nanoseconds, &i96.value, sizeof(int64_t)); |
558 | return days_since_epoch * kNanosecondsPerDay + nanoseconds; |
559 | } |
560 | |
561 | static inline std::string Int96ToString(const Int96& a) { |
562 | std::ostringstream result; |
563 | std::copy(a.value, a.value + 3, std::ostream_iterator<uint32_t>(result, " " )); |
564 | return result.str(); |
565 | } |
566 | |
567 | static inline std::string FixedLenByteArrayToString(const FixedLenByteArray& a, int len) { |
568 | std::ostringstream result; |
569 | std::copy(a.ptr, a.ptr + len, std::ostream_iterator<uint32_t>(result, " " )); |
570 | return result.str(); |
571 | } |
572 | |
// Compile-time traits keyed on a physical Type::type enumerator. The primary
// template is intentionally empty; each specialization below provides
// `value_type`, `value_byte_size` and `printf_code`.
template <Type::type TYPE>
struct type_traits {};
575 | |
// Traits for Type::BOOLEAN: values are represented as C++ bool.
template <>
struct type_traits<Type::BOOLEAN> {
  using value_type = bool;

  static constexpr int value_byte_size = 1;
  // Conversion specifier without the leading '%'; format_fwf() adds that.
  static constexpr const char* printf_code = "d";
};
583 | |
// Traits for Type::INT32: values are represented as int32_t.
template <>
struct type_traits<Type::INT32> {
  using value_type = int32_t;

  static constexpr int value_byte_size = 4;
  // Conversion specifier without the leading '%'; format_fwf() adds that.
  static constexpr const char* printf_code = "d";
};
591 | |
592 | template <> |
593 | struct type_traits<Type::INT64> { |
594 | using value_type = int64_t; |
595 | |
596 | static constexpr int value_byte_size = 8; |
597 | static constexpr const char* printf_code = "ld" ; |
598 | }; |
599 | |
// Traits for Type::INT96: values are represented as the Int96 struct.
template <>
struct type_traits<Type::INT96> {
  using value_type = Int96;

  static constexpr int value_byte_size = 12;
  // Conversion specifier without the leading '%'; format_fwf() adds that.
  // "s" means the value is expected to be turned into a string before printing.
  static constexpr const char* printf_code = "s";
};
607 | |
// Traits for Type::FLOAT: values are represented as float.
template <>
struct type_traits<Type::FLOAT> {
  using value_type = float;

  static constexpr int value_byte_size = 4;
  // Conversion specifier without the leading '%'; format_fwf() adds that.
  static constexpr const char* printf_code = "f";
};
615 | |
// Traits for Type::DOUBLE: values are represented as double.
template <>
struct type_traits<Type::DOUBLE> {
  using value_type = double;

  static constexpr int value_byte_size = 8;
  // Conversion specifier without the leading '%'; format_fwf() adds that.
  static constexpr const char* printf_code = "lf";
};
623 | |
// Traits for Type::BYTE_ARRAY: values are represented as the ByteArray struct.
template <>
struct type_traits<Type::BYTE_ARRAY> {
  using value_type = ByteArray;

  // Size of the in-memory ByteArray struct, not of the bytes it points to.
  static constexpr int value_byte_size = sizeof(ByteArray);
  // Conversion specifier without the leading '%'; format_fwf() adds that.
  static constexpr const char* printf_code = "s";
};
631 | |
// Traits for Type::FIXED_LEN_BYTE_ARRAY: values are represented as the
// FixedLenByteArray struct.
template <>
struct type_traits<Type::FIXED_LEN_BYTE_ARRAY> {
  using value_type = FixedLenByteArray;

  // Size of the in-memory FixedLenByteArray struct, not of the bytes it
  // points to.
  static constexpr int value_byte_size = sizeof(FixedLenByteArray);
  // Conversion specifier without the leading '%'; format_fwf() adds that.
  static constexpr const char* printf_code = "s";
};
639 | |
// Tag type that bundles a physical Type::type enumerator with its C++ value
// type: `c_type` is taken from type_traits<TYPE>::value_type and `type_num`
// is the enumerator itself.
template <Type::type TYPE>
struct PhysicalType {
  using c_type = typename type_traits<TYPE>::value_type;
  static constexpr Type::type type_num = TYPE;
};
645 | |
// Convenience aliases: one PhysicalType instantiation per physical type.
using BooleanType = PhysicalType<Type::BOOLEAN>;
using Int32Type = PhysicalType<Type::INT32>;
using Int64Type = PhysicalType<Type::INT64>;
using Int96Type = PhysicalType<Type::INT96>;
using FloatType = PhysicalType<Type::FLOAT>;
using DoubleType = PhysicalType<Type::DOUBLE>;
using ByteArrayType = PhysicalType<Type::BYTE_ARRAY>;
using FLBAType = PhysicalType<Type::FIXED_LEN_BYTE_ARRAY>;
654 | |
655 | template <typename Type> |
656 | inline std::string format_fwf(int width) { |
657 | std::stringstream ss; |
658 | ss << "%-" << width << type_traits<Type::type_num>::printf_code; |
659 | return ss.str(); |
660 | } |
661 | |
// String conversions for the metadata enums; only the declarations live in
// this header.
// NOTE(review): presumably each returns the enumerator's name -- confirm in
// the implementation file.
PARQUET_EXPORT std::string EncodingToString(Encoding::type t);

PARQUET_EXPORT std::string ConvertedTypeToString(ConvertedType::type t);

PARQUET_EXPORT std::string TypeToString(Type::type t);
667 | |
// Format a statistics value of the given physical type as a string.
// NOTE(review): the expected encoding of `val` is not visible from this header
// -- check the implementation.
PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type,
                                           const std::string& val);

/// \deprecated Since 1.5.0
ARROW_DEPRECATED("Use std::string instead of char* as input")
PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type, const char* val);
674 | |
// Byte size associated with a physical type.
// NOTE(review): presumably the runtime counterpart of
// type_traits<T>::value_byte_size -- confirm in the implementation file.
PARQUET_EXPORT int GetTypeByteSize(Type::type t);
676 | |
// Sort-order lookups. DefaultSortOrder takes only the physical type; the two
// GetSortOrder overloads additionally take either a legacy converted type or
// a logical type.
PARQUET_EXPORT SortOrder::type DefaultSortOrder(Type::type primitive);

PARQUET_EXPORT SortOrder::type GetSortOrder(ConvertedType::type converted,
                                            Type::type primitive);

PARQUET_EXPORT SortOrder::type GetSortOrder(
    const std::shared_ptr<const LogicalType>& logical_type, Type::type primitive);
684 | |
namespace internal {

// Maps a decimal precision to a size.
// NOTE(review): presumably the number of bytes needed to store a decimal of
// that precision -- confirm in the implementation file.
PARQUET_EXPORT
int32_t DecimalSize(int32_t precision);

}  // namespace internal
691 | } // namespace parquet |
692 | |
693 | #endif // PARQUET_TYPES_H |
694 | |