1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#ifndef ARROW_TYPE_H
19#define ARROW_TYPE_H
20
21#include <climits>
22#include <cstdint>
23#include <memory>
24#include <ostream>
25#include <string>
26#include <type_traits>
27#include <unordered_map>
28#include <vector>
29
30#include "arrow/status.h"
31#include "arrow/type_fwd.h" // IWYU pragma: export
32#include "arrow/util/checked_cast.h"
33#include "arrow/util/key_value_metadata.h" // IWYU pragma: export
34#include "arrow/util/macros.h"
35#include "arrow/util/visibility.h"
36#include "arrow/visitor.h" // IWYU pragma: keep
37
38namespace arrow {
39
// Forward declarations of classes that this header refers to before (or
// without) seeing their full definition.
class Array;
class Field;
class MemoryPool;
43
struct Type {
  /// \brief Main data type enumeration
  ///
  /// Every DataType instance carries one of these ids, which offers a cheap
  /// way to find out which category of type it belongs to.
  enum type {
    /// The NULL type, which has no physical storage
    NA = 0,

    /// Boolean stored as 1 bit, bit-packed in LSB order
    BOOL,

    /// 8-bit unsigned little-endian integer
    UINT8,

    /// 8-bit signed little-endian integer
    INT8,

    /// 16-bit unsigned little-endian integer
    UINT16,

    /// 16-bit signed little-endian integer
    INT16,

    /// 32-bit unsigned little-endian integer
    UINT32,

    /// 32-bit signed little-endian integer
    INT32,

    /// 64-bit unsigned little-endian integer
    UINT64,

    /// 64-bit signed little-endian integer
    INT64,

    /// Floating point value occupying 2 bytes
    HALF_FLOAT,

    /// Floating point value occupying 4 bytes
    FLOAT,

    /// Floating point value occupying 8 bytes
    DOUBLE,

    /// Variable-length UTF8 string, as List<Char>
    STRING,

    /// Variable-length bytes, with no guarantee that they are valid UTF8
    BINARY,

    /// Fixed-size binary: every value occupies the same number of bytes
    FIXED_SIZE_BINARY,

    /// Days since the UNIX epoch, as int32_t
    DATE32,

    /// Milliseconds since the UNIX epoch, as int64_t
    DATE64,

    /// Exact timestamp, encoded as an int64 counted from the UNIX epoch.
    /// The default unit is the millisecond
    TIMESTAMP,

    /// Time of day as a signed 32-bit integer, counting either seconds or
    /// milliseconds since midnight
    TIME32,

    /// Time of day as a signed 64-bit integer, counting either microseconds
    /// or nanoseconds since midnight
    TIME64,

    /// SQL-style interval, either YEAR_MONTH or DAY_TIME
    INTERVAL,

    /// Decimal type defined by a precision and a scale. The storage type
    /// depends on those parameters.
    DECIMAL,

    /// List of some logical data type
    LIST,

    /// Struct of logical types
    STRUCT,

    /// Union of logical types
    UNION,

    /// Dictionary, also known as Category, type
    DICTIONARY,

    /// Map, a repeated struct logical type
    MAP
  };
};
139
140/// \brief Base class for all data types
141///
142/// Data types in this library are all *logical*. They can be expressed as
143/// either a primitive physical type (bytes or bits of some fixed size), a
144/// nested type consisting of other data types, or another data type (e.g. a
145/// timestamp encoded as an int64).
146///
147/// Simple datatypes may be entirely described by their Type::type id, but
148/// complex datatypes are usually parametric.
149class ARROW_EXPORT DataType {
150 public:
151 explicit DataType(Type::type id) : id_(id) {}
152 virtual ~DataType();
153
154 /// \brief Return whether the types are equal
155 ///
156 /// Types that are logically convertible from one to another (e.g. List<UInt8>
157 /// and Binary) are NOT equal.
158 virtual bool Equals(const DataType& other) const;
159 /// \brief Return whether the types are equal
160 bool Equals(const std::shared_ptr<DataType>& other) const;
161
162 std::shared_ptr<Field> child(int i) const { return children_[i]; }
163
164 const std::vector<std::shared_ptr<Field>>& children() const { return children_; }
165
166 int num_children() const { return static_cast<int>(children_.size()); }
167
168 virtual Status Accept(TypeVisitor* visitor) const = 0;
169
170 /// \brief A string representation of the type, including any children
171 virtual std::string ToString() const = 0;
172
173 /// \brief A string name of the type, omitting any child fields
174 ///
175 /// \note Experimental API
176 /// \since 0.7.0
177 virtual std::string name() const = 0;
178
179 /// \brief Return the type category
180 Type::type id() const { return id_; }
181
182 protected:
183 Type::type id_;
184 std::vector<std::shared_ptr<Field>> children_;
185
186 private:
187 ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
188};
189
190inline std::ostream& operator<<(std::ostream& os, const DataType& type) {
191 os << type.ToString();
192 return os;
193}
194
195/// \brief Base class for all fixed-width data types
196class ARROW_EXPORT FixedWidthType : public DataType {
197 public:
198 using DataType::DataType;
199
200 virtual int bit_width() const = 0;
201};
202
/// \brief Base class for all data types representing primitive values
///
/// Declares no members of its own; it only inherits the FixedWidthType
/// constructors.
class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
 public:
  using FixedWidthType::FixedWidthType;
};
208
/// \brief Base class for all numeric data types
///
/// Declares no members of its own; it only inherits the PrimitiveCType
/// constructors.
class ARROW_EXPORT Number : public PrimitiveCType {
 public:
  using PrimitiveCType::PrimitiveCType;
};
214
215/// \brief Base class for all integral data types
216class ARROW_EXPORT Integer : public Number {
217 public:
218 using Number::Number;
219 virtual bool is_signed() const = 0;
220};
221
222/// \brief Base class for all floating-point data types
223class ARROW_EXPORT FloatingPoint : public Number {
224 public:
225 using Number::Number;
226 enum Precision { HALF, SINGLE, DOUBLE };
227 virtual Precision precision() const = 0;
228};
229
/// \brief Base class for all parametric data types
///
/// Empty marker class. In this header it is inherited by NestedType,
/// FixedSizeBinaryType, TimeType and TimestampType.
class ParametricType {};
232
/// \brief Base class for nested data types
///
/// In this header it is the base of ListType, StructType and UnionType. It
/// declares no members of its own and only inherits the DataType constructors.
class ARROW_EXPORT NestedType : public DataType, public ParametricType {
 public:
  using DataType::DataType;
};
237
/// Empty marker class. In this header it is inherited by NullType,
/// BooleanType and BinaryType.
class NoExtraMeta {};
239
240/// \brief The combination of a field name and data type, with optional metadata
241///
242/// Fields are used to describe the individual constituents of a
243/// nested DataType or a Schema.
244///
245/// A field's metadata is represented by a KeyValueMetadata instance,
246/// which holds arbitrary key-value pairs.
247class ARROW_EXPORT Field {
248 public:
249 Field(const std::string& name, const std::shared_ptr<DataType>& type,
250 bool nullable = true,
251 const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR)
252 : name_(name), type_(type), nullable_(nullable), metadata_(metadata) {}
253
254 /// \brief Return the field's attached metadata
255 std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }
256
257 /// \brief Return whether the field has non-empty metadata
258 bool HasMetadata() const;
259
260 /// \brief Return a copy of this field with the given metadata attached to it
261 std::shared_ptr<Field> AddMetadata(
262 const std::shared_ptr<const KeyValueMetadata>& metadata) const;
263 /// \brief Return a copy of this field without any metadata attached to it
264 std::shared_ptr<Field> RemoveMetadata() const;
265
266 std::vector<std::shared_ptr<Field>> Flatten() const;
267
268 bool Equals(const Field& other, bool check_metadata = true) const;
269 bool Equals(const std::shared_ptr<Field>& other, bool check_metadata = true) const;
270
271 /// \brief Return a string representation ot the field
272 std::string ToString() const;
273
274 /// \brief Return the field name
275 const std::string& name() const { return name_; }
276 /// \brief Return the field data type
277 std::shared_ptr<DataType> type() const { return type_; }
278 /// \brief Return whether the field is nullable
279 bool nullable() const { return nullable_; }
280
281 private:
282 // Field name
283 std::string name_;
284
285 // The field's data type
286 std::shared_ptr<DataType> type_;
287
288 // Fields can be nullable
289 bool nullable_;
290
291 // The field's metadata, if any
292 std::shared_ptr<const KeyValueMetadata> metadata_;
293};
294
295namespace detail {
296
297template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
298class ARROW_EXPORT CTypeImpl : public BASE {
299 public:
300 using c_type = C_TYPE;
301 static constexpr Type::type type_id = TYPE_ID;
302
303 CTypeImpl() : BASE(TYPE_ID) {}
304
305 int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * CHAR_BIT); }
306
307 Status Accept(TypeVisitor* visitor) const override {
308 return visitor->Visit(internal::checked_cast<const DERIVED&>(*this));
309 }
310
311 std::string ToString() const override { return this->name(); }
312};
313
314template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
315class IntegerTypeImpl : public detail::CTypeImpl<DERIVED, Integer, TYPE_ID, C_TYPE> {
316 bool is_signed() const override { return std::is_signed<C_TYPE>::value; }
317};
318
319} // namespace detail
320
321/// Concrete type class for always-null data
322class ARROW_EXPORT NullType : public DataType, public NoExtraMeta {
323 public:
324 static constexpr Type::type type_id = Type::NA;
325
326 NullType() : DataType(Type::NA) {}
327
328 Status Accept(TypeVisitor* visitor) const override;
329 std::string ToString() const override;
330
331 std::string name() const override { return "null"; }
332};
333
334/// Concrete type class for boolean data
335class ARROW_EXPORT BooleanType : public FixedWidthType, public NoExtraMeta {
336 public:
337 static constexpr Type::type type_id = Type::BOOL;
338
339 BooleanType() : FixedWidthType(Type::BOOL) {}
340
341 Status Accept(TypeVisitor* visitor) const override;
342 std::string ToString() const override;
343
344 int bit_width() const override { return 1; }
345 std::string name() const override { return "bool"; }
346};
347
/// Concrete type class for unsigned 8-bit integer data
///
/// The value type (uint8_t) and the type id (Type::UINT8) are supplied through
/// the IntegerTypeImpl template arguments.
class ARROW_EXPORT UInt8Type
    : public detail::IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> {
 public:
  /// \brief Return the type name, "uint8"
  std::string name() const override { return "uint8"; }
};
354
/// Concrete type class for signed 8-bit integer data
///
/// The value type (int8_t) and the type id (Type::INT8) are supplied through
/// the IntegerTypeImpl template arguments.
class ARROW_EXPORT Int8Type
    : public detail::IntegerTypeImpl<Int8Type, Type::INT8, int8_t> {
 public:
  /// \brief Return the type name, "int8"
  std::string name() const override { return "int8"; }
};
361
/// Concrete type class for unsigned 16-bit integer data
///
/// The value type (uint16_t) and the type id (Type::UINT16) are supplied
/// through the IntegerTypeImpl template arguments.
class ARROW_EXPORT UInt16Type
    : public detail::IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> {
 public:
  /// \brief Return the type name, "uint16"
  std::string name() const override { return "uint16"; }
};
368
/// Concrete type class for signed 16-bit integer data
///
/// The value type (int16_t) and the type id (Type::INT16) are supplied through
/// the IntegerTypeImpl template arguments.
class ARROW_EXPORT Int16Type
    : public detail::IntegerTypeImpl<Int16Type, Type::INT16, int16_t> {
 public:
  /// \brief Return the type name, "int16"
  std::string name() const override { return "int16"; }
};
375
/// Concrete type class for unsigned 32-bit integer data
///
/// The value type (uint32_t) and the type id (Type::UINT32) are supplied
/// through the IntegerTypeImpl template arguments.
class ARROW_EXPORT UInt32Type
    : public detail::IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> {
 public:
  /// \brief Return the type name, "uint32"
  std::string name() const override { return "uint32"; }
};
382
/// Concrete type class for signed 32-bit integer data
///
/// The value type (int32_t) and the type id (Type::INT32) are supplied through
/// the IntegerTypeImpl template arguments.
class ARROW_EXPORT Int32Type
    : public detail::IntegerTypeImpl<Int32Type, Type::INT32, int32_t> {
 public:
  /// \brief Return the type name, "int32"
  std::string name() const override { return "int32"; }
};
389
/// Concrete type class for unsigned 64-bit integer data
///
/// The value type (uint64_t) and the type id (Type::UINT64) are supplied
/// through the IntegerTypeImpl template arguments.
class ARROW_EXPORT UInt64Type
    : public detail::IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> {
 public:
  /// \brief Return the type name, "uint64"
  std::string name() const override { return "uint64"; }
};
396
/// Concrete type class for signed 64-bit integer data
///
/// The value type (int64_t) and the type id (Type::INT64) are supplied through
/// the IntegerTypeImpl template arguments.
class ARROW_EXPORT Int64Type
    : public detail::IntegerTypeImpl<Int64Type, Type::INT64, int64_t> {
 public:
  /// \brief Return the type name, "int64"
  std::string name() const override { return "int64"; }
};
403
/// Concrete type class for 16-bit floating-point data
///
/// The c_type supplied to CTypeImpl is uint16_t, so bit_width() is computed
/// from sizeof(uint16_t).
class ARROW_EXPORT HalfFloatType
    : public detail::CTypeImpl<HalfFloatType, FloatingPoint, Type::HALF_FLOAT, uint16_t> {
 public:
  /// \brief Return the precision category of this type (defined out of line)
  Precision precision() const override;
  /// \brief Return the type name, "halffloat"
  std::string name() const override { return "halffloat"; }
};
411
/// Concrete type class for 32-bit floating-point data (C "float")
///
/// The c_type supplied to CTypeImpl is float.
class ARROW_EXPORT FloatType
    : public detail::CTypeImpl<FloatType, FloatingPoint, Type::FLOAT, float> {
 public:
  /// \brief Return the precision category of this type (defined out of line)
  Precision precision() const override;
  /// \brief Return the type name, "float"
  std::string name() const override { return "float"; }
};
419
/// Concrete type class for 64-bit floating-point data (C "double")
///
/// The c_type supplied to CTypeImpl is double.
class ARROW_EXPORT DoubleType
    : public detail::CTypeImpl<DoubleType, FloatingPoint, Type::DOUBLE, double> {
 public:
  /// \brief Return the precision category of this type (defined out of line)
  Precision precision() const override;
  /// \brief Return the type name, "double"
  std::string name() const override { return "double"; }
};
427
428/// \brief Concrete type class for list data
429///
430/// List data is nested data where each value is a variable number of
431/// child items. Lists can be recursively nested, for example
432/// list(list(int32)).
433class ARROW_EXPORT ListType : public NestedType {
434 public:
435 static constexpr Type::type type_id = Type::LIST;
436
437 // List can contain any other logical value type
438 explicit ListType(const std::shared_ptr<DataType>& value_type)
439 : ListType(std::make_shared<Field>("item", value_type)) {}
440
441 explicit ListType(const std::shared_ptr<Field>& value_field) : NestedType(Type::LIST) {
442 children_ = {value_field};
443 }
444
445 std::shared_ptr<Field> value_field() const { return children_[0]; }
446
447 std::shared_ptr<DataType> value_type() const { return children_[0]->type(); }
448
449 Status Accept(TypeVisitor* visitor) const override;
450 std::string ToString() const override;
451
452 std::string name() const override { return "list"; }
453};
454
455/// \brief Concrete type class for variable-size binary data
456class ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
457 public:
458 static constexpr Type::type type_id = Type::BINARY;
459
460 BinaryType() : BinaryType(Type::BINARY) {}
461
462 Status Accept(TypeVisitor* visitor) const override;
463 std::string ToString() const override;
464 std::string name() const override { return "binary"; }
465
466 protected:
467 // Allow subclasses to change the logical type.
468 explicit BinaryType(Type::type logical_type) : DataType(logical_type) {}
469};
470
471/// \brief Concrete type class for fixed-size binary data
472class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public ParametricType {
473 public:
474 static constexpr Type::type type_id = Type::FIXED_SIZE_BINARY;
475
476 explicit FixedSizeBinaryType(int32_t byte_width)
477 : FixedWidthType(Type::FIXED_SIZE_BINARY), byte_width_(byte_width) {}
478 explicit FixedSizeBinaryType(int32_t byte_width, Type::type override_type_id)
479 : FixedWidthType(override_type_id), byte_width_(byte_width) {}
480
481 Status Accept(TypeVisitor* visitor) const override;
482 std::string ToString() const override;
483 std::string name() const override { return "fixed_size_binary"; }
484
485 int32_t byte_width() const { return byte_width_; }
486 int bit_width() const override;
487
488 protected:
489 int32_t byte_width_;
490};
491
492/// \brief Concrete type class for variable-size string data, utf8-encoded
493class ARROW_EXPORT StringType : public BinaryType {
494 public:
495 static constexpr Type::type type_id = Type::STRING;
496
497 StringType() : BinaryType(Type::STRING) {}
498
499 Status Accept(TypeVisitor* visitor) const override;
500 std::string ToString() const override;
501 std::string name() const override { return "utf8"; }
502};
503
/// \brief Concrete type class for struct data
class ARROW_EXPORT StructType : public NestedType {
 public:
  static constexpr Type::type type_id = Type::STRUCT;

  /// \brief Construct a struct type from its fields
  explicit StructType(const std::vector<std::shared_ptr<Field>>& fields);

  Status Accept(TypeVisitor* visitor) const override;
  std::string ToString() const override;
  /// \brief Return the type name, "struct"
  std::string name() const override { return "struct"; }

  /// Returns null if name not found
  std::shared_ptr<Field> GetFieldByName(const std::string& name) const;

  /// Returns -1 if name not found or if there are multiple fields having the
  /// same name
  int GetFieldIndex(const std::string& name) const;

  /// \deprecated Use GetFieldByName instead
  ARROW_DEPRECATED("Use GetFieldByName")
  std::shared_ptr<Field> GetChildByName(const std::string& name) const;

  /// \deprecated Use GetFieldIndex instead
  ARROW_DEPRECATED("Use GetFieldIndex")
  int GetChildIndex(const std::string& name) const;

 private:
  // Maps a field name to an integer index; presumably the position of the
  // field in the struct -- it is populated outside this header.
  std::unordered_map<std::string, int> name_to_index_;
};
531
/// \brief Base type class for (fixed-size) decimal data
///
/// Stored as a FixedSizeBinaryType whose type id is overridden to
/// Type::DECIMAL.
class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
 public:
  /// \brief Construct a decimal type
  ///
  /// \param byte_width the width of one value, in bytes
  /// \param precision the decimal precision
  /// \param scale the decimal scale
  explicit DecimalType(int32_t byte_width, int32_t precision, int32_t scale)
      : FixedSizeBinaryType(byte_width, Type::DECIMAL),
        precision_(precision),
        scale_(scale) {}

  /// \brief Return the precision given at construction
  int32_t precision() const { return precision_; }
  /// \brief Return the scale given at construction
  int32_t scale() const { return scale_; }

 protected:
  int32_t precision_;
  int32_t scale_;
};
547
/// \brief Concrete type class for 128-bit decimal data
class ARROW_EXPORT Decimal128Type : public DecimalType {
 public:
  static constexpr Type::type type_id = Type::DECIMAL;

  /// \brief Construct a 128-bit decimal type with the given precision and scale
  ///
  /// The byte width handed to DecimalType is fixed at 16 (128 bits).
  explicit Decimal128Type(int32_t precision, int32_t scale)
      : DecimalType(16, precision, scale) {}

  Status Accept(TypeVisitor* visitor) const override;
  std::string ToString() const override;
  /// \brief Return the type name, "decimal"
  std::string name() const override { return "decimal"; }
};
560
/// \brief Holder for the enumeration selecting the mode of a UnionType
struct UnionMode {
  enum type {
    SPARSE = 0,
    DENSE = 1
  };
};
564
/// \brief Concrete type class for union data
class ARROW_EXPORT UnionType : public NestedType {
 public:
  static constexpr Type::type type_id = Type::UNION;

  /// \brief Construct a union type
  ///
  /// \param fields the fields of the union
  /// \param type_codes the type ids used in the data to denote each field
  /// \param mode the union mode, default UnionMode::SPARSE
  UnionType(const std::vector<std::shared_ptr<Field>>& fields,
            const std::vector<uint8_t>& type_codes,
            UnionMode::type mode = UnionMode::SPARSE);

  std::string ToString() const override;
  /// \brief Return the type name, "union"
  std::string name() const override { return "union"; }
  Status Accept(TypeVisitor* visitor) const override;

  /// \brief Return the type codes of the union
  const std::vector<uint8_t>& type_codes() const { return type_codes_; }

  /// \brief Return the union mode
  UnionMode::type mode() const { return mode_; }

 private:
  UnionMode::type mode_;

  // The type id used in the data to indicate each data type in the union. For
  // example, the first type in the union might be denoted by the id 5 (instead
  // of 0).
  std::vector<uint8_t> type_codes_;
};
590
591// ----------------------------------------------------------------------
592// Date and time types
593
/// \brief Unit of a date type: DAY is used by Date32Type, MILLI by Date64Type
enum class DateUnit : char { DAY = 0, MILLI = 1 };
595
/// \brief Base type class for date data
class ARROW_EXPORT DateType : public FixedWidthType {
 public:
  /// \brief Return the unit in which the date is counted
  virtual DateUnit unit() const = 0;

 protected:
  // Protected: only subclasses can construct a DateType.
  explicit DateType(Type::type type_id);
};
604
605/// Concrete type class for 32-bit date data (as number of days since UNIX epoch)
606class ARROW_EXPORT Date32Type : public DateType {
607 public:
608 static constexpr Type::type type_id = Type::DATE32;
609 static constexpr DateUnit UNIT = DateUnit::DAY;
610
611 using c_type = int32_t;
612
613 Date32Type();
614
615 int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
616
617 Status Accept(TypeVisitor* visitor) const override;
618 std::string ToString() const override;
619
620 std::string name() const override { return "date32"; }
621 DateUnit unit() const override { return UNIT; }
622};
623
624/// Concrete type class for 64-bit date data (as number of milliseconds since UNIX epoch)
625class ARROW_EXPORT Date64Type : public DateType {
626 public:
627 static constexpr Type::type type_id = Type::DATE64;
628 static constexpr DateUnit UNIT = DateUnit::MILLI;
629
630 using c_type = int64_t;
631
632 Date64Type();
633
634 int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
635
636 Status Accept(TypeVisitor* visitor) const override;
637 std::string ToString() const override;
638
639 std::string name() const override { return "date64"; }
640 DateUnit unit() const override { return UNIT; }
641};
642
struct TimeUnit {
  /// The unit for a time or timestamp DataType
  enum type {
    SECOND = 0,
    MILLI,
    MICRO,
    NANO
  };
};
647
648static inline std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
649 switch (unit) {
650 case TimeUnit::SECOND:
651 os << "s";
652 break;
653 case TimeUnit::MILLI:
654 os << "ms";
655 break;
656 case TimeUnit::MICRO:
657 os << "us";
658 break;
659 case TimeUnit::NANO:
660 os << "ns";
661 break;
662 }
663 return os;
664}
665
/// Base type class for time data
class ARROW_EXPORT TimeType : public FixedWidthType, public ParametricType {
 public:
  /// \brief Return the unit in which the time is counted
  TimeUnit::type unit() const { return unit_; }

 protected:
  // Protected: only subclasses can construct a TimeType.
  TimeType(Type::type type_id, TimeUnit::type unit);
  TimeUnit::type unit_;
};
675
676class ARROW_EXPORT Time32Type : public TimeType {
677 public:
678 static constexpr Type::type type_id = Type::TIME32;
679 using c_type = int32_t;
680
681 int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
682
683 explicit Time32Type(TimeUnit::type unit = TimeUnit::MILLI);
684
685 Status Accept(TypeVisitor* visitor) const override;
686 std::string ToString() const override;
687
688 std::string name() const override { return "time32"; }
689};
690
691class ARROW_EXPORT Time64Type : public TimeType {
692 public:
693 static constexpr Type::type type_id = Type::TIME64;
694 using c_type = int64_t;
695
696 int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
697
698 explicit Time64Type(TimeUnit::type unit = TimeUnit::MILLI);
699
700 Status Accept(TypeVisitor* visitor) const override;
701 std::string ToString() const override;
702
703 std::string name() const override { return "time64"; }
704};
705
706class ARROW_EXPORT TimestampType : public FixedWidthType, public ParametricType {
707 public:
708 using Unit = TimeUnit;
709
710 typedef int64_t c_type;
711 static constexpr Type::type type_id = Type::TIMESTAMP;
712
713 int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
714
715 explicit TimestampType(TimeUnit::type unit = TimeUnit::MILLI)
716 : FixedWidthType(Type::TIMESTAMP), unit_(unit) {}
717
718 explicit TimestampType(TimeUnit::type unit, const std::string& timezone)
719 : FixedWidthType(Type::TIMESTAMP), unit_(unit), timezone_(timezone) {}
720
721 Status Accept(TypeVisitor* visitor) const override;
722 std::string ToString() const override;
723 std::string name() const override { return "timestamp"; }
724
725 TimeUnit::type unit() const { return unit_; }
726 const std::string& timezone() const { return timezone_; }
727
728 private:
729 TimeUnit::type unit_;
730 std::string timezone_;
731};
732
733class ARROW_EXPORT IntervalType : public FixedWidthType {
734 public:
735 enum class Unit : char { YEAR_MONTH = 0, DAY_TIME = 1 };
736
737 using c_type = int64_t;
738 static constexpr Type::type type_id = Type::INTERVAL;
739
740 int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
741
742 explicit IntervalType(Unit unit = Unit::YEAR_MONTH)
743 : FixedWidthType(Type::INTERVAL), unit_(unit) {}
744
745 Status Accept(TypeVisitor* visitor) const override;
746 std::string ToString() const override { return name(); }
747 std::string name() const override { return "date"; }
748
749 Unit unit() const { return unit_; }
750
751 private:
752 Unit unit_;
753};
754
755// ----------------------------------------------------------------------
756// DictionaryType (for categorical or dictionary-encoded data)
757
/// Concrete type class for dictionary data
class ARROW_EXPORT DictionaryType : public FixedWidthType {
 public:
  static constexpr Type::type type_id = Type::DICTIONARY;

  /// \brief Construct a dictionary type
  ///
  /// \param index_type the type of the dictionary indices
  /// \param dictionary the array holding the dictionary values
  /// \param ordered whether the dictionary is ordered, default false
  DictionaryType(const std::shared_ptr<DataType>& index_type,
                 const std::shared_ptr<Array>& dictionary, bool ordered = false);

  int bit_width() const override;

  /// \brief Return the type of the dictionary indices
  std::shared_ptr<DataType> index_type() const { return index_type_; }

  /// \brief Return the dictionary array (defined out of line)
  std::shared_ptr<Array> dictionary() const;

  Status Accept(TypeVisitor* visitor) const override;
  std::string ToString() const override;
  /// \brief Return the type name, "dictionary"
  std::string name() const override { return "dictionary"; }

  /// \brief Return the ordered flag given at construction
  bool ordered() const { return ordered_; }

  /// \brief Unify several dictionary types
  ///
  /// Compute a resulting dictionary that will allow the union of values
  /// of all input dictionary types. The input types must all have the
  /// same value type.
  /// \param[in] pool Memory pool to allocate dictionary values from
  /// \param[in] types A sequence of input dictionary types
  /// \param[out] out_type The unified dictionary type
  /// \param[out] out_transpose_maps (optionally) A sequence of integer vectors,
  /// one per input type. Each integer vector represents the transposition
  /// of input type indices into unified type indices.
  // XXX Should we return something special (an empty transpose map?) when
  // the transposition is the identity function?
  static Status Unify(MemoryPool* pool, const std::vector<const DataType*>& types,
                      std::shared_ptr<DataType>* out_type,
                      std::vector<std::vector<int32_t>>* out_transpose_maps = NULLPTR);

 private:
  // Must be an integer type (not currently checked)
  std::shared_ptr<DataType> index_type_;
  // The dictionary values
  std::shared_ptr<Array> dictionary_;
  // The ordered flag given at construction
  bool ordered_;
};
801
802// ----------------------------------------------------------------------
803// Schema
804
805/// \class Schema
806/// \brief Sequence of arrow::Field objects describing the columns of a record
807/// batch or table data structure
808class ARROW_EXPORT Schema {
809 public:
810 explicit Schema(const std::vector<std::shared_ptr<Field>>& fields,
811 const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
812
813 explicit Schema(std::vector<std::shared_ptr<Field>>&& fields,
814 const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
815
816 virtual ~Schema() = default;
817
818 /// Returns true if all of the schema fields are equal
819 bool Equals(const Schema& other, bool check_metadata = true) const;
820
821 /// Return the ith schema element. Does not boundscheck
822 std::shared_ptr<Field> field(int i) const { return fields_[i]; }
823
824 /// Returns null if name not found
825 std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
826
827 /// Returns -1 if name not found
828 int64_t GetFieldIndex(const std::string& name) const;
829
830 const std::vector<std::shared_ptr<Field>>& fields() const { return fields_; }
831
832 /// \brief The custom key-value metadata, if any
833 ///
834 /// \return metadata may be null
835 std::shared_ptr<const KeyValueMetadata> metadata() const;
836
837 /// \brief Render a string representation of the schema suitable for debugging
838 std::string ToString() const;
839
840 Status AddField(int i, const std::shared_ptr<Field>& field,
841 std::shared_ptr<Schema>* out) const;
842 Status RemoveField(int i, std::shared_ptr<Schema>* out) const;
843 Status SetField(int i, const std::shared_ptr<Field>& field,
844 std::shared_ptr<Schema>* out) const;
845
846 /// \brief Replace key-value metadata with new metadata
847 ///
848 /// \param[in] metadata new KeyValueMetadata
849 /// \return new Schema
850 std::shared_ptr<Schema> AddMetadata(
851 const std::shared_ptr<const KeyValueMetadata>& metadata) const;
852
853 /// \brief Return copy of Schema without the KeyValueMetadata
854 std::shared_ptr<Schema> RemoveMetadata() const;
855
856 /// \brief Indicates that Schema has non-empty KevValueMetadata
857 bool HasMetadata() const;
858
859 /// \brief Return the number of fields (columns) in the schema
860 int num_fields() const { return static_cast<int>(fields_.size()); }
861
862 private:
863 std::vector<std::shared_ptr<Field>> fields_;
864
865 std::unordered_map<std::string, int> name_to_index_;
866
867 std::shared_ptr<const KeyValueMetadata> metadata_;
868};
869
870// ----------------------------------------------------------------------
871// Parametric factory functions
872// Other factory functions are in type_fwd.h
873
874/// \addtogroup type-factories
875/// @{
876
877/// \brief Create a FixedSizeBinaryType instance
878ARROW_EXPORT
879std::shared_ptr<DataType> fixed_size_binary(int32_t byte_width);
880
881/// \brief Create a Decimal128Type instance
882ARROW_EXPORT
883std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale);
884
885/// \brief Create a ListType instance from its child Field type
886ARROW_EXPORT
887std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_type);
888
889/// \brief Create a ListType instance from its child DataType
890ARROW_EXPORT
891std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type);
892
893/// \brief Create a TimestampType instance from its unit
894ARROW_EXPORT
895std::shared_ptr<DataType> timestamp(TimeUnit::type unit);
896
897/// \brief Create a TimestampType instance from its unit and timezone
898ARROW_EXPORT
899std::shared_ptr<DataType> timestamp(TimeUnit::type unit, const std::string& timezone);
900
901/// \brief Create a 32-bit time type instance
902///
903/// Unit can be either SECOND or MILLI
904std::shared_ptr<DataType> ARROW_EXPORT time32(TimeUnit::type unit);
905
906/// \brief Create a 64-bit time type instance
907///
908/// Unit can be either MICRO or NANO
909std::shared_ptr<DataType> ARROW_EXPORT time64(TimeUnit::type unit);
910
911/// \brief Create a StructType instance
912std::shared_ptr<DataType> ARROW_EXPORT
913struct_(const std::vector<std::shared_ptr<Field>>& fields);
914
915/// \brief Create a UnionType instance
916std::shared_ptr<DataType> ARROW_EXPORT
917union_(const std::vector<std::shared_ptr<Field>>& child_fields,
918 const std::vector<uint8_t>& type_codes, UnionMode::type mode = UnionMode::SPARSE);
919
920/// \brief Create a UnionType instance
921std::shared_ptr<DataType> ARROW_EXPORT
922union_(const std::vector<std::shared_ptr<Array>>& children,
923 UnionMode::type mode = UnionMode::SPARSE);
924
925/// \brief Create a DictionaryType instance
926std::shared_ptr<DataType> ARROW_EXPORT
927dictionary(const std::shared_ptr<DataType>& index_type,
928 const std::shared_ptr<Array>& values, bool ordered = false);
929
930/// @}
931
932/// \defgroup schema-factories Factory functions for fields and schemas
933///
934/// Factory functions for fields and schemas
935/// @{
936
937/// \brief Create a Field instance
938///
939/// \param name the field name
940/// \param type the field value type
941/// \param nullable whether the values are nullable, default true
942/// \param metadata any custom key-value metadata, default null
943std::shared_ptr<Field> ARROW_EXPORT field(
944 const std::string& name, const std::shared_ptr<DataType>& type, bool nullable = true,
945 const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
946
947/// \brief Create a Schema instance
948///
949/// \param fields the schema's fields
950/// \param metadata any custom key-value metadata, default null
951/// \return schema shared_ptr to Schema
952ARROW_EXPORT
953std::shared_ptr<Schema> schema(
954 const std::vector<std::shared_ptr<Field>>& fields,
955 const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
956
957/// \brief Create a Schema instance
958///
959/// \param fields the schema's fields (rvalue reference)
960/// \param metadata any custom key-value metadata, default null
961/// \return schema shared_ptr to Schema
962ARROW_EXPORT
963std::shared_ptr<Schema> schema(
964 std::vector<std::shared_ptr<Field>>&& fields,
965 const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
966
967/// @}
968
969} // namespace arrow
970
971#endif // ARROW_TYPE_H
972