1#pragma once
2
3#include <Columns/ColumnDecimal.h>
4#include <Columns/ColumnString.h>
5#include <Common/HashTable/HashMap.h>
6#include "DictionaryStructure.h"
7#include "IDictionary.h"
8#include "IDictionarySource.h"
9
10#include <atomic>
11#include <memory>
12#include <variant>
13
14
15namespace DB
16{
17class RangeHashedDictionary final : public IDictionaryBase
18{
19public:
20 RangeHashedDictionary(
21 const std::string & database_,
22 const std::string & name_,
23 const DictionaryStructure & dict_struct_,
24 DictionarySourcePtr source_ptr_,
25 const DictionaryLifetime dict_lifetime_,
26 bool require_nonempty_);
27
28 const std::string & getDatabase() const override { return database; }
29 const std::string & getName() const override { return name; }
30 const std::string & getFullName() const override { return full_name; }
31
32 std::string getTypeName() const override { return "RangeHashed"; }
33
34 size_t getBytesAllocated() const override { return bytes_allocated; }
35
36 size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
37
38 double getHitRate() const override { return 1.0; }
39
40 size_t getElementCount() const override { return element_count; }
41
42 double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }
43
44 std::shared_ptr<const IExternalLoadable> clone() const override
45 {
46 return std::make_shared<RangeHashedDictionary>(database, name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
47 }
48
49 const IDictionarySource * getSource() const override { return source_ptr.get(); }
50
51 const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
52
53 const DictionaryStructure & getStructure() const override { return dict_struct; }
54
55 bool isInjective(const std::string & attribute_name) const override
56 {
57 return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
58 }
59
60 typedef Int64 RangeStorageType;
61
62 template <typename T>
63 using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
64
65#define DECLARE_MULTIPLE_GETTER(TYPE) \
66 void get##TYPE( \
67 const std::string & attribute_name, \
68 const PaddedPODArray<Key> & ids, \
69 const PaddedPODArray<RangeStorageType> & dates, \
70 ResultArrayType<TYPE> & out) const;
71 DECLARE_MULTIPLE_GETTER(UInt8)
72 DECLARE_MULTIPLE_GETTER(UInt16)
73 DECLARE_MULTIPLE_GETTER(UInt32)
74 DECLARE_MULTIPLE_GETTER(UInt64)
75 DECLARE_MULTIPLE_GETTER(UInt128)
76 DECLARE_MULTIPLE_GETTER(Int8)
77 DECLARE_MULTIPLE_GETTER(Int16)
78 DECLARE_MULTIPLE_GETTER(Int32)
79 DECLARE_MULTIPLE_GETTER(Int64)
80 DECLARE_MULTIPLE_GETTER(Float32)
81 DECLARE_MULTIPLE_GETTER(Float64)
82 DECLARE_MULTIPLE_GETTER(Decimal32)
83 DECLARE_MULTIPLE_GETTER(Decimal64)
84 DECLARE_MULTIPLE_GETTER(Decimal128)
85#undef DECLARE_MULTIPLE_GETTER
86
87 void getString(
88 const std::string & attribute_name,
89 const PaddedPODArray<Key> & ids,
90 const PaddedPODArray<RangeStorageType> & dates,
91 ColumnString * out) const;
92
93 BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
94
95 struct Range
96 {
97 RangeStorageType left;
98 RangeStorageType right;
99
100 static bool isCorrectDate(const RangeStorageType & date);
101 bool contains(const RangeStorageType & value) const;
102 };
103
104private:
105 template <typename T>
106 struct Value final
107 {
108 Range range;
109 T value;
110 };
111
112 template <typename T>
113 using Values = std::vector<Value<T>>;
114 template <typename T>
115 using Collection = HashMap<UInt64, Values<T>>;
116 template <typename T>
117 using Ptr = std::unique_ptr<Collection<T>>;
118
119 struct Attribute final
120 {
121 public:
122 AttributeUnderlyingType type;
123 std::variant<
124 UInt8,
125 UInt16,
126 UInt32,
127 UInt64,
128 UInt128,
129 Int8,
130 Int16,
131 Int32,
132 Int64,
133 Decimal32,
134 Decimal64,
135 Decimal128,
136 Float32,
137 Float64,
138 String>
139 null_values;
140 std::variant<
141 Ptr<UInt8>,
142 Ptr<UInt16>,
143 Ptr<UInt32>,
144 Ptr<UInt64>,
145 Ptr<UInt128>,
146 Ptr<Int8>,
147 Ptr<Int16>,
148 Ptr<Int32>,
149 Ptr<Int64>,
150 Ptr<Decimal32>,
151 Ptr<Decimal64>,
152 Ptr<Decimal128>,
153 Ptr<Float32>,
154 Ptr<Float64>,
155 Ptr<StringRef>>
156 maps;
157 std::unique_ptr<Arena> string_arena;
158 };
159
160 void createAttributes();
161
162 void loadData();
163
164 template <typename T>
165 void addAttributeSize(const Attribute & attribute);
166
167 void calculateBytesAllocated();
168
169 template <typename T>
170 void createAttributeImpl(Attribute & attribute, const Field & null_value);
171
172 Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
173
174
175 template <typename OutputType>
176 void getItems(
177 const Attribute & attribute,
178 const PaddedPODArray<Key> & ids,
179 const PaddedPODArray<RangeStorageType> & dates,
180 PaddedPODArray<OutputType> & out) const;
181
182 template <typename AttributeType, typename OutputType>
183 void getItemsImpl(
184 const Attribute & attribute,
185 const PaddedPODArray<Key> & ids,
186 const PaddedPODArray<RangeStorageType> & dates,
187 PaddedPODArray<OutputType> & out) const;
188
189
190 template <typename T>
191 void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value);
192
193 void setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value);
194
195 const Attribute & getAttribute(const std::string & attribute_name) const;
196
197 const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const;
198
199 template <typename RangeType>
200 void getIdsAndDates(PaddedPODArray<Key> & ids, PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const;
201
202 template <typename T, typename RangeType>
203 void getIdsAndDates(
204 const Attribute & attribute,
205 PaddedPODArray<Key> & ids,
206 PaddedPODArray<RangeType> & start_dates,
207 PaddedPODArray<RangeType> & end_dates) const;
208
209 template <typename RangeType>
210 BlockInputStreamPtr getBlockInputStreamImpl(const Names & column_names, size_t max_block_size) const;
211
212 friend struct RangeHashedDIctionaryCallGetBlockInputStreamImpl;
213
214 const std::string database;
215 const std::string name;
216 const std::string full_name;
217 const DictionaryStructure dict_struct;
218 const DictionarySourcePtr source_ptr;
219 const DictionaryLifetime dict_lifetime;
220 const bool require_nonempty;
221
222 std::map<std::string, size_t> attribute_index_by_name;
223 std::vector<Attribute> attributes;
224
225 size_t bytes_allocated = 0;
226 size_t element_count = 0;
227 size_t bucket_count = 0;
228 mutable std::atomic<size_t> query_count{0};
229};
230
231}
232