1#include "duckdb/common/exception.hpp"
2#include "duckdb/common/field_writer.hpp"
3#include "duckdb/common/string_util.hpp"
4#include "duckdb/common/types/vector.hpp"
5#include "duckdb/storage/statistics/base_statistics.hpp"
6#include "duckdb/storage/statistics/list_stats.hpp"
7#include "duckdb/storage/statistics/struct_stats.hpp"
8
9namespace duckdb {
10
11BaseStatistics::BaseStatistics() : type(LogicalType::INVALID) {
12}
13
14BaseStatistics::BaseStatistics(LogicalType type) {
15 Construct(stats&: *this, type: std::move(type));
16}
17
18void BaseStatistics::Construct(BaseStatistics &stats, LogicalType type) {
19 stats.distinct_count = 0;
20 stats.type = std::move(type);
21 switch (GetStatsType(type: stats.type)) {
22 case StatisticsType::LIST_STATS:
23 ListStats::Construct(stats);
24 break;
25 case StatisticsType::STRUCT_STATS:
26 StructStats::Construct(stats);
27 break;
28 default:
29 break;
30 }
31}
32
33BaseStatistics::~BaseStatistics() {
34}
35
36BaseStatistics::BaseStatistics(BaseStatistics &&other) noexcept {
37 std::swap(a&: type, b&: other.type);
38 has_null = other.has_null;
39 has_no_null = other.has_no_null;
40 distinct_count = other.distinct_count;
41 stats_union = other.stats_union;
42 std::swap(a&: child_stats, b&: other.child_stats);
43}
44
45BaseStatistics &BaseStatistics::operator=(BaseStatistics &&other) noexcept {
46 std::swap(a&: type, b&: other.type);
47 has_null = other.has_null;
48 has_no_null = other.has_no_null;
49 distinct_count = other.distinct_count;
50 stats_union = other.stats_union;
51 std::swap(a&: child_stats, b&: other.child_stats);
52 return *this;
53}
54
55StatisticsType BaseStatistics::GetStatsType(const LogicalType &type) {
56 if (type.id() == LogicalTypeId::SQLNULL) {
57 return StatisticsType::BASE_STATS;
58 }
59 switch (type.InternalType()) {
60 case PhysicalType::BOOL:
61 case PhysicalType::INT8:
62 case PhysicalType::INT16:
63 case PhysicalType::INT32:
64 case PhysicalType::INT64:
65 case PhysicalType::UINT8:
66 case PhysicalType::UINT16:
67 case PhysicalType::UINT32:
68 case PhysicalType::UINT64:
69 case PhysicalType::INT128:
70 case PhysicalType::FLOAT:
71 case PhysicalType::DOUBLE:
72 return StatisticsType::NUMERIC_STATS;
73 case PhysicalType::VARCHAR:
74 return StatisticsType::STRING_STATS;
75 case PhysicalType::STRUCT:
76 return StatisticsType::STRUCT_STATS;
77 case PhysicalType::LIST:
78 return StatisticsType::LIST_STATS;
79 case PhysicalType::BIT:
80 case PhysicalType::INTERVAL:
81 default:
82 return StatisticsType::BASE_STATS;
83 }
84}
85
86StatisticsType BaseStatistics::GetStatsType() const {
87 return GetStatsType(type: GetType());
88}
89
90void BaseStatistics::InitializeUnknown() {
91 has_null = true;
92 has_no_null = true;
93}
94
95void BaseStatistics::InitializeEmpty() {
96 has_null = false;
97 has_no_null = true;
98}
99
100bool BaseStatistics::CanHaveNull() const {
101 return has_null;
102}
103
104bool BaseStatistics::CanHaveNoNull() const {
105 return has_no_null;
106}
107
108bool BaseStatistics::IsConstant() const {
109 if (type.id() == LogicalTypeId::VALIDITY) {
110 // validity mask
111 if (CanHaveNull() && !CanHaveNoNull()) {
112 return true;
113 }
114 if (!CanHaveNull() && CanHaveNoNull()) {
115 return true;
116 }
117 return false;
118 }
119 switch (GetStatsType()) {
120 case StatisticsType::NUMERIC_STATS:
121 return NumericStats::IsConstant(stats: *this);
122 default:
123 break;
124 }
125 return false;
126}
127
128void BaseStatistics::Merge(const BaseStatistics &other) {
129 has_null = has_null || other.has_null;
130 has_no_null = has_no_null || other.has_no_null;
131 switch (GetStatsType()) {
132 case StatisticsType::NUMERIC_STATS:
133 NumericStats::Merge(stats&: *this, other_p: other);
134 break;
135 case StatisticsType::STRING_STATS:
136 StringStats::Merge(stats&: *this, other);
137 break;
138 case StatisticsType::LIST_STATS:
139 ListStats::Merge(stats&: *this, other);
140 break;
141 case StatisticsType::STRUCT_STATS:
142 StructStats::Merge(stats&: *this, other);
143 break;
144 default:
145 break;
146 }
147}
148
149idx_t BaseStatistics::GetDistinctCount() {
150 return distinct_count;
151}
152
153BaseStatistics BaseStatistics::CreateUnknownType(LogicalType type) {
154 switch (GetStatsType(type)) {
155 case StatisticsType::NUMERIC_STATS:
156 return NumericStats::CreateUnknown(type: std::move(type));
157 case StatisticsType::STRING_STATS:
158 return StringStats::CreateUnknown(type: std::move(type));
159 case StatisticsType::LIST_STATS:
160 return ListStats::CreateUnknown(type: std::move(type));
161 case StatisticsType::STRUCT_STATS:
162 return StructStats::CreateUnknown(type: std::move(type));
163 default:
164 return BaseStatistics(std::move(type));
165 }
166}
167
168BaseStatistics BaseStatistics::CreateEmptyType(LogicalType type) {
169 switch (GetStatsType(type)) {
170 case StatisticsType::NUMERIC_STATS:
171 return NumericStats::CreateEmpty(type: std::move(type));
172 case StatisticsType::STRING_STATS:
173 return StringStats::CreateEmpty(type: std::move(type));
174 case StatisticsType::LIST_STATS:
175 return ListStats::CreateEmpty(type: std::move(type));
176 case StatisticsType::STRUCT_STATS:
177 return StructStats::CreateEmpty(type: std::move(type));
178 default:
179 return BaseStatistics(std::move(type));
180 }
181}
182
183BaseStatistics BaseStatistics::CreateUnknown(LogicalType type) {
184 auto result = CreateUnknownType(type: std::move(type));
185 result.InitializeUnknown();
186 return result;
187}
188
189BaseStatistics BaseStatistics::CreateEmpty(LogicalType type) {
190 if (type.InternalType() == PhysicalType::BIT) {
191 // FIXME: this special case should not be necessary
192 // but currently InitializeEmpty sets StatsInfo::CAN_HAVE_VALID_VALUES
193 BaseStatistics result(std::move(type));
194 result.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
195 result.Set(StatsInfo::CANNOT_HAVE_VALID_VALUES);
196 return result;
197 }
198 auto result = CreateEmptyType(type: std::move(type));
199 result.InitializeEmpty();
200 return result;
201}
202
203void BaseStatistics::Copy(const BaseStatistics &other) {
204 D_ASSERT(GetType() == other.GetType());
205 CopyBase(orig: other);
206 stats_union = other.stats_union;
207 switch (GetStatsType()) {
208 case StatisticsType::LIST_STATS:
209 ListStats::Copy(stats&: *this, other);
210 break;
211 case StatisticsType::STRUCT_STATS:
212 StructStats::Copy(stats&: *this, other);
213 break;
214 default:
215 break;
216 }
217}
218
219BaseStatistics BaseStatistics::Copy() const {
220 BaseStatistics result(type);
221 result.Copy(other: *this);
222 return result;
223}
224
225unique_ptr<BaseStatistics> BaseStatistics::ToUnique() const {
226 auto result = unique_ptr<BaseStatistics>(new BaseStatistics(type));
227 result->Copy(other: *this);
228 return result;
229}
230
231void BaseStatistics::CopyBase(const BaseStatistics &other) {
232 has_null = other.has_null;
233 has_no_null = other.has_no_null;
234 distinct_count = other.distinct_count;
235}
236
237void BaseStatistics::Set(StatsInfo info) {
238 switch (info) {
239 case StatsInfo::CAN_HAVE_NULL_VALUES:
240 has_null = true;
241 break;
242 case StatsInfo::CANNOT_HAVE_NULL_VALUES:
243 has_null = false;
244 break;
245 case StatsInfo::CAN_HAVE_VALID_VALUES:
246 has_no_null = true;
247 break;
248 case StatsInfo::CANNOT_HAVE_VALID_VALUES:
249 has_no_null = false;
250 break;
251 case StatsInfo::CAN_HAVE_NULL_AND_VALID_VALUES:
252 has_null = true;
253 has_no_null = true;
254 break;
255 default:
256 throw InternalException("Unrecognized StatsInfo for BaseStatistics::Set");
257 }
258}
259
260void BaseStatistics::CombineValidity(BaseStatistics &left, BaseStatistics &right) {
261 has_null = left.has_null || right.has_null;
262 has_no_null = left.has_no_null || right.has_no_null;
263}
264
265void BaseStatistics::CopyValidity(BaseStatistics &stats) {
266 has_null = stats.has_null;
267 has_no_null = stats.has_no_null;
268}
269
270void BaseStatistics::Serialize(Serializer &serializer) const {
271 FieldWriter writer(serializer);
272 writer.WriteField<bool>(element: has_null);
273 writer.WriteField<bool>(element: has_no_null);
274 writer.WriteField<idx_t>(element: distinct_count);
275 Serialize(writer);
276 writer.Finalize();
277}
278
279void BaseStatistics::SetDistinctCount(idx_t count) {
280 this->distinct_count = count;
281}
282
283void BaseStatistics::Serialize(FieldWriter &writer) const {
284 switch (GetStatsType()) {
285 case StatisticsType::NUMERIC_STATS:
286 NumericStats::Serialize(stats: *this, writer);
287 break;
288 case StatisticsType::STRING_STATS:
289 StringStats::Serialize(stats: *this, writer);
290 break;
291 case StatisticsType::LIST_STATS:
292 ListStats::Serialize(stats: *this, writer);
293 break;
294 case StatisticsType::STRUCT_STATS:
295 StructStats::Serialize(stats: *this, writer);
296 break;
297 default:
298 break;
299 }
300}
301BaseStatistics BaseStatistics::DeserializeType(FieldReader &reader, LogicalType type) {
302 switch (GetStatsType(type)) {
303 case StatisticsType::NUMERIC_STATS:
304 return NumericStats::Deserialize(reader, type: std::move(type));
305 case StatisticsType::STRING_STATS:
306 return StringStats::Deserialize(reader, type: std::move(type));
307 case StatisticsType::LIST_STATS:
308 return ListStats::Deserialize(reader, type: std::move(type));
309 case StatisticsType::STRUCT_STATS:
310 return StructStats::Deserialize(reader, type: std::move(type));
311 default:
312 return BaseStatistics(std::move(type));
313 }
314}
315
316BaseStatistics BaseStatistics::Deserialize(Deserializer &source, LogicalType type) {
317 FieldReader reader(source);
318 bool has_null = reader.ReadRequired<bool>();
319 bool has_no_null = reader.ReadRequired<bool>();
320 idx_t distinct_count = reader.ReadRequired<idx_t>();
321 auto result = DeserializeType(reader, type: std::move(type));
322 result.has_null = has_null;
323 result.has_no_null = has_no_null;
324 result.distinct_count = distinct_count;
325 reader.Finalize();
326 return result;
327}
328
329string BaseStatistics::ToString() const {
330 auto has_n = has_null ? "true" : "false";
331 auto has_n_n = has_no_null ? "true" : "false";
332 string result =
333 StringUtil::Format(fmt_str: "%s%s", params: StringUtil::Format(fmt_str: "[Has Null: %s, Has No Null: %s]", params: has_n, params: has_n_n),
334 params: distinct_count > 0 ? StringUtil::Format(fmt_str: "[Approx Unique: %lld]", params: distinct_count) : "");
335 switch (GetStatsType()) {
336 case StatisticsType::NUMERIC_STATS:
337 result = NumericStats::ToString(stats: *this) + result;
338 break;
339 case StatisticsType::STRING_STATS:
340 result = StringStats::ToString(stats: *this) + result;
341 break;
342 case StatisticsType::LIST_STATS:
343 result = ListStats::ToString(stats: *this) + result;
344 break;
345 case StatisticsType::STRUCT_STATS:
346 result = StructStats::ToString(stats: *this) + result;
347 break;
348 default:
349 break;
350 }
351 return result;
352}
353
354void BaseStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
355 D_ASSERT(vector.GetType() == this->type);
356 switch (GetStatsType()) {
357 case StatisticsType::NUMERIC_STATS:
358 NumericStats::Verify(stats: *this, vector, sel, count);
359 break;
360 case StatisticsType::STRING_STATS:
361 StringStats::Verify(stats: *this, vector, sel, count);
362 break;
363 case StatisticsType::LIST_STATS:
364 ListStats::Verify(stats: *this, vector, sel, count);
365 break;
366 case StatisticsType::STRUCT_STATS:
367 StructStats::Verify(stats: *this, vector, sel, count);
368 break;
369 default:
370 break;
371 }
372 if (has_null && has_no_null) {
373 // nothing to verify
374 return;
375 }
376 UnifiedVectorFormat vdata;
377 vector.ToUnifiedFormat(count, data&: vdata);
378 for (idx_t i = 0; i < count; i++) {
379 auto idx = sel.get_index(idx: i);
380 auto index = vdata.sel->get_index(idx);
381 bool row_is_valid = vdata.validity.RowIsValid(row_idx: index);
382 if (row_is_valid && !has_no_null) {
383 throw InternalException(
384 "Statistics mismatch: vector labeled as having only NULL values, but vector contains valid values: %s",
385 vector.ToString(count));
386 }
387 if (!row_is_valid && !has_null) {
388 throw InternalException(
389 "Statistics mismatch: vector labeled as not having NULL values, but vector contains null values: %s",
390 vector.ToString(count));
391 }
392 }
393}
394
395void BaseStatistics::Verify(Vector &vector, idx_t count) const {
396 auto sel = FlatVector::IncrementalSelectionVector();
397 Verify(vector, sel: *sel, count);
398}
399
400BaseStatistics BaseStatistics::FromConstantType(const Value &input) {
401 switch (GetStatsType(type: input.type())) {
402 case StatisticsType::NUMERIC_STATS: {
403 auto result = NumericStats::CreateEmpty(type: input.type());
404 NumericStats::SetMin(stats&: result, val: input);
405 NumericStats::SetMax(stats&: result, val: input);
406 return result;
407 }
408 case StatisticsType::STRING_STATS: {
409 auto result = StringStats::CreateEmpty(type: input.type());
410 if (!input.IsNull()) {
411 auto &string_value = StringValue::Get(value: input);
412 StringStats::Update(stats&: result, value: string_t(string_value));
413 }
414 return result;
415 }
416 case StatisticsType::LIST_STATS: {
417 auto result = ListStats::CreateEmpty(type: input.type());
418 auto &child_stats = ListStats::GetChildStats(stats&: result);
419 if (!input.IsNull()) {
420 auto &list_children = ListValue::GetChildren(value: input);
421 for (auto &child_element : list_children) {
422 child_stats.Merge(other: FromConstant(input: child_element));
423 }
424 }
425 return result;
426 }
427 case StatisticsType::STRUCT_STATS: {
428 auto result = StructStats::CreateEmpty(type: input.type());
429 auto &child_types = StructType::GetChildTypes(type: input.type());
430 if (input.IsNull()) {
431 for (idx_t i = 0; i < child_types.size(); i++) {
432 StructStats::SetChildStats(stats&: result, i, new_stats: FromConstant(input: Value(child_types[i].second)));
433 }
434 } else {
435 auto &struct_children = StructValue::GetChildren(value: input);
436 for (idx_t i = 0; i < child_types.size(); i++) {
437 StructStats::SetChildStats(stats&: result, i, new_stats: FromConstant(input: struct_children[i]));
438 }
439 }
440 return result;
441 }
442 default:
443 return BaseStatistics(input.type());
444 }
445}
446
447BaseStatistics BaseStatistics::FromConstant(const Value &input) {
448 auto result = FromConstantType(input);
449 result.SetDistinctCount(1);
450 if (input.IsNull()) {
451 result.Set(StatsInfo::CAN_HAVE_NULL_VALUES);
452 result.Set(StatsInfo::CANNOT_HAVE_VALID_VALUES);
453 } else {
454 result.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
455 result.Set(StatsInfo::CAN_HAVE_VALID_VALUES);
456 }
457 return result;
458}
459
460} // namespace duckdb
461