1#include "duckdb/function/table/table_scan.hpp"
2#include "duckdb/optimizer/join_order/join_node.hpp"
3#include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
4#include "duckdb/planner/filter/conjunction_filter.hpp"
5#include "duckdb/planner/filter/constant_filter.hpp"
6#include "duckdb/planner/operator/logical_comparison_join.hpp"
7#include "duckdb/planner/operator/logical_get.hpp"
8#include "duckdb/storage/data_table.hpp"
9#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
10
11#include <cmath>
12
13namespace duckdb {
14
15static optional_ptr<TableCatalogEntry> GetCatalogTableEntry(LogicalOperator &op) {
16 D_ASSERT(op.type == LogicalOperatorType::LOGICAL_GET);
17 auto &get = op.Cast<LogicalGet>();
18 return get.GetTable();
19}
20
21// The filter was made on top of a logical sample or other projection,
22// but no specific columns are referenced. See issue 4978 number 4.
23bool CardinalityEstimator::EmptyFilter(FilterInfo &filter_info) {
24 if (!filter_info.left_set && !filter_info.right_set) {
25 return true;
26 }
27 return false;
28}
29
30void CardinalityEstimator::AddRelationTdom(FilterInfo &filter_info) {
31 D_ASSERT(filter_info.set.count >= 1);
32 for (const RelationsToTDom &r2tdom : relations_to_tdoms) {
33 auto &i_set = r2tdom.equivalent_relations;
34 if (i_set.find(x: filter_info.left_binding) != i_set.end()) {
35 // found an equivalent filter
36 return;
37 }
38 }
39 auto key = ColumnBinding(filter_info.left_binding.table_index, filter_info.left_binding.column_index);
40 relations_to_tdoms.emplace_back(args: column_binding_set_t({key}));
41}
42
43bool CardinalityEstimator::SingleColumnFilter(FilterInfo &filter_info) {
44 if (filter_info.left_set && filter_info.right_set) {
45 // Both set
46 return false;
47 }
48 if (EmptyFilter(filter_info)) {
49 return false;
50 }
51 return true;
52}
53
54vector<idx_t> CardinalityEstimator::DetermineMatchingEquivalentSets(FilterInfo *filter_info) {
55 vector<idx_t> matching_equivalent_sets;
56 auto equivalent_relation_index = 0;
57
58 for (const RelationsToTDom &r2tdom : relations_to_tdoms) {
59 auto &i_set = r2tdom.equivalent_relations;
60 if (i_set.find(x: filter_info->left_binding) != i_set.end()) {
61 matching_equivalent_sets.push_back(x: equivalent_relation_index);
62 } else if (i_set.find(x: filter_info->right_binding) != i_set.end()) {
63 // don't add both left and right to the matching_equivalent_sets
64 // since both left and right get added to that index anyway.
65 matching_equivalent_sets.push_back(x: equivalent_relation_index);
66 }
67 equivalent_relation_index++;
68 }
69 return matching_equivalent_sets;
70}
71
72void CardinalityEstimator::AddToEquivalenceSets(FilterInfo *filter_info, vector<idx_t> matching_equivalent_sets) {
73 D_ASSERT(matching_equivalent_sets.size() <= 2);
74 if (matching_equivalent_sets.size() > 1) {
75 // an equivalence relation is connecting to sets of equivalence relations
76 // so push all relations from the second set into the first. Later we will delete
77 // the second set.
78 for (ColumnBinding i : relations_to_tdoms.at(n: matching_equivalent_sets[1]).equivalent_relations) {
79 relations_to_tdoms.at(n: matching_equivalent_sets[0]).equivalent_relations.insert(x: i);
80 }
81 relations_to_tdoms.at(n: matching_equivalent_sets[1]).equivalent_relations.clear();
82 relations_to_tdoms.at(n: matching_equivalent_sets[0]).filters.push_back(x: filter_info);
83 // add all values of one set to the other, delete the empty one
84 } else if (matching_equivalent_sets.size() == 1) {
85 auto &tdom_i = relations_to_tdoms.at(n: matching_equivalent_sets.at(n: 0));
86 tdom_i.equivalent_relations.insert(x: filter_info->left_binding);
87 tdom_i.equivalent_relations.insert(x: filter_info->right_binding);
88 tdom_i.filters.push_back(x: filter_info);
89 } else if (matching_equivalent_sets.empty()) {
90 column_binding_set_t tmp;
91 tmp.insert(x: filter_info->left_binding);
92 tmp.insert(x: filter_info->right_binding);
93 relations_to_tdoms.emplace_back(args&: tmp);
94 relations_to_tdoms.back().filters.push_back(x: filter_info);
95 }
96}
97
98void CardinalityEstimator::AddRelationToColumnMapping(ColumnBinding key, ColumnBinding value) {
99 relation_column_to_original_column[key] = value;
100}
101
102void CardinalityEstimator::CopyRelationMap(column_binding_map_t<ColumnBinding> &child_binding_map) {
103 for (auto &binding_map : relation_column_to_original_column) {
104 D_ASSERT(child_binding_map.find(binding_map.first) == child_binding_map.end());
105 child_binding_map[binding_map.first] = binding_map.second;
106 }
107}
108
109void CardinalityEstimator::AddColumnToRelationMap(idx_t table_index, idx_t column_index) {
110 relation_attributes[table_index].columns.insert(x: column_index);
111}
112
113void CardinalityEstimator::InitEquivalentRelations(vector<unique_ptr<FilterInfo>> &filter_infos) {
114 // For each filter, we fill keep track of the index of the equivalent relation set
115 // the left and right relation needs to be added to.
116 for (auto &filter : filter_infos) {
117 if (SingleColumnFilter(filter_info&: *filter)) {
118 // Filter on one relation, (i.e string or range filter on a column).
119 // Grab the first relation and add it to the equivalence_relations
120 AddRelationTdom(filter_info&: *filter);
121 continue;
122 } else if (EmptyFilter(filter_info&: *filter)) {
123 continue;
124 }
125 D_ASSERT(filter->left_set->count >= 1);
126 D_ASSERT(filter->right_set->count >= 1);
127
128 auto matching_equivalent_sets = DetermineMatchingEquivalentSets(filter_info: filter.get());
129 AddToEquivalenceSets(filter_info: filter.get(), matching_equivalent_sets);
130 }
131}
132
133void CardinalityEstimator::VerifySymmetry(JoinNode &result, JoinNode &entry) {
134 if (result.GetCardinality<double>() != entry.GetCardinality<double>()) {
135 // Currently it's possible that some entries are cartesian joins.
136 // When this is the case, you don't always have symmetry, but
137 // if the cost of the result is less, then just assure the cardinality
138 // is also less, then you have the same effect of symmetry.
139 D_ASSERT(ceil(result.GetCardinality<double>()) <= ceil(entry.GetCardinality<double>()) ||
140 floor(result.GetCardinality<double>()) <= floor(entry.GetCardinality<double>()));
141 }
142}
143
144void CardinalityEstimator::InitTotalDomains() {
145 auto remove_start = std::remove_if(first: relations_to_tdoms.begin(), last: relations_to_tdoms.end(),
146 pred: [](RelationsToTDom &r_2_tdom) { return r_2_tdom.equivalent_relations.empty(); });
147 relations_to_tdoms.erase(first: remove_start, last: relations_to_tdoms.end());
148}
149
150double CardinalityEstimator::ComputeCost(JoinNode &left, JoinNode &right, double expected_cardinality) {
151 return expected_cardinality + left.GetCost() + right.GetCost();
152}
153
154double CardinalityEstimator::EstimateCrossProduct(const JoinNode &left, const JoinNode &right) {
155 // need to explicity use double here, otherwise auto converts it to an int, then
156 // there is an autocast in the return.
157 if (left.GetCardinality<double>() >= (NumericLimits<double>::Maximum() / right.GetCardinality<double>())) {
158 return NumericLimits<double>::Maximum();
159 }
160 return left.GetCardinality<double>() * right.GetCardinality<double>();
161}
162
163void CardinalityEstimator::AddRelationColumnMapping(LogicalGet &get, idx_t relation_id) {
164 for (idx_t it = 0; it < get.column_ids.size(); it++) {
165 auto key = ColumnBinding(relation_id, it);
166 auto value = ColumnBinding(get.table_index, get.column_ids[it]);
167 AddRelationToColumnMapping(key, value);
168 }
169}
170
171void UpdateDenom(Subgraph2Denominator &relation_2_denom, RelationsToTDom &relation_to_tdom) {
172 relation_2_denom.denom *= relation_to_tdom.has_tdom_hll ? relation_to_tdom.tdom_hll : relation_to_tdom.tdom_no_hll;
173}
174
175void FindSubgraphMatchAndMerge(Subgraph2Denominator &merge_to, idx_t find_me,
176 vector<Subgraph2Denominator>::iterator subgraph,
177 vector<Subgraph2Denominator>::iterator end) {
178 for (; subgraph != end; subgraph++) {
179 if (subgraph->relations.count(x: find_me) >= 1) {
180 for (auto &relation : subgraph->relations) {
181 merge_to.relations.insert(x: relation);
182 }
183 subgraph->relations.clear();
184 merge_to.denom *= subgraph->denom;
185 return;
186 }
187 }
188}
189
190double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) {
191 double numerator = 1;
192 unordered_set<idx_t> actual_set;
193 for (idx_t i = 0; i < new_set.count; i++) {
194 numerator *= relation_attributes[new_set.relations[i]].cardinality;
195 actual_set.insert(x: new_set.relations[i]);
196 }
197 vector<Subgraph2Denominator> subgraphs;
198 bool done = false;
199 bool found_match = false;
200
201 // Finding the denominator is tricky. You need to go through the tdoms in decreasing order
202 // Then loop through all filters in the equivalence set of the tdom to see if both the
203 // left and right relations are in the new set, if so you can use that filter.
204 // You must also make sure that the filters all relations in the given set, so we use subgraphs
205 // that should eventually merge into one connected graph that joins all the relations
206 // TODO: Implement a method to cache subgraphs so you don't have to build them up every
207 // time the cardinality of a new set is requested
208
209 // relations_to_tdoms has already been sorted.
210 for (auto &relation_2_tdom : relations_to_tdoms) {
211 // loop through each filter in the tdom.
212 if (done) {
213 break;
214 }
215 for (auto &filter : relation_2_tdom.filters) {
216 if (actual_set.count(x: filter->left_binding.table_index) == 0 ||
217 actual_set.count(x: filter->right_binding.table_index) == 0) {
218 continue;
219 }
220 // the join filter is on relations in the new set.
221 found_match = false;
222 vector<Subgraph2Denominator>::iterator it;
223 for (it = subgraphs.begin(); it != subgraphs.end(); it++) {
224 auto left_in = it->relations.count(x: filter->left_binding.table_index);
225 auto right_in = it->relations.count(x: filter->right_binding.table_index);
226 if (left_in && right_in) {
227 // if both left and right bindings are in the subgraph, continue.
228 // This means another filter is connecting relations already in the
229 // subgraph it, but it has a tdom that is less, and we don't care.
230 found_match = true;
231 continue;
232 }
233 if (!left_in && !right_in) {
234 // if both left and right bindings are *not* in the subgraph, continue
235 // without finding a match. This will trigger the process to add a new
236 // subgraph
237 continue;
238 }
239 idx_t find_table;
240 if (left_in) {
241 find_table = filter->right_binding.table_index;
242 } else {
243 D_ASSERT(right_in);
244 find_table = filter->left_binding.table_index;
245 }
246 auto next_subgraph = it + 1;
247 // iterate through other subgraphs and merge.
248 FindSubgraphMatchAndMerge(merge_to&: *it, find_me: find_table, subgraph: next_subgraph, end: subgraphs.end());
249 // Now insert the right binding and update denominator with the
250 // tdom of the filter
251 it->relations.insert(x: find_table);
252 UpdateDenom(relation_2_denom&: *it, relation_to_tdom&: relation_2_tdom);
253 found_match = true;
254 break;
255 }
256 // means that the filter joins relations in the given set, but there is no
257 // connection to any subgraph in subgraphs. Add a new subgraph, and maybe later there will be
258 // a connection.
259 if (!found_match) {
260 subgraphs.emplace_back();
261 auto &subgraph = subgraphs.back();
262 subgraph.relations.insert(x: filter->left_binding.table_index);
263 subgraph.relations.insert(x: filter->right_binding.table_index);
264 UpdateDenom(relation_2_denom&: subgraph, relation_to_tdom&: relation_2_tdom);
265 }
266 auto remove_start = std::remove_if(first: subgraphs.begin(), last: subgraphs.end(),
267 pred: [](Subgraph2Denominator &s) { return s.relations.empty(); });
268 subgraphs.erase(first: remove_start, last: subgraphs.end());
269
270 if (subgraphs.size() == 1 && subgraphs.at(n: 0).relations.size() == new_set.count) {
271 // You have found enough filters to connect the relations. These are guaranteed
272 // to be the filters with the highest Tdoms.
273 done = true;
274 break;
275 }
276 }
277 }
278 double denom = 1;
279 // TODO: It's possible cross-products were added and are not present in the filters in the relation_2_tdom
280 // structures. When that's the case, multiply the denom structures that have no intersection
281 for (auto &match : subgraphs) {
282 // It's possible that in production, one of the D_ASSERTS above will fail and not all subgraphs
283 // were connected. When this happens, just use the largest denominator of all the subgraphs.
284 if (match.denom > denom) {
285 denom = match.denom;
286 }
287 }
288 // can happen if a table has cardinality 0, or a tdom is set to 0
289 if (denom == 0) {
290 denom = 1;
291 }
292 return numerator / denom;
293}
294
295static bool IsLogicalFilter(LogicalOperator &op) {
296 return op.type == LogicalOperatorType::LOGICAL_FILTER;
297}
298
299static optional_ptr<LogicalGet> GetLogicalGet(LogicalOperator &op, idx_t table_index = DConstants::INVALID_INDEX) {
300 optional_ptr<LogicalGet> get;
301 switch (op.type) {
302 case LogicalOperatorType::LOGICAL_GET:
303 get = &op.Cast<LogicalGet>();
304 break;
305 case LogicalOperatorType::LOGICAL_FILTER:
306 get = GetLogicalGet(op&: *op.children.at(n: 0), table_index);
307 break;
308 case LogicalOperatorType::LOGICAL_PROJECTION:
309 get = GetLogicalGet(op&: *op.children.at(n: 0), table_index);
310 break;
311 case LogicalOperatorType::LOGICAL_ASOF_JOIN:
312 case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
313 auto &join = op.Cast<LogicalComparisonJoin>();
314 // We should never be calling GetLogicalGet without a valid table_index.
315 // We are attempting to get the catalog table for a relation (for statistics/cardinality estimation)
316 // A logical join means there is a non-reorderable relation in the join plan. This means we need
317 // to know the exact table index to return.
318 D_ASSERT(table_index != DConstants::INVALID_INDEX);
319 if (join.join_type == JoinType::MARK || join.join_type == JoinType::LEFT) {
320 auto &left_child = *join.children.at(n: 0);
321 get = GetLogicalGet(op&: left_child, table_index);
322 if (get && get->table_index == table_index) {
323 return get;
324 }
325 auto &right_child = *join.children.at(n: 1);
326 get = GetLogicalGet(op&: right_child, table_index);
327 if (get && get->table_index == table_index) {
328 return get;
329 }
330 }
331 break;
332 }
333 default:
334 // return null pointer, maybe there is no logical get under this child
335 break;
336 }
337 return get;
338}
339
340void CardinalityEstimator::MergeBindings(idx_t binding_index, idx_t relation_id,
341 vector<column_binding_map_t<ColumnBinding>> &child_binding_maps) {
342 for (auto &map_set : child_binding_maps) {
343 for (auto &mapping : map_set) {
344 ColumnBinding relation_bindings = mapping.first;
345 ColumnBinding actual_bindings = mapping.second;
346
347 if (actual_bindings.table_index == binding_index) {
348 auto key = ColumnBinding(relation_id, relation_bindings.column_index);
349 AddRelationToColumnMapping(key, value: actual_bindings);
350 }
351 }
352 }
353}
354
355bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) {
356 if (a.has_tdom_hll && b.has_tdom_hll) {
357 return a.tdom_hll > b.tdom_hll;
358 }
359 if (a.has_tdom_hll) {
360 return a.tdom_hll > b.tdom_no_hll;
361 }
362 if (b.has_tdom_hll) {
363 return a.tdom_no_hll > b.tdom_hll;
364 }
365 return a.tdom_no_hll > b.tdom_no_hll;
366}
367
368void CardinalityEstimator::InitCardinalityEstimatorProps(vector<NodeOp> &node_ops,
369 vector<unique_ptr<FilterInfo>> &filter_infos) {
370 InitEquivalentRelations(filter_infos);
371 InitTotalDomains();
372 for (idx_t i = 0; i < node_ops.size(); i++) {
373 auto &join_node = *node_ops[i].node;
374 auto &op = node_ops[i].op;
375 join_node.SetBaseTableCardinality(op.EstimateCardinality(context));
376 if (op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
377 auto &join = op.Cast<LogicalComparisonJoin>();
378 if (join.join_type == JoinType::LEFT) {
379 // If a base op is a Logical Comparison join it is probably a left join,
380 // so the cost of the larger table is a fine estimate.
381 // TODO: provide better estimates for cost of mark joins
382 // MARK joins are used for anti and semi joins, so the cost can conceivably be
383 // less than the base table cardinality.
384 join_node.SetCost(join_node.GetBaseTableCardinality());
385 }
386 } else if (op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
387 // AsOf joins have the cardinality of the LHS
388 join_node.SetCost(join_node.GetBaseTableCardinality());
389 }
390 // Total domains can be affected by filters. So we update base table cardinality first
391 EstimateBaseTableCardinality(node&: join_node, op);
392 // Then update total domains.
393 UpdateTotalDomains(node&: join_node, op);
394 }
395
396 // sort relations from greatest tdom to lowest tdom.
397 std::sort(first: relations_to_tdoms.begin(), last: relations_to_tdoms.end(), comp: SortTdoms);
398}
399
400void CardinalityEstimator::UpdateTotalDomains(JoinNode &node, LogicalOperator &op) {
401 auto relation_id = node.set.relations[0];
402 relation_attributes[relation_id].cardinality = node.GetCardinality<double>();
403 //! Initialize the distinct count for all columns used in joins with the current relation.
404 idx_t distinct_count = node.GetBaseTableCardinality();
405 optional_ptr<TableCatalogEntry> catalog_table;
406
407 optional_ptr<LogicalGet> get;
408 bool get_updated = true;
409 for (auto &column : relation_attributes[relation_id].columns) {
410 //! for every column used in a filter in the relation, get the distinct count via HLL, or assume it to be
411 //! the cardinality
412 ColumnBinding key = ColumnBinding(relation_id, column);
413 auto actual_binding = relation_column_to_original_column.find(x: key);
414 // each relation has columns that are either projected or used as filters
415 // In order to get column statistics we need to make sure the actual binding still
416 // refers to the same base table relation, as non-reorderable joins may involve 2+
417 // base table relations and therefore the columns may also refer to 2 different
418 // base table relations
419 if (actual_binding != relation_column_to_original_column.end() &&
420 (!get || get->table_index != actual_binding->second.table_index)) {
421 get = GetLogicalGet(op, table_index: actual_binding->second.table_index);
422 get_updated = true;
423 } else {
424 get_updated = false;
425 }
426
427 if (get_updated) {
428 if (get) {
429 catalog_table = GetCatalogTableEntry(op&: *get);
430 } else {
431 catalog_table = nullptr;
432 }
433 }
434
435 if (catalog_table && actual_binding != relation_column_to_original_column.end()) {
436 // Get HLL stats here
437 auto base_stats = catalog_table->GetStatistics(context, column_id: actual_binding->second.column_index);
438 if (base_stats) {
439 distinct_count = base_stats->GetDistinctCount();
440 }
441
442 // HLL has estimation error, distinct_count can't be greater than cardinality of the table before filters
443 if (distinct_count > node.GetBaseTableCardinality()) {
444 distinct_count = node.GetBaseTableCardinality();
445 }
446 } else {
447 distinct_count = node.GetBaseTableCardinality();
448 }
449 // Update the relation_to_tdom set with the estimated distinct count (or tdom) calculated above
450 for (auto &relation_to_tdom : relations_to_tdoms) {
451 column_binding_set_t i_set = relation_to_tdom.equivalent_relations;
452 if (i_set.count(x: key) != 1) {
453 continue;
454 }
455 if (catalog_table) {
456 if (relation_to_tdom.tdom_hll < distinct_count) {
457 relation_to_tdom.tdom_hll = distinct_count;
458 relation_to_tdom.has_tdom_hll = true;
459 }
460 if (relation_to_tdom.tdom_no_hll > distinct_count) {
461 relation_to_tdom.tdom_no_hll = distinct_count;
462 }
463 } else {
464 // Here we don't have catalog statistics, and the following is how we determine
465 // the tdom
466 // 1. If there is any hll data in the equivalence set, use that
467 // 2. Otherwise, use the table with the smallest cardinality
468 if (relation_to_tdom.tdom_no_hll > distinct_count && !relation_to_tdom.has_tdom_hll) {
469 relation_to_tdom.tdom_no_hll = distinct_count;
470 }
471 }
472 break;
473 }
474 }
475}
476
477optional_ptr<TableFilterSet> CardinalityEstimator::GetTableFilters(LogicalOperator &op, idx_t table_index) {
478 auto get = GetLogicalGet(op, table_index);
479 return get ? &get->table_filters : nullptr;
480}
481
482idx_t CardinalityEstimator::InspectConjunctionAND(idx_t cardinality, idx_t column_index, ConjunctionAndFilter &filter,
483 unique_ptr<BaseStatistics> base_stats) {
484 auto has_equality_filter = false;
485 auto cardinality_after_filters = cardinality;
486 for (auto &child_filter : filter.child_filters) {
487 if (child_filter->filter_type != TableFilterType::CONSTANT_COMPARISON) {
488 continue;
489 }
490 auto &comparison_filter = child_filter->Cast<ConstantFilter>();
491 if (comparison_filter.comparison_type != ExpressionType::COMPARE_EQUAL) {
492 continue;
493 }
494 auto column_count = 0;
495 if (base_stats) {
496 column_count = base_stats->GetDistinctCount();
497 }
498 auto filtered_card = cardinality;
499 // column_count = 0 when there is no column count (i.e parquet scans)
500 if (column_count > 0) {
501 // we want the ceil of cardinality/column_count. We also want to avoid compiler errors
502 filtered_card = (cardinality + column_count - 1) / column_count;
503 cardinality_after_filters = filtered_card;
504 }
505 if (has_equality_filter) {
506 cardinality_after_filters = MinValue(a: filtered_card, b: cardinality_after_filters);
507 }
508 has_equality_filter = true;
509 }
510 return cardinality_after_filters;
511}
512
513idx_t CardinalityEstimator::InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &filter,
514 unique_ptr<BaseStatistics> base_stats) {
515 auto has_equality_filter = false;
516 auto cardinality_after_filters = cardinality;
517 for (auto &child_filter : filter.child_filters) {
518 if (child_filter->filter_type != TableFilterType::CONSTANT_COMPARISON) {
519 continue;
520 }
521 auto &comparison_filter = child_filter->Cast<ConstantFilter>();
522 if (comparison_filter.comparison_type == ExpressionType::COMPARE_EQUAL) {
523 auto column_count = cardinality_after_filters;
524 if (base_stats) {
525 column_count = base_stats->GetDistinctCount();
526 }
527 auto increment = MaxValue<idx_t>(a: ((cardinality + column_count - 1) / column_count), b: 1);
528 if (has_equality_filter) {
529 cardinality_after_filters += increment;
530 } else {
531 cardinality_after_filters = increment;
532 }
533 has_equality_filter = true;
534 }
535 }
536 D_ASSERT(cardinality_after_filters > 0);
537 return cardinality_after_filters;
538}
539
540idx_t CardinalityEstimator::InspectTableFilters(idx_t cardinality, LogicalOperator &op, TableFilterSet &table_filters,
541 idx_t table_index) {
542 idx_t cardinality_after_filters = cardinality;
543 auto get = GetLogicalGet(op, table_index);
544 unique_ptr<BaseStatistics> column_statistics;
545 for (auto &it : table_filters.filters) {
546 column_statistics = nullptr;
547 if (get->bind_data && get->function.name.compare(s: "seq_scan") == 0) {
548 auto &table_scan_bind_data = get->bind_data->Cast<TableScanBindData>();
549 column_statistics = get->function.statistics(context, &table_scan_bind_data, it.first);
550 }
551 if (it.second->filter_type == TableFilterType::CONJUNCTION_AND) {
552 auto &filter = it.second->Cast<ConjunctionAndFilter>();
553 idx_t cardinality_with_and_filter =
554 InspectConjunctionAND(cardinality, column_index: it.first, filter, base_stats: std::move(column_statistics));
555 cardinality_after_filters = MinValue(a: cardinality_after_filters, b: cardinality_with_and_filter);
556 } else if (it.second->filter_type == TableFilterType::CONJUNCTION_OR) {
557 auto &filter = it.second->Cast<ConjunctionOrFilter>();
558 idx_t cardinality_with_or_filter =
559 InspectConjunctionOR(cardinality, column_index: it.first, filter, base_stats: std::move(column_statistics));
560 cardinality_after_filters = MinValue(a: cardinality_after_filters, b: cardinality_with_or_filter);
561 }
562 }
563 // if the above code didn't find an equality filter (i.e country_code = "[us]")
564 // and there are other table filters, use default selectivity.
565 bool has_equality_filter = (cardinality_after_filters != cardinality);
566 if (!has_equality_filter && !table_filters.filters.empty()) {
567 cardinality_after_filters = MaxValue<idx_t>(a: cardinality * DEFAULT_SELECTIVITY, b: 1);
568 }
569 return cardinality_after_filters;
570}
571
572void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalOperator &op) {
573 auto has_logical_filter = IsLogicalFilter(op);
574 D_ASSERT(node.set.count == 1);
575 auto relation_id = node.set.relations[0];
576
577 double lowest_card_found = node.GetBaseTableCardinality();
578 for (auto &column : relation_attributes[relation_id].columns) {
579 auto card_after_filters = node.GetBaseTableCardinality();
580 ColumnBinding key = ColumnBinding(relation_id, column);
581 optional_ptr<TableFilterSet> table_filters;
582 auto actual_binding = relation_column_to_original_column.find(x: key);
583 if (actual_binding != relation_column_to_original_column.end()) {
584 table_filters = GetTableFilters(op, table_index: actual_binding->second.table_index);
585 }
586
587 if (table_filters) {
588 double inspect_result =
589 (double)InspectTableFilters(cardinality: card_after_filters, op, table_filters&: *table_filters, table_index: actual_binding->second.table_index);
590 card_after_filters = MinValue(a: inspect_result, b: (double)card_after_filters);
591 }
592 if (has_logical_filter) {
593 card_after_filters *= DEFAULT_SELECTIVITY;
594 }
595 lowest_card_found = MinValue(a: card_after_filters, b: lowest_card_found);
596 }
597 node.SetEstimatedCardinality(lowest_card_found);
598}
599
600} // namespace duckdb
601