cardinality_estimator.cpp source code [Velox/build/_deps/duckdb-src/src/optimizer/join_order/cardinality_estimator.cpp]

1	#include "duckdb/function/table/table_scan.hpp"
2	#include "duckdb/optimizer/join_order/join_node.hpp"
3	#include "duckdb/optimizer/join_order/join_order_optimizer.hpp"
4	#include "duckdb/planner/filter/conjunction_filter.hpp"
5	#include "duckdb/planner/filter/constant_filter.hpp"
6	#include "duckdb/planner/operator/logical_comparison_join.hpp"
7	#include "duckdb/planner/operator/logical_get.hpp"
8	#include "duckdb/storage/data_table.hpp"
9	#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
10
11	#include <cmath>
12
13	namespace duckdb {
14
15	static optional_ptr<TableCatalogEntry> GetCatalogTableEntry(LogicalOperator &op) {
16	D_ASSERT(op.type == LogicalOperatorType::LOGICAL_GET);
17	auto &get = op.Cast<LogicalGet>();
18	return get.GetTable();
19	}
20
21	// The filter was made on top of a logical sample or other projection,
22	// but no specific columns are referenced. See issue 4978 number 4.
23	bool CardinalityEstimator::EmptyFilter(FilterInfo &filter_info) {
24	if (!filter_info.left_set && !filter_info.right_set) {
25	return true;
26	}
27	return false;
28	}
29
30	void CardinalityEstimator::AddRelationTdom(FilterInfo &filter_info) {
31	D_ASSERT(filter_info.set.count >= `1`);
32	for (const RelationsToTDom &r2tdom : relations_to_tdoms) {
33	auto &i_set = r2tdom.equivalent_relations;
34	if (i_set.find(x: filter_info.left_binding) != i_set.end()) {
35	// found an equivalent filter
36	return;
37	}
38	}
39	auto key = ColumnBinding (filter_info.left_binding.table_index, filter_info.left_binding.column_index);
40	relations_to_tdoms.emplace_back(args: column_binding_set_t ({key}));
41	}
42
43	bool CardinalityEstimator::SingleColumnFilter(FilterInfo &filter_info) {
44	if (filter_info.left_set && filter_info.right_set) {
45	// Both set
46	return false;
47	}
48	if (EmptyFilter(filter_info)) {
49	return false;
50	}
51	return true;
52	}
53
54	vector<idx_t> CardinalityEstimator::DetermineMatchingEquivalentSets(FilterInfo *filter_info) {
55	vector<idx_t> matching_equivalent_sets;
56	auto equivalent_relation_index = `0`;
57
58	for (const RelationsToTDom &r2tdom : relations_to_tdoms) {
59	auto &i_set = r2tdom.equivalent_relations;
60	if (i_set.find(x: filter_info->left_binding) != i_set.end()) {
61	matching_equivalent_sets.push_back(x: equivalent_relation_index);
62	} else if (i_set.find(x: filter_info->right_binding) != i_set.end()) {
63	// don't add both left and right to the matching_equivalent_sets
64	// since both left and right get added to that index anyway.
65	matching_equivalent_sets.push_back(x: equivalent_relation_index);
66	}
67	equivalent_relation_index++;
68	}
69	return matching_equivalent_sets;
70	}
71
72	void CardinalityEstimator::AddToEquivalenceSets(FilterInfo *filter_info, vector<idx_t> matching_equivalent_sets) {
73	D_ASSERT(matching_equivalent_sets.size() <= `2`);
74	if (matching_equivalent_sets.size() > `1`) {
75	// an equivalence relation is connecting to sets of equivalence relations
76	// so push all relations from the second set into the first. Later we will delete
77	// the second set.
78	for (ColumnBinding i : relations_to_tdoms.at(n: matching_equivalent_sets [`1`]).equivalent_relations) {
79	relations_to_tdoms.at(n: matching_equivalent_sets [`0`]).equivalent_relations.insert(x: i);
80	}
81	relations_to_tdoms.at(n: matching_equivalent_sets [`1`]).equivalent_relations.clear();
82	relations_to_tdoms.at(n: matching_equivalent_sets [`0`]).filters.push_back(x: filter_info);
83	// add all values of one set to the other, delete the empty one
84	} else if (matching_equivalent_sets.size() == `1`) {
85	auto &tdom_i = relations_to_tdoms.at(n: matching_equivalent_sets.at(n: `0`));
86	tdom_i.equivalent_relations.insert(x: filter_info->left_binding);
87	tdom_i.equivalent_relations.insert(x: filter_info->right_binding);
88	tdom_i.filters.push_back(x: filter_info);
89	} else if (matching_equivalent_sets.empty()) {
90	column_binding_set_t tmp;
91	tmp.insert(x: filter_info->left_binding);
92	tmp.insert(x: filter_info->right_binding);
93	relations_to_tdoms.emplace_back(args&: tmp);
94	relations_to_tdoms.back().filters.push_back(x: filter_info);
95	}
96	}
97
98	void CardinalityEstimator::AddRelationToColumnMapping(ColumnBinding key, ColumnBinding value) {
99	relation_column_to_original_column [key] = value;
100	}
101
102	void CardinalityEstimator::CopyRelationMap(column_binding_map_t<ColumnBinding> &child_binding_map) {
103	for (auto &binding_map : relation_column_to_original_column) {
104	D_ASSERT(child_binding_map.find(binding_map.first) == child_binding_map.end());
105	child_binding_map [binding_map.first] = binding_map.second;
106	}
107	}
108
109	void CardinalityEstimator::AddColumnToRelationMap(idx_t table_index, idx_t column_index) {
110	relation_attributes [table_index].columns.insert(x: column_index);
111	}
112
113	void CardinalityEstimator::InitEquivalentRelations(vector<unique_ptr<FilterInfo>> &filter_infos) {
114	// For each filter, we fill keep track of the index of the equivalent relation set
115	// the left and right relation needs to be added to.
116	for (auto &filter : filter_infos) {
117	if (SingleColumnFilter(filter_info&: *filter)) {
118	// Filter on one relation, (i.e string or range filter on a column).
119	// Grab the first relation and add it to the equivalence_relations
120	AddRelationTdom(filter_info&: *filter);
121	continue;
122	} else if (EmptyFilter(filter_info&: *filter)) {
123	continue;
124	}
125	D_ASSERT(filter->left_set->count >= `1`);
126	D_ASSERT(filter->right_set->count >= `1`);
127
128	auto matching_equivalent_sets = DetermineMatchingEquivalentSets(filter_info: filter.get());
129	AddToEquivalenceSets(filter_info: filter.get(), matching_equivalent_sets);
130	}
131	}
132
133	void CardinalityEstimator::VerifySymmetry(JoinNode &result, JoinNode &entry) {
134	if (result.GetCardinality<double>() != entry.GetCardinality<double>()) {
135	// Currently it's possible that some entries are cartesian joins.
136	// When this is the case, you don't always have symmetry, but
137	// if the cost of the result is less, then just assure the cardinality
138	// is also less, then you have the same effect of symmetry.
139	D_ASSERT(ceil(result.GetCardinality<double>()) <= ceil(entry.GetCardinality<double>()) \|\|
140	floor(result.GetCardinality<double>()) <= floor(entry.GetCardinality<double>()));
141	}
142	}
143
144	void CardinalityEstimator::InitTotalDomains() {
145	auto remove_start = std::remove_if(first: relations_to_tdoms.begin(), last: relations_to_tdoms.end(),
146	pred: [](RelationsToTDom &r_2_tdom) { return r_2_tdom.equivalent_relations.empty(); });
147	relations_to_tdoms.erase(first: remove_start, last: relations_to_tdoms.end());
148	}
149
150	double CardinalityEstimator::ComputeCost(JoinNode &left, JoinNode &right, double expected_cardinality) {
151	return expected_cardinality + left.GetCost() + right.GetCost();
152	}
153
154	double CardinalityEstimator::EstimateCrossProduct(const JoinNode &left, const JoinNode &right) {
155	// need to explicity use double here, otherwise auto converts it to an int, then
156	// there is an autocast in the return.
157	if (left.GetCardinality<double>() >= (NumericLimits<double>::Maximum() / right.GetCardinality<double>())) {
158	return NumericLimits<double>::Maximum();
159	}
160	return left.GetCardinality<double>() * right.GetCardinality<double>();
161	}
162
163	void CardinalityEstimator::AddRelationColumnMapping(LogicalGet &get, idx_t relation_id) {
164	for (idx_t it = `0`; it < get.column_ids.size(); it++) {
165	auto key = ColumnBinding (relation_id, it);
166	auto value = ColumnBinding (get.table_index, get.column_ids [it]);
167	AddRelationToColumnMapping(key, value);
168	}
169	}
170
171	void UpdateDenom(Subgraph2Denominator &relation_2_denom, RelationsToTDom &relation_to_tdom) {
172	relation_2_denom.denom *= relation_to_tdom.has_tdom_hll ? relation_to_tdom.tdom_hll : relation_to_tdom.tdom_no_hll;
173	}
174
175	void FindSubgraphMatchAndMerge(Subgraph2Denominator &merge_to, idx_t find_me,
176	vector<Subgraph2Denominator>::iterator subgraph,
177	vector<Subgraph2Denominator>::iterator end) {
178	for (; subgraph != end; subgraph ++) {
179	if (subgraph ->relations.count(x: find_me) >= `1`) {
180	for (auto &relation : subgraph ->relations) {
181	merge_to.relations.insert(x: relation);
182	}
183	subgraph ->relations.clear();
184	merge_to.denom *= subgraph ->denom;
185	return;
186	}
187	}
188	}
189
190	double CardinalityEstimator::EstimateCardinalityWithSet(JoinRelationSet &new_set) {
191	double numerator = `1`;
192	unordered_set<idx_t> actual_set;
193	for (idx_t i = `0`; i < new_set.count; i++) {
194	numerator *= relation_attributes [new_set.relations [i]].cardinality;
195	actual_set.insert(x: new_set.relations [i]);
196	}
197	vector<Subgraph2Denominator> subgraphs;
198	bool done = false;
199	bool found_match = false;
200
201	// Finding the denominator is tricky. You need to go through the tdoms in decreasing order
202	// Then loop through all filters in the equivalence set of the tdom to see if both the
203	// left and right relations are in the new set, if so you can use that filter.
204	// You must also make sure that the filters all relations in the given set, so we use subgraphs
205	// that should eventually merge into one connected graph that joins all the relations
206	// TODO: Implement a method to cache subgraphs so you don't have to build them up every
207	// time the cardinality of a new set is requested
208
209	// relations_to_tdoms has already been sorted.
210	for (auto &relation_2_tdom : relations_to_tdoms) {
211	// loop through each filter in the tdom.
212	if (done) {
213	break;
214	}
215	for (auto &filter : relation_2_tdom.filters) {
216	if (actual_set.count(x: filter->left_binding.table_index) == `0` \|\|
217	actual_set.count(x: filter->right_binding.table_index) == `0`) {
218	continue;
219	}
220	// the join filter is on relations in the new set.
221	found_match = false;
222	vector<Subgraph2Denominator>::iterator it;
223	for (it = subgraphs.begin(); it != subgraphs.end(); it ++) {
224	auto left_in = it ->relations.count(x: filter->left_binding.table_index);
225	auto right_in = it ->relations.count(x: filter->right_binding.table_index);
226	if (left_in && right_in) {
227	// if both left and right bindings are in the subgraph, continue.
228	// This means another filter is connecting relations already in the
229	// subgraph it, but it has a tdom that is less, and we don't care.
230	found_match = true;
231	continue;
232	}
233	if (!left_in && !right_in) {
234	// if both left and right bindings are not* in the subgraph, continue*
235	// without finding a match. This will trigger the process to add a new
236	// subgraph
237	continue;
238	}
239	idx_t find_table;
240	if (left_in) {
241	find_table = filter->right_binding.table_index;
242	} else {
243	D_ASSERT(right_in);
244	find_table = filter->left_binding.table_index;
245	}
246	auto next_subgraph = it + `1`;
247	// iterate through other subgraphs and merge.
248	FindSubgraphMatchAndMerge(merge_to&: *it, find_me: find_table, subgraph: next_subgraph, end: subgraphs.end());
249	// Now insert the right binding and update denominator with the
250	// tdom of the filter
251	it ->relations.insert(x: find_table);
252	UpdateDenom(relation_2_denom&: *it, relation_to_tdom&: relation_2_tdom);
253	found_match = true;
254	break;
255	}
256	// means that the filter joins relations in the given set, but there is no
257	// connection to any subgraph in subgraphs. Add a new subgraph, and maybe later there will be
258	// a connection.
259	if (!found_match) {
260	subgraphs.emplace_back();
261	auto &subgraph = subgraphs.back();
262	subgraph.relations.insert(x: filter->left_binding.table_index);
263	subgraph.relations.insert(x: filter->right_binding.table_index);
264	UpdateDenom(relation_2_denom&: subgraph, relation_to_tdom&: relation_2_tdom);
265	}
266	auto remove_start = std::remove_if(first: subgraphs.begin(), last: subgraphs.end(),
267	pred: [](Subgraph2Denominator &s) { return s.relations.empty(); });
268	subgraphs.erase(first: remove_start, last: subgraphs.end());
269
270	if (subgraphs.size() == `1` && subgraphs.at(n: `0`).relations.size() == new_set.count) {
271	// You have found enough filters to connect the relations. These are guaranteed
272	// to be the filters with the highest Tdoms.
273	done = true;
274	break;
275	}
276	}
277	}
278	double denom = `1`;
279	// TODO: It's possible cross-products were added and are not present in the filters in the relation_2_tdom
280	// structures. When that's the case, multiply the denom structures that have no intersection
281	for (auto &match : subgraphs) {
282	// It's possible that in production, one of the D_ASSERTS above will fail and not all subgraphs
283	// were connected. When this happens, just use the largest denominator of all the subgraphs.
284	if (match.denom > denom) {
285	denom = match.denom;
286	}
287	}
288	// can happen if a table has cardinality 0, or a tdom is set to 0
289	if (denom == `0`) {
290	denom = `1`;
291	}
292	return numerator / denom;
293	}
294
295	static bool IsLogicalFilter(LogicalOperator &op) {
296	return op.type == LogicalOperatorType::LOGICAL_FILTER;
297	}
298
299	static optional_ptr<LogicalGet> GetLogicalGet(LogicalOperator &op, idx_t table_index = DConstants::INVALID_INDEX) {
300	optional_ptr<LogicalGet> get;
301	switch (op.type) {
302	case LogicalOperatorType::LOGICAL_GET:
303	get = &op.Cast<LogicalGet>();
304	break;
305	case LogicalOperatorType::LOGICAL_FILTER:
306	get = GetLogicalGet(op&: *op.children.at(n: `0`), table_index);
307	break;
308	case LogicalOperatorType::LOGICAL_PROJECTION:
309	get = GetLogicalGet(op&: *op.children.at(n: `0`), table_index);
310	break;
311	case LogicalOperatorType::LOGICAL_ASOF_JOIN:
312	case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
313	auto &join = op.Cast<LogicalComparisonJoin>();
314	// We should never be calling GetLogicalGet without a valid table_index.
315	// We are attempting to get the catalog table for a relation (for statistics/cardinality estimation)
316	// A logical join means there is a non-reorderable relation in the join plan. This means we need
317	// to know the exact table index to return.
318	D_ASSERT(table_index != DConstants::INVALID_INDEX);
319	if (join.join_type == JoinType::MARK \|\| join.join_type == JoinType::LEFT) {
320	auto &left_child = *join.children.at(n: `0`);
321	get = GetLogicalGet(op&: left_child, table_index);
322	if (get && get ->table_index == table_index) {
323	return get;
324	}
325	auto &right_child = *join.children.at(n: `1`);
326	get = GetLogicalGet(op&: right_child, table_index);
327	if (get && get ->table_index == table_index) {
328	return get;
329	}
330	}
331	break;
332	}
333	default:
334	// return null pointer, maybe there is no logical get under this child
335	break;
336	}
337	return get;
338	}
339
340	void CardinalityEstimator::MergeBindings(idx_t binding_index, idx_t relation_id,
341	vector<column_binding_map_t<ColumnBinding>> &child_binding_maps) {
342	for (auto &map_set : child_binding_maps) {
343	for (auto &mapping : map_set) {
344	ColumnBinding relation_bindings = mapping.first;
345	ColumnBinding actual_bindings = mapping.second;
346
347	if (actual_bindings.table_index == binding_index) {
348	auto key = ColumnBinding (relation_id, relation_bindings.column_index);
349	AddRelationToColumnMapping(key, value: actual_bindings);
350	}
351	}
352	}
353	}
354
355	bool SortTdoms(const RelationsToTDom &a, const RelationsToTDom &b) {
356	if (a.has_tdom_hll && b.has_tdom_hll) {
357	return a.tdom_hll > b.tdom_hll;
358	}
359	if (a.has_tdom_hll) {
360	return a.tdom_hll > b.tdom_no_hll;
361	}
362	if (b.has_tdom_hll) {
363	return a.tdom_no_hll > b.tdom_hll;
364	}
365	return a.tdom_no_hll > b.tdom_no_hll;
366	}
367
368	void CardinalityEstimator::InitCardinalityEstimatorProps(vector<NodeOp> &node_ops,
369	vector<unique_ptr<FilterInfo>> &filter_infos) {
370	InitEquivalentRelations(filter_infos);
371	InitTotalDomains();
372	for (idx_t i = `0`; i < node_ops.size(); i++) {
373	auto &join_node = *node_ops [i].node;
374	auto &op = node_ops [i].op;
375	join_node.SetBaseTableCardinality(op.EstimateCardinality(context));
376	if (op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
377	auto &join = op.Cast<LogicalComparisonJoin>();
378	if (join.join_type == JoinType::LEFT) {
379	// If a base op is a Logical Comparison join it is probably a left join,
380	// so the cost of the larger table is a fine estimate.
381	// TODO: provide better estimates for cost of mark joins
382	// MARK joins are used for anti and semi joins, so the cost can conceivably be
383	// less than the base table cardinality.
384	join_node.SetCost(join_node.GetBaseTableCardinality());
385	}
386	} else if (op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
387	// AsOf joins have the cardinality of the LHS
388	join_node.SetCost(join_node.GetBaseTableCardinality());
389	}
390	// Total domains can be affected by filters. So we update base table cardinality first
391	EstimateBaseTableCardinality(node&: join_node, op);
392	// Then update total domains.
393	UpdateTotalDomains(node&: join_node, op);
394	}
395
396	// sort relations from greatest tdom to lowest tdom.
397	std::sort(first: relations_to_tdoms.begin(), last: relations_to_tdoms.end(), comp: SortTdoms);
398	}
399
400	void CardinalityEstimator::UpdateTotalDomains(JoinNode &node, LogicalOperator &op) {
401	auto relation_id = node.set.relations [`0`];
402	relation_attributes [relation_id].cardinality = node.GetCardinality<double>();
403	//! Initialize the distinct count for all columns used in joins with the current relation.
404	idx_t distinct_count = node.GetBaseTableCardinality();
405	optional_ptr<TableCatalogEntry> catalog_table;
406
407	optional_ptr<LogicalGet> get;
408	bool get_updated = true;
409	for (auto &column : relation_attributes [relation_id].columns) {
410	//! for every column used in a filter in the relation, get the distinct count via HLL, or assume it to be
411	//! the cardinality
412	ColumnBinding key = ColumnBinding (relation_id, column);
413	auto actual_binding = relation_column_to_original_column.find(x: key);
414	// each relation has columns that are either projected or used as filters
415	// In order to get column statistics we need to make sure the actual binding still
416	// refers to the same base table relation, as non-reorderable joins may involve 2+
417	// base table relations and therefore the columns may also refer to 2 different
418	// base table relations
419	if (actual_binding != relation_column_to_original_column.end() &&
420	(!get \|\| get ->table_index != actual_binding ->second.table_index)) {
421	get = GetLogicalGet(op, table_index: actual_binding ->second.table_index);
422	get_updated = true;
423	} else {
424	get_updated = false;
425	}
426
427	if (get_updated) {
428	if (get) {
429	catalog_table = GetCatalogTableEntry(op&: *get);
430	} else {
431	catalog_table = nullptr;
432	}
433	}
434
435	if (catalog_table && actual_binding != relation_column_to_original_column.end()) {
436	// Get HLL stats here
437	auto base_stats = catalog_table ->GetStatistics(context, column_id: actual_binding ->second.column_index);
438	if (base_stats) {
439	distinct_count = base_stats ->GetDistinctCount();
440	}
441
442	// HLL has estimation error, distinct_count can't be greater than cardinality of the table before filters
443	if (distinct_count > node.GetBaseTableCardinality()) {
444	distinct_count = node.GetBaseTableCardinality();
445	}
446	} else {
447	distinct_count = node.GetBaseTableCardinality();
448	}
449	// Update the relation_to_tdom set with the estimated distinct count (or tdom) calculated above
450	for (auto &relation_to_tdom : relations_to_tdoms) {
451	column_binding_set_t i_set = relation_to_tdom.equivalent_relations;
452	if (i_set.count(x: key) != `1`) {
453	continue;
454	}
455	if (catalog_table) {
456	if (relation_to_tdom.tdom_hll < distinct_count) {
457	relation_to_tdom.tdom_hll = distinct_count;
458	relation_to_tdom.has_tdom_hll = true;
459	}
460	if (relation_to_tdom.tdom_no_hll > distinct_count) {
461	relation_to_tdom.tdom_no_hll = distinct_count;
462	}
463	} else {
464	// Here we don't have catalog statistics, and the following is how we determine
465	// the tdom
466	// 1. If there is any hll data in the equivalence set, use that
467	// 2. Otherwise, use the table with the smallest cardinality
468	if (relation_to_tdom.tdom_no_hll > distinct_count && !relation_to_tdom.has_tdom_hll) {
469	relation_to_tdom.tdom_no_hll = distinct_count;
470	}
471	}
472	break;
473	}
474	}
475	}
476
477	optional_ptr<TableFilterSet> CardinalityEstimator::GetTableFilters(LogicalOperator &op, idx_t table_index) {
478	auto get = GetLogicalGet(op, table_index);
479	return get ? &get ->table_filters : nullptr;
480	}
481
482	idx_t CardinalityEstimator::InspectConjunctionAND(idx_t cardinality, idx_t column_index, ConjunctionAndFilter &filter,
483	unique_ptr<BaseStatistics> base_stats) {
484	auto has_equality_filter = false;
485	auto cardinality_after_filters = cardinality;
486	for (auto &child_filter : filter.child_filters) {
487	if (child_filter ->filter_type != TableFilterType::CONSTANT_COMPARISON) {
488	continue;
489	}
490	auto &comparison_filter = child_filter ->Cast<ConstantFilter>();
491	if (comparison_filter.comparison_type != ExpressionType::COMPARE_EQUAL) {
492	continue;
493	}
494	auto column_count = `0`;
495	if (base_stats) {
496	column_count = base_stats ->GetDistinctCount();
497	}
498	auto filtered_card = cardinality;
499	// column_count = 0 when there is no column count (i.e parquet scans)
500	if (column_count > `0`) {
501	// we want the ceil of cardinality/column_count. We also want to avoid compiler errors
502	filtered_card = (cardinality + column_count - `1`) / column_count;
503	cardinality_after_filters = filtered_card;
504	}
505	if (has_equality_filter) {
506	cardinality_after_filters = MinValue(a: filtered_card, b: cardinality_after_filters);
507	}
508	has_equality_filter = true;
509	}
510	return cardinality_after_filters;
511	}
512
513	idx_t CardinalityEstimator::InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &filter,
514	unique_ptr<BaseStatistics> base_stats) {
515	auto has_equality_filter = false;
516	auto cardinality_after_filters = cardinality;
517	for (auto &child_filter : filter.child_filters) {
518	if (child_filter ->filter_type != TableFilterType::CONSTANT_COMPARISON) {
519	continue;
520	}
521	auto &comparison_filter = child_filter ->Cast<ConstantFilter>();
522	if (comparison_filter.comparison_type == ExpressionType::COMPARE_EQUAL) {
523	auto column_count = cardinality_after_filters;
524	if (base_stats) {
525	column_count = base_stats ->GetDistinctCount();
526	}
527	auto increment = MaxValue<idx_t>(a: ((cardinality + column_count - `1`) / column_count), b: `1`);
528	if (has_equality_filter) {
529	cardinality_after_filters += increment;
530	} else {
531	cardinality_after_filters = increment;
532	}
533	has_equality_filter = true;
534	}
535	}
536	D_ASSERT(cardinality_after_filters > `0`);
537	return cardinality_after_filters;
538	}
539
540	idx_t CardinalityEstimator::InspectTableFilters(idx_t cardinality, LogicalOperator &op, TableFilterSet &table_filters,
541	idx_t table_index) {
542	idx_t cardinality_after_filters = cardinality;
543	auto get = GetLogicalGet(op, table_index);
544	unique_ptr<BaseStatistics> column_statistics;
545	for (auto &it : table_filters.filters) {
546	column_statistics = nullptr;
547	if (get ->bind_data && get ->function.name.compare(s: "seq_scan") == `0`) {
548	auto &table_scan_bind_data = get ->bind_data ->Cast<TableScanBindData>();
549	column_statistics = get ->function.statistics(context, &table_scan_bind_data, it.first);
550	}
551	if (it.second ->filter_type == TableFilterType::CONJUNCTION_AND) {
552	auto &filter = it.second ->Cast<ConjunctionAndFilter>();
553	idx_t cardinality_with_and_filter =
554	InspectConjunctionAND(cardinality, column_index: it.first, filter, base_stats: std::move(column_statistics));
555	cardinality_after_filters = MinValue(a: cardinality_after_filters, b: cardinality_with_and_filter);
556	} else if (it.second ->filter_type == TableFilterType::CONJUNCTION_OR) {
557	auto &filter = it.second ->Cast<ConjunctionOrFilter>();
558	idx_t cardinality_with_or_filter =
559	InspectConjunctionOR(cardinality, column_index: it.first, filter, base_stats: std::move(column_statistics));
560	cardinality_after_filters = MinValue(a: cardinality_after_filters, b: cardinality_with_or_filter);
561	}
562	}
563	// if the above code didn't find an equality filter (i.e country_code = "[us]")
564	// and there are other table filters, use default selectivity.
565	bool has_equality_filter = (cardinality_after_filters != cardinality);
566	if (!has_equality_filter && !table_filters.filters.empty()) {
567	cardinality_after_filters = MaxValue<idx_t>(a: cardinality * DEFAULT_SELECTIVITY, b: `1`);
568	}
569	return cardinality_after_filters;
570	}
571
572	void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalOperator &op) {
573	auto has_logical_filter = IsLogicalFilter(op);
574	D_ASSERT(node.set.count == `1`);
575	auto relation_id = node.set.relations [`0`];
576
577	double lowest_card_found = node.GetBaseTableCardinality();
578	for (auto &column : relation_attributes [relation_id].columns) {
579	auto card_after_filters = node.GetBaseTableCardinality();
580	ColumnBinding key = ColumnBinding (relation_id, column);
581	optional_ptr<TableFilterSet> table_filters;
582	auto actual_binding = relation_column_to_original_column.find(x: key);
583	if (actual_binding != relation_column_to_original_column.end()) {
584	table_filters = GetTableFilters(op, table_index: actual_binding ->second.table_index);
585	}
586
587	if (table_filters) {
588	double inspect_result =
589	(double)InspectTableFilters(cardinality: card_after_filters, op, table_filters&: *table_filters, table_index: actual_binding ->second.table_index);
590	card_after_filters = MinValue(a: inspect_result, b: (double)card_after_filters);
591	}
592	if (has_logical_filter) {
593	card_after_filters *= DEFAULT_SELECTIVITY;
594	}
595	lowest_card_found = MinValue(a: card_after_filters, b: lowest_card_found);
596	}
597	node.SetEstimatedCardinality(lowest_card_found);
598	}
599
600	} // namespace duckdb
601

Browse the source code of Velox/build/_deps/duckdb-src/src/optimizer/join_order/cardinality_estimator.cpp