1#include "duckdb/planner/binder.hpp"
2#include "duckdb/parser/statement/show_statement.hpp"
3#include "duckdb/planner/operator/logical_show.hpp"
4#include "duckdb/parser/query_node/select_node.hpp"
5#include "duckdb/parser/expression/function_expression.hpp"
6#include "duckdb/parser/expression/constant_expression.hpp"
7#include "duckdb/parser/expression/cast_expression.hpp"
8#include "duckdb/parser/tableref/subqueryref.hpp"
9
10namespace duckdb {
11
12static unique_ptr<ParsedExpression> SummarizeWrapUnnest(vector<unique_ptr<ParsedExpression>> &children,
13 const string &alias) {
14 auto list_function = make_uniq<FunctionExpression>(args: "list_value", args: std::move(children));
15 vector<unique_ptr<ParsedExpression>> unnest_children;
16 unnest_children.push_back(x: std::move(list_function));
17 auto unnest_function = make_uniq<FunctionExpression>(args: "unnest", args: std::move(unnest_children));
18 unnest_function->alias = alias;
19 return std::move(unnest_function);
20}
21
22static unique_ptr<ParsedExpression> SummarizeCreateAggregate(const string &aggregate, string column_name) {
23 vector<unique_ptr<ParsedExpression>> children;
24 children.push_back(x: make_uniq<ColumnRefExpression>(args: std::move(column_name)));
25 auto aggregate_function = make_uniq<FunctionExpression>(args: aggregate, args: std::move(children));
26 auto cast_function = make_uniq<CastExpression>(args: LogicalType::VARCHAR, args: std::move(aggregate_function));
27 return std::move(cast_function);
28}
29
30static unique_ptr<ParsedExpression> SummarizeCreateAggregate(const string &aggregate, string column_name,
31 const Value &modifier) {
32 vector<unique_ptr<ParsedExpression>> children;
33 children.push_back(x: make_uniq<ColumnRefExpression>(args: std::move(column_name)));
34 children.push_back(x: make_uniq<ConstantExpression>(args: modifier));
35 auto aggregate_function = make_uniq<FunctionExpression>(args: aggregate, args: std::move(children));
36 auto cast_function = make_uniq<CastExpression>(args: LogicalType::VARCHAR, args: std::move(aggregate_function));
37 return std::move(cast_function);
38}
39
40static unique_ptr<ParsedExpression> SummarizeCreateCountStar() {
41 vector<unique_ptr<ParsedExpression>> children;
42 auto aggregate_function = make_uniq<FunctionExpression>(args: "count_star", args: std::move(children));
43 return std::move(aggregate_function);
44}
45
46static unique_ptr<ParsedExpression> SummarizeCreateBinaryFunction(const string &op, unique_ptr<ParsedExpression> left,
47 unique_ptr<ParsedExpression> right) {
48 vector<unique_ptr<ParsedExpression>> children;
49 children.push_back(x: std::move(left));
50 children.push_back(x: std::move(right));
51 auto binary_function = make_uniq<FunctionExpression>(args: op, args: std::move(children));
52 return std::move(binary_function);
53}
54
55static unique_ptr<ParsedExpression> SummarizeCreateNullPercentage(string column_name) {
56 auto count_star = make_uniq<CastExpression>(args: LogicalType::DOUBLE, args: SummarizeCreateCountStar());
57 auto count =
58 make_uniq<CastExpression>(args: LogicalType::DOUBLE, args: SummarizeCreateAggregate(aggregate: "count", column_name: std::move(column_name)));
59 auto null_percentage = SummarizeCreateBinaryFunction(op: "/", left: std::move(count), right: std::move(count_star));
60 auto negate_x =
61 SummarizeCreateBinaryFunction(op: "-", left: make_uniq<ConstantExpression>(args: Value::DOUBLE(value: 1)), right: std::move(null_percentage));
62 auto percentage_x =
63 SummarizeCreateBinaryFunction(op: "*", left: std::move(negate_x), right: make_uniq<ConstantExpression>(args: Value::DOUBLE(value: 100)));
64 auto round_x = SummarizeCreateBinaryFunction(op: "round", left: std::move(percentage_x),
65 right: make_uniq<ConstantExpression>(args: Value::INTEGER(value: 2)));
66 auto concat_x =
67 SummarizeCreateBinaryFunction(op: "concat", left: std::move(round_x), right: make_uniq<ConstantExpression>(args: Value("%")));
68
69 return concat_x;
70}
71
72BoundStatement Binder::BindSummarize(ShowStatement &stmt) {
73 auto query_copy = stmt.info->query->Copy();
74
75 // we bind the plan once in a child-node to figure out the column names and column types
76 auto child_binder = Binder::CreateBinder(context);
77 auto plan = child_binder->Bind(node&: *stmt.info->query);
78 D_ASSERT(plan.types.size() == plan.names.size());
79 vector<unique_ptr<ParsedExpression>> name_children;
80 vector<unique_ptr<ParsedExpression>> type_children;
81 vector<unique_ptr<ParsedExpression>> min_children;
82 vector<unique_ptr<ParsedExpression>> max_children;
83 vector<unique_ptr<ParsedExpression>> unique_children;
84 vector<unique_ptr<ParsedExpression>> avg_children;
85 vector<unique_ptr<ParsedExpression>> std_children;
86 vector<unique_ptr<ParsedExpression>> q25_children;
87 vector<unique_ptr<ParsedExpression>> q50_children;
88 vector<unique_ptr<ParsedExpression>> q75_children;
89 vector<unique_ptr<ParsedExpression>> count_children;
90 vector<unique_ptr<ParsedExpression>> null_percentage_children;
91 auto select = make_uniq<SelectStatement>();
92 select->node = std::move(query_copy);
93 for (idx_t i = 0; i < plan.names.size(); i++) {
94 name_children.push_back(x: make_uniq<ConstantExpression>(args: Value(plan.names[i])));
95 type_children.push_back(x: make_uniq<ConstantExpression>(args: Value(plan.types[i].ToString())));
96 min_children.push_back(x: SummarizeCreateAggregate(aggregate: "min", column_name: plan.names[i]));
97 max_children.push_back(x: SummarizeCreateAggregate(aggregate: "max", column_name: plan.names[i]));
98 unique_children.push_back(x: SummarizeCreateAggregate(aggregate: "approx_count_distinct", column_name: plan.names[i]));
99 if (plan.types[i].IsNumeric()) {
100 avg_children.push_back(x: SummarizeCreateAggregate(aggregate: "avg", column_name: plan.names[i]));
101 std_children.push_back(x: SummarizeCreateAggregate(aggregate: "stddev", column_name: plan.names[i]));
102 q25_children.push_back(x: SummarizeCreateAggregate(aggregate: "approx_quantile", column_name: plan.names[i], modifier: Value::FLOAT(value: 0.25)));
103 q50_children.push_back(x: SummarizeCreateAggregate(aggregate: "approx_quantile", column_name: plan.names[i], modifier: Value::FLOAT(value: 0.50)));
104 q75_children.push_back(x: SummarizeCreateAggregate(aggregate: "approx_quantile", column_name: plan.names[i], modifier: Value::FLOAT(value: 0.75)));
105 } else {
106 avg_children.push_back(x: make_uniq<ConstantExpression>(args: Value()));
107 std_children.push_back(x: make_uniq<ConstantExpression>(args: Value()));
108 q25_children.push_back(x: make_uniq<ConstantExpression>(args: Value()));
109 q50_children.push_back(x: make_uniq<ConstantExpression>(args: Value()));
110 q75_children.push_back(x: make_uniq<ConstantExpression>(args: Value()));
111 }
112 count_children.push_back(x: SummarizeCreateCountStar());
113 null_percentage_children.push_back(x: SummarizeCreateNullPercentage(column_name: plan.names[i]));
114 }
115 auto subquery_ref = make_uniq<SubqueryRef>(args: std::move(select), args: "summarize_tbl");
116 subquery_ref->column_name_alias = plan.names;
117
118 auto select_node = make_uniq<SelectNode>();
119 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: name_children, alias: "column_name"));
120 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: type_children, alias: "column_type"));
121 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: min_children, alias: "min"));
122 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: max_children, alias: "max"));
123 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: unique_children, alias: "approx_unique"));
124 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: avg_children, alias: "avg"));
125 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: std_children, alias: "std"));
126 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: q25_children, alias: "q25"));
127 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: q50_children, alias: "q50"));
128 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: q75_children, alias: "q75"));
129 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: count_children, alias: "count"));
130 select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: null_percentage_children, alias: "null_percentage"));
131 select_node->from_table = std::move(subquery_ref);
132
133 properties.return_type = StatementReturnType::QUERY_RESULT;
134 return Bind(node&: *select_node);
135}
136
137} // namespace duckdb
138