| 1 | #include "duckdb/planner/binder.hpp" |
| 2 | #include "duckdb/parser/statement/show_statement.hpp" |
| 3 | #include "duckdb/planner/operator/logical_show.hpp" |
| 4 | #include "duckdb/parser/query_node/select_node.hpp" |
| 5 | #include "duckdb/parser/expression/function_expression.hpp" |
| 6 | #include "duckdb/parser/expression/constant_expression.hpp" |
| 7 | #include "duckdb/parser/expression/cast_expression.hpp" |
| 8 | #include "duckdb/parser/tableref/subqueryref.hpp" |
| 9 | |
| 10 | namespace duckdb { |
| 11 | |
| 12 | static unique_ptr<ParsedExpression> SummarizeWrapUnnest(vector<unique_ptr<ParsedExpression>> &children, |
| 13 | const string &alias) { |
| 14 | auto list_function = make_uniq<FunctionExpression>(args: "list_value" , args: std::move(children)); |
| 15 | vector<unique_ptr<ParsedExpression>> unnest_children; |
| 16 | unnest_children.push_back(x: std::move(list_function)); |
| 17 | auto unnest_function = make_uniq<FunctionExpression>(args: "unnest" , args: std::move(unnest_children)); |
| 18 | unnest_function->alias = alias; |
| 19 | return std::move(unnest_function); |
| 20 | } |
| 21 | |
| 22 | static unique_ptr<ParsedExpression> SummarizeCreateAggregate(const string &aggregate, string column_name) { |
| 23 | vector<unique_ptr<ParsedExpression>> children; |
| 24 | children.push_back(x: make_uniq<ColumnRefExpression>(args: std::move(column_name))); |
| 25 | auto aggregate_function = make_uniq<FunctionExpression>(args: aggregate, args: std::move(children)); |
| 26 | auto cast_function = make_uniq<CastExpression>(args: LogicalType::VARCHAR, args: std::move(aggregate_function)); |
| 27 | return std::move(cast_function); |
| 28 | } |
| 29 | |
| 30 | static unique_ptr<ParsedExpression> SummarizeCreateAggregate(const string &aggregate, string column_name, |
| 31 | const Value &modifier) { |
| 32 | vector<unique_ptr<ParsedExpression>> children; |
| 33 | children.push_back(x: make_uniq<ColumnRefExpression>(args: std::move(column_name))); |
| 34 | children.push_back(x: make_uniq<ConstantExpression>(args: modifier)); |
| 35 | auto aggregate_function = make_uniq<FunctionExpression>(args: aggregate, args: std::move(children)); |
| 36 | auto cast_function = make_uniq<CastExpression>(args: LogicalType::VARCHAR, args: std::move(aggregate_function)); |
| 37 | return std::move(cast_function); |
| 38 | } |
| 39 | |
| 40 | static unique_ptr<ParsedExpression> SummarizeCreateCountStar() { |
| 41 | vector<unique_ptr<ParsedExpression>> children; |
| 42 | auto aggregate_function = make_uniq<FunctionExpression>(args: "count_star" , args: std::move(children)); |
| 43 | return std::move(aggregate_function); |
| 44 | } |
| 45 | |
| 46 | static unique_ptr<ParsedExpression> SummarizeCreateBinaryFunction(const string &op, unique_ptr<ParsedExpression> left, |
| 47 | unique_ptr<ParsedExpression> right) { |
| 48 | vector<unique_ptr<ParsedExpression>> children; |
| 49 | children.push_back(x: std::move(left)); |
| 50 | children.push_back(x: std::move(right)); |
| 51 | auto binary_function = make_uniq<FunctionExpression>(args: op, args: std::move(children)); |
| 52 | return std::move(binary_function); |
| 53 | } |
| 54 | |
| 55 | static unique_ptr<ParsedExpression> SummarizeCreateNullPercentage(string column_name) { |
| 56 | auto count_star = make_uniq<CastExpression>(args: LogicalType::DOUBLE, args: SummarizeCreateCountStar()); |
| 57 | auto count = |
| 58 | make_uniq<CastExpression>(args: LogicalType::DOUBLE, args: SummarizeCreateAggregate(aggregate: "count" , column_name: std::move(column_name))); |
| 59 | auto null_percentage = SummarizeCreateBinaryFunction(op: "/" , left: std::move(count), right: std::move(count_star)); |
| 60 | auto negate_x = |
| 61 | SummarizeCreateBinaryFunction(op: "-" , left: make_uniq<ConstantExpression>(args: Value::DOUBLE(value: 1)), right: std::move(null_percentage)); |
| 62 | auto percentage_x = |
| 63 | SummarizeCreateBinaryFunction(op: "*" , left: std::move(negate_x), right: make_uniq<ConstantExpression>(args: Value::DOUBLE(value: 100))); |
| 64 | auto round_x = SummarizeCreateBinaryFunction(op: "round" , left: std::move(percentage_x), |
| 65 | right: make_uniq<ConstantExpression>(args: Value::INTEGER(value: 2))); |
| 66 | auto concat_x = |
| 67 | SummarizeCreateBinaryFunction(op: "concat" , left: std::move(round_x), right: make_uniq<ConstantExpression>(args: Value("%" ))); |
| 68 | |
| 69 | return concat_x; |
| 70 | } |
| 71 | |
| 72 | BoundStatement Binder::BindSummarize(ShowStatement &stmt) { |
| 73 | auto query_copy = stmt.info->query->Copy(); |
| 74 | |
| 75 | // we bind the plan once in a child-node to figure out the column names and column types |
| 76 | auto child_binder = Binder::CreateBinder(context); |
| 77 | auto plan = child_binder->Bind(node&: *stmt.info->query); |
| 78 | D_ASSERT(plan.types.size() == plan.names.size()); |
| 79 | vector<unique_ptr<ParsedExpression>> name_children; |
| 80 | vector<unique_ptr<ParsedExpression>> type_children; |
| 81 | vector<unique_ptr<ParsedExpression>> min_children; |
| 82 | vector<unique_ptr<ParsedExpression>> max_children; |
| 83 | vector<unique_ptr<ParsedExpression>> unique_children; |
| 84 | vector<unique_ptr<ParsedExpression>> avg_children; |
| 85 | vector<unique_ptr<ParsedExpression>> std_children; |
| 86 | vector<unique_ptr<ParsedExpression>> q25_children; |
| 87 | vector<unique_ptr<ParsedExpression>> q50_children; |
| 88 | vector<unique_ptr<ParsedExpression>> q75_children; |
| 89 | vector<unique_ptr<ParsedExpression>> count_children; |
| 90 | vector<unique_ptr<ParsedExpression>> null_percentage_children; |
| 91 | auto select = make_uniq<SelectStatement>(); |
| 92 | select->node = std::move(query_copy); |
| 93 | for (idx_t i = 0; i < plan.names.size(); i++) { |
| 94 | name_children.push_back(x: make_uniq<ConstantExpression>(args: Value(plan.names[i]))); |
| 95 | type_children.push_back(x: make_uniq<ConstantExpression>(args: Value(plan.types[i].ToString()))); |
| 96 | min_children.push_back(x: SummarizeCreateAggregate(aggregate: "min" , column_name: plan.names[i])); |
| 97 | max_children.push_back(x: SummarizeCreateAggregate(aggregate: "max" , column_name: plan.names[i])); |
| 98 | unique_children.push_back(x: SummarizeCreateAggregate(aggregate: "approx_count_distinct" , column_name: plan.names[i])); |
| 99 | if (plan.types[i].IsNumeric()) { |
| 100 | avg_children.push_back(x: SummarizeCreateAggregate(aggregate: "avg" , column_name: plan.names[i])); |
| 101 | std_children.push_back(x: SummarizeCreateAggregate(aggregate: "stddev" , column_name: plan.names[i])); |
| 102 | q25_children.push_back(x: SummarizeCreateAggregate(aggregate: "approx_quantile" , column_name: plan.names[i], modifier: Value::FLOAT(value: 0.25))); |
| 103 | q50_children.push_back(x: SummarizeCreateAggregate(aggregate: "approx_quantile" , column_name: plan.names[i], modifier: Value::FLOAT(value: 0.50))); |
| 104 | q75_children.push_back(x: SummarizeCreateAggregate(aggregate: "approx_quantile" , column_name: plan.names[i], modifier: Value::FLOAT(value: 0.75))); |
| 105 | } else { |
| 106 | avg_children.push_back(x: make_uniq<ConstantExpression>(args: Value())); |
| 107 | std_children.push_back(x: make_uniq<ConstantExpression>(args: Value())); |
| 108 | q25_children.push_back(x: make_uniq<ConstantExpression>(args: Value())); |
| 109 | q50_children.push_back(x: make_uniq<ConstantExpression>(args: Value())); |
| 110 | q75_children.push_back(x: make_uniq<ConstantExpression>(args: Value())); |
| 111 | } |
| 112 | count_children.push_back(x: SummarizeCreateCountStar()); |
| 113 | null_percentage_children.push_back(x: SummarizeCreateNullPercentage(column_name: plan.names[i])); |
| 114 | } |
| 115 | auto subquery_ref = make_uniq<SubqueryRef>(args: std::move(select), args: "summarize_tbl" ); |
| 116 | subquery_ref->column_name_alias = plan.names; |
| 117 | |
| 118 | auto select_node = make_uniq<SelectNode>(); |
| 119 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: name_children, alias: "column_name" )); |
| 120 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: type_children, alias: "column_type" )); |
| 121 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: min_children, alias: "min" )); |
| 122 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: max_children, alias: "max" )); |
| 123 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: unique_children, alias: "approx_unique" )); |
| 124 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: avg_children, alias: "avg" )); |
| 125 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: std_children, alias: "std" )); |
| 126 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: q25_children, alias: "q25" )); |
| 127 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: q50_children, alias: "q50" )); |
| 128 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: q75_children, alias: "q75" )); |
| 129 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: count_children, alias: "count" )); |
| 130 | select_node->select_list.push_back(x: SummarizeWrapUnnest(children&: null_percentage_children, alias: "null_percentage" )); |
| 131 | select_node->from_table = std::move(subquery_ref); |
| 132 | |
| 133 | properties.return_type = StatementReturnType::QUERY_RESULT; |
| 134 | return Bind(node&: *select_node); |
| 135 | } |
| 136 | |
| 137 | } // namespace duckdb |
| 138 | |