1#include "duckdb/planner/binder.hpp"
2#include "duckdb/parser/expression/star_expression.hpp"
3#include "duckdb/parser/expression/constant_expression.hpp"
4#include "duckdb/planner/expression_binder/table_function_binder.hpp"
5#include "duckdb/parser/parsed_expression_iterator.hpp"
6#include "duckdb/execution/expression_executor.hpp"
7#include "re2/re2.h"
8
9namespace duckdb {
10
11string GetColumnsStringValue(ParsedExpression &expr) {
12 if (expr.type == ExpressionType::COLUMN_REF) {
13 auto &colref = expr.Cast<ColumnRefExpression>();
14 return colref.GetColumnName();
15 } else {
16 return expr.ToString();
17 }
18}
19
20bool Binder::FindStarExpression(unique_ptr<ParsedExpression> &expr, StarExpression **star, bool is_root,
21 bool in_columns) {
22 bool has_star = false;
23 if (expr->GetExpressionClass() == ExpressionClass::STAR) {
24 auto &current_star = expr->Cast<StarExpression>();
25 if (!current_star.columns) {
26 if (is_root) {
27 *star = &current_star;
28 return true;
29 }
30 if (!in_columns) {
31 throw BinderException(
32 "STAR expression is only allowed as the root element of an expression. Use COLUMNS(*) instead.");
33 }
34 // star expression inside a COLUMNS - convert to a constant list
35 if (!current_star.replace_list.empty()) {
36 throw BinderException(
37 "STAR expression with REPLACE list is only allowed as the root element of COLUMNS");
38 }
39 vector<unique_ptr<ParsedExpression>> star_list;
40 bind_context.GenerateAllColumnExpressions(expr&: current_star, new_select_list&: star_list);
41
42 vector<Value> values;
43 values.reserve(n: star_list.size());
44 for (auto &expr : star_list) {
45 values.emplace_back(args: GetColumnsStringValue(expr&: *expr));
46 }
47 D_ASSERT(!values.empty());
48
49 expr = make_uniq<ConstantExpression>(args: Value::LIST(child_type: LogicalType::VARCHAR, values));
50 return true;
51 }
52 if (in_columns) {
53 throw BinderException("COLUMNS expression is not allowed inside another COLUMNS expression");
54 }
55 in_columns = true;
56 if (*star) {
57 // we can have multiple
58 if (!(*star)->Equals(other: current_star)) {
59 throw BinderException(
60 FormatError(expr_context&: *expr, message: "Multiple different STAR/COLUMNS in the same expression are not supported"));
61 }
62 return true;
63 }
64 *star = &current_star;
65 has_star = true;
66 }
67 ParsedExpressionIterator::EnumerateChildren(expr&: *expr, callback: [&](unique_ptr<ParsedExpression> &child_expr) {
68 if (FindStarExpression(expr&: child_expr, star, is_root: false, in_columns)) {
69 has_star = true;
70 }
71 });
72 return has_star;
73}
74
75void Binder::ReplaceStarExpression(unique_ptr<ParsedExpression> &expr, unique_ptr<ParsedExpression> &replacement) {
76 D_ASSERT(expr);
77 if (expr->GetExpressionClass() == ExpressionClass::STAR) {
78 D_ASSERT(replacement);
79 expr = replacement->Copy();
80 return;
81 }
82 ParsedExpressionIterator::EnumerateChildren(
83 expr&: *expr, callback: [&](unique_ptr<ParsedExpression> &child_expr) { ReplaceStarExpression(expr&: child_expr, replacement); });
84}
85
86void Binder::ExpandStarExpression(unique_ptr<ParsedExpression> expr,
87 vector<unique_ptr<ParsedExpression>> &new_select_list) {
88 StarExpression *star = nullptr;
89 if (!FindStarExpression(expr, star: &star, is_root: true, in_columns: false)) {
90 // no star expression: add it as-is
91 D_ASSERT(!star);
92 new_select_list.push_back(x: std::move(expr));
93 return;
94 }
95 D_ASSERT(star);
96 vector<unique_ptr<ParsedExpression>> star_list;
97 // we have star expressions! expand the list of star expressions
98 bind_context.GenerateAllColumnExpressions(expr&: *star, new_select_list&: star_list);
99
100 if (star->expr) {
101 // COLUMNS with an expression
102 // two options:
103 // VARCHAR parameter <- this is a regular expression
104 // LIST of VARCHAR parameters <- this is a set of columns
105 TableFunctionBinder binder(*this, context);
106 auto child = star->expr->Copy();
107 auto result = binder.Bind(expr&: child);
108 if (!result->IsFoldable()) {
109 // cannot resolve parameters here
110 if (star->expr->HasParameter()) {
111 throw ParameterNotResolvedException();
112 } else {
113 throw BinderException("Unsupported expression in COLUMNS");
114 }
115 }
116 auto val = ExpressionExecutor::EvaluateScalar(context, expr: *result);
117 if (val.type().id() == LogicalTypeId::VARCHAR) {
118 // regex
119 if (val.IsNull()) {
120 throw BinderException("COLUMNS does not support NULL as regex argument");
121 }
122 auto &regex_str = StringValue::Get(value: val);
123 duckdb_re2::RE2 regex(regex_str);
124 if (!regex.error().empty()) {
125 auto err = StringUtil::Format(fmt_str: "Failed to compile regex \"%s\": %s", params: regex_str, params: regex.error());
126 throw BinderException(FormatError(expr_context&: *star, message: err));
127 }
128 vector<unique_ptr<ParsedExpression>> new_list;
129 for (idx_t i = 0; i < star_list.size(); i++) {
130 auto &colref = star_list[i]->Cast<ColumnRefExpression>();
131 if (!RE2::PartialMatch(text: colref.GetColumnName(), re: regex)) {
132 continue;
133 }
134 new_list.push_back(x: std::move(star_list[i]));
135 }
136 if (new_list.empty()) {
137 auto err = StringUtil::Format(fmt_str: "No matching columns found that match regex \"%s\"", params: regex_str);
138 throw BinderException(FormatError(expr_context&: *star, message: err));
139 }
140 star_list = std::move(new_list);
141 } else if (val.type().id() == LogicalTypeId::LIST &&
142 ListType::GetChildType(type: val.type()).id() == LogicalTypeId::VARCHAR) {
143 // list of varchar columns
144 if (val.IsNull() || ListValue::GetChildren(value: val).empty()) {
145 auto err =
146 StringUtil::Format(fmt_str: "Star expression \"%s\" resulted in an empty set of columns", params: star->ToString());
147 throw BinderException(FormatError(expr_context&: *star, message: err));
148 }
149 auto &children = ListValue::GetChildren(value: val);
150 vector<unique_ptr<ParsedExpression>> new_list;
151 // scan the list for all selected columns and construct a lookup table
152 case_insensitive_map_t<bool> selected_set;
153 for (auto &child : children) {
154 selected_set.insert(x: make_pair(x: StringValue::Get(value: child), y: false));
155 }
156 // now check the list of all possible expressions and select which ones make it in
157 for (auto &expr : star_list) {
158 auto str = GetColumnsStringValue(expr&: *expr);
159 auto entry = selected_set.find(x: str);
160 if (entry != selected_set.end()) {
161 new_list.push_back(x: std::move(expr));
162 entry->second = true;
163 }
164 }
165 // check if all expressions found a match
166 for (auto &entry : selected_set) {
167 if (!entry.second) {
168 throw BinderException("Column \"%s\" was selected but was not found in the FROM clause",
169 entry.first);
170 }
171 }
172 star_list = std::move(new_list);
173 } else {
174 throw BinderException(FormatError(
175 expr_context&: *star, message: "COLUMNS expects either a VARCHAR argument (regex) or a LIST of VARCHAR (list of columns)"));
176 }
177 }
178
179 // now perform the replacement
180 for (idx_t i = 0; i < star_list.size(); i++) {
181 auto new_expr = expr->Copy();
182 ReplaceStarExpression(expr&: new_expr, replacement&: star_list[i]);
183 new_select_list.push_back(x: std::move(new_expr));
184 }
185}
186
187void Binder::ExpandStarExpressions(vector<unique_ptr<ParsedExpression>> &select_list,
188 vector<unique_ptr<ParsedExpression>> &new_select_list) {
189 for (auto &select_element : select_list) {
190 ExpandStarExpression(expr: std::move(select_element), new_select_list);
191 }
192}
193
194} // namespace duckdb
195