| 1 | #include "duckdb/planner/binder.hpp" |
| 2 | #include "duckdb/parser/expression/star_expression.hpp" |
| 3 | #include "duckdb/parser/expression/constant_expression.hpp" |
| 4 | #include "duckdb/planner/expression_binder/table_function_binder.hpp" |
| 5 | #include "duckdb/parser/parsed_expression_iterator.hpp" |
| 6 | #include "duckdb/execution/expression_executor.hpp" |
| 7 | #include "re2/re2.h" |
| 8 | |
| 9 | namespace duckdb { |
| 10 | |
| 11 | string GetColumnsStringValue(ParsedExpression &expr) { |
| 12 | if (expr.type == ExpressionType::COLUMN_REF) { |
| 13 | auto &colref = expr.Cast<ColumnRefExpression>(); |
| 14 | return colref.GetColumnName(); |
| 15 | } else { |
| 16 | return expr.ToString(); |
| 17 | } |
| 18 | } |
| 19 | |
| 20 | bool Binder::FindStarExpression(unique_ptr<ParsedExpression> &expr, StarExpression **star, bool is_root, |
| 21 | bool in_columns) { |
| 22 | bool has_star = false; |
| 23 | if (expr->GetExpressionClass() == ExpressionClass::STAR) { |
| 24 | auto ¤t_star = expr->Cast<StarExpression>(); |
| 25 | if (!current_star.columns) { |
| 26 | if (is_root) { |
| 27 | *star = ¤t_star; |
| 28 | return true; |
| 29 | } |
| 30 | if (!in_columns) { |
| 31 | throw BinderException( |
| 32 | "STAR expression is only allowed as the root element of an expression. Use COLUMNS(*) instead." ); |
| 33 | } |
| 34 | // star expression inside a COLUMNS - convert to a constant list |
| 35 | if (!current_star.replace_list.empty()) { |
| 36 | throw BinderException( |
| 37 | "STAR expression with REPLACE list is only allowed as the root element of COLUMNS" ); |
| 38 | } |
| 39 | vector<unique_ptr<ParsedExpression>> star_list; |
| 40 | bind_context.GenerateAllColumnExpressions(expr&: current_star, new_select_list&: star_list); |
| 41 | |
| 42 | vector<Value> values; |
| 43 | values.reserve(n: star_list.size()); |
| 44 | for (auto &expr : star_list) { |
| 45 | values.emplace_back(args: GetColumnsStringValue(expr&: *expr)); |
| 46 | } |
| 47 | D_ASSERT(!values.empty()); |
| 48 | |
| 49 | expr = make_uniq<ConstantExpression>(args: Value::LIST(child_type: LogicalType::VARCHAR, values)); |
| 50 | return true; |
| 51 | } |
| 52 | if (in_columns) { |
| 53 | throw BinderException("COLUMNS expression is not allowed inside another COLUMNS expression" ); |
| 54 | } |
| 55 | in_columns = true; |
| 56 | if (*star) { |
| 57 | // we can have multiple |
| 58 | if (!(*star)->Equals(other: current_star)) { |
| 59 | throw BinderException( |
| 60 | FormatError(expr_context&: *expr, message: "Multiple different STAR/COLUMNS in the same expression are not supported" )); |
| 61 | } |
| 62 | return true; |
| 63 | } |
| 64 | *star = ¤t_star; |
| 65 | has_star = true; |
| 66 | } |
| 67 | ParsedExpressionIterator::EnumerateChildren(expr&: *expr, callback: [&](unique_ptr<ParsedExpression> &child_expr) { |
| 68 | if (FindStarExpression(expr&: child_expr, star, is_root: false, in_columns)) { |
| 69 | has_star = true; |
| 70 | } |
| 71 | }); |
| 72 | return has_star; |
| 73 | } |
| 74 | |
| 75 | void Binder::ReplaceStarExpression(unique_ptr<ParsedExpression> &expr, unique_ptr<ParsedExpression> &replacement) { |
| 76 | D_ASSERT(expr); |
| 77 | if (expr->GetExpressionClass() == ExpressionClass::STAR) { |
| 78 | D_ASSERT(replacement); |
| 79 | expr = replacement->Copy(); |
| 80 | return; |
| 81 | } |
| 82 | ParsedExpressionIterator::EnumerateChildren( |
| 83 | expr&: *expr, callback: [&](unique_ptr<ParsedExpression> &child_expr) { ReplaceStarExpression(expr&: child_expr, replacement); }); |
| 84 | } |
| 85 | |
| 86 | void Binder::ExpandStarExpression(unique_ptr<ParsedExpression> expr, |
| 87 | vector<unique_ptr<ParsedExpression>> &new_select_list) { |
| 88 | StarExpression *star = nullptr; |
| 89 | if (!FindStarExpression(expr, star: &star, is_root: true, in_columns: false)) { |
| 90 | // no star expression: add it as-is |
| 91 | D_ASSERT(!star); |
| 92 | new_select_list.push_back(x: std::move(expr)); |
| 93 | return; |
| 94 | } |
| 95 | D_ASSERT(star); |
| 96 | vector<unique_ptr<ParsedExpression>> star_list; |
| 97 | // we have star expressions! expand the list of star expressions |
| 98 | bind_context.GenerateAllColumnExpressions(expr&: *star, new_select_list&: star_list); |
| 99 | |
| 100 | if (star->expr) { |
| 101 | // COLUMNS with an expression |
| 102 | // two options: |
| 103 | // VARCHAR parameter <- this is a regular expression |
| 104 | // LIST of VARCHAR parameters <- this is a set of columns |
| 105 | TableFunctionBinder binder(*this, context); |
| 106 | auto child = star->expr->Copy(); |
| 107 | auto result = binder.Bind(expr&: child); |
| 108 | if (!result->IsFoldable()) { |
| 109 | // cannot resolve parameters here |
| 110 | if (star->expr->HasParameter()) { |
| 111 | throw ParameterNotResolvedException(); |
| 112 | } else { |
| 113 | throw BinderException("Unsupported expression in COLUMNS" ); |
| 114 | } |
| 115 | } |
| 116 | auto val = ExpressionExecutor::EvaluateScalar(context, expr: *result); |
| 117 | if (val.type().id() == LogicalTypeId::VARCHAR) { |
| 118 | // regex |
| 119 | if (val.IsNull()) { |
| 120 | throw BinderException("COLUMNS does not support NULL as regex argument" ); |
| 121 | } |
| 122 | auto ®ex_str = StringValue::Get(value: val); |
| 123 | duckdb_re2::RE2 regex(regex_str); |
| 124 | if (!regex.error().empty()) { |
| 125 | auto err = StringUtil::Format(fmt_str: "Failed to compile regex \"%s\": %s" , params: regex_str, params: regex.error()); |
| 126 | throw BinderException(FormatError(expr_context&: *star, message: err)); |
| 127 | } |
| 128 | vector<unique_ptr<ParsedExpression>> new_list; |
| 129 | for (idx_t i = 0; i < star_list.size(); i++) { |
| 130 | auto &colref = star_list[i]->Cast<ColumnRefExpression>(); |
| 131 | if (!RE2::PartialMatch(text: colref.GetColumnName(), re: regex)) { |
| 132 | continue; |
| 133 | } |
| 134 | new_list.push_back(x: std::move(star_list[i])); |
| 135 | } |
| 136 | if (new_list.empty()) { |
| 137 | auto err = StringUtil::Format(fmt_str: "No matching columns found that match regex \"%s\"" , params: regex_str); |
| 138 | throw BinderException(FormatError(expr_context&: *star, message: err)); |
| 139 | } |
| 140 | star_list = std::move(new_list); |
| 141 | } else if (val.type().id() == LogicalTypeId::LIST && |
| 142 | ListType::GetChildType(type: val.type()).id() == LogicalTypeId::VARCHAR) { |
| 143 | // list of varchar columns |
| 144 | if (val.IsNull() || ListValue::GetChildren(value: val).empty()) { |
| 145 | auto err = |
| 146 | StringUtil::Format(fmt_str: "Star expression \"%s\" resulted in an empty set of columns" , params: star->ToString()); |
| 147 | throw BinderException(FormatError(expr_context&: *star, message: err)); |
| 148 | } |
| 149 | auto &children = ListValue::GetChildren(value: val); |
| 150 | vector<unique_ptr<ParsedExpression>> new_list; |
| 151 | // scan the list for all selected columns and construct a lookup table |
| 152 | case_insensitive_map_t<bool> selected_set; |
| 153 | for (auto &child : children) { |
| 154 | selected_set.insert(x: make_pair(x: StringValue::Get(value: child), y: false)); |
| 155 | } |
| 156 | // now check the list of all possible expressions and select which ones make it in |
| 157 | for (auto &expr : star_list) { |
| 158 | auto str = GetColumnsStringValue(expr&: *expr); |
| 159 | auto entry = selected_set.find(x: str); |
| 160 | if (entry != selected_set.end()) { |
| 161 | new_list.push_back(x: std::move(expr)); |
| 162 | entry->second = true; |
| 163 | } |
| 164 | } |
| 165 | // check if all expressions found a match |
| 166 | for (auto &entry : selected_set) { |
| 167 | if (!entry.second) { |
| 168 | throw BinderException("Column \"%s\" was selected but was not found in the FROM clause" , |
| 169 | entry.first); |
| 170 | } |
| 171 | } |
| 172 | star_list = std::move(new_list); |
| 173 | } else { |
| 174 | throw BinderException(FormatError( |
| 175 | expr_context&: *star, message: "COLUMNS expects either a VARCHAR argument (regex) or a LIST of VARCHAR (list of columns)" )); |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | // now perform the replacement |
| 180 | for (idx_t i = 0; i < star_list.size(); i++) { |
| 181 | auto new_expr = expr->Copy(); |
| 182 | ReplaceStarExpression(expr&: new_expr, replacement&: star_list[i]); |
| 183 | new_select_list.push_back(x: std::move(new_expr)); |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | void Binder::ExpandStarExpressions(vector<unique_ptr<ParsedExpression>> &select_list, |
| 188 | vector<unique_ptr<ParsedExpression>> &new_select_list) { |
| 189 | for (auto &select_element : select_list) { |
| 190 | ExpandStarExpression(expr: std::move(select_element), new_select_list); |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | } // namespace duckdb |
| 195 | |