| 1 | #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp" |
| 2 | #include "duckdb/common/string_util.hpp" |
| 3 | #include "duckdb/function/scalar/nested_functions.hpp" |
| 4 | #include "duckdb/parser/expression/columnref_expression.hpp" |
| 5 | #include "duckdb/parser/expression/constant_expression.hpp" |
| 6 | #include "duckdb/parser/expression/function_expression.hpp" |
| 7 | #include "duckdb/parser/expression/operator_expression.hpp" |
| 8 | #include "duckdb/parser/expression/positional_reference_expression.hpp" |
| 9 | #include "duckdb/parser/expression/subquery_expression.hpp" |
| 10 | #include "duckdb/parser/parsed_expression_iterator.hpp" |
| 11 | #include "duckdb/planner/binder.hpp" |
| 12 | #include "duckdb/planner/expression/bound_columnref_expression.hpp" |
| 13 | #include "duckdb/planner/expression/bound_lambdaref_expression.hpp" |
| 14 | #include "duckdb/planner/expression/bound_constant_expression.hpp" |
| 15 | #include "duckdb/planner/expression_binder.hpp" |
| 16 | #include "duckdb/planner/expression_binder/where_binder.hpp" |
| 17 | |
| 18 | namespace duckdb { |
| 19 | |
| 20 | string GetSQLValueFunctionName(const string &column_name) { |
| 21 | auto lcase = StringUtil::Lower(str: column_name); |
| 22 | if (lcase == "current_catalog" ) { |
| 23 | return "current_catalog" ; |
| 24 | } else if (lcase == "current_date" ) { |
| 25 | return "current_date" ; |
| 26 | } else if (lcase == "current_schema" ) { |
| 27 | return "current_schema" ; |
| 28 | } else if (lcase == "current_role" ) { |
| 29 | return "current_role" ; |
| 30 | } else if (lcase == "current_time" ) { |
| 31 | return "get_current_time" ; |
| 32 | } else if (lcase == "current_timestamp" ) { |
| 33 | return "get_current_timestamp" ; |
| 34 | } else if (lcase == "current_user" ) { |
| 35 | return "current_user" ; |
| 36 | } else if (lcase == "localtime" ) { |
| 37 | return "current_localtime" ; |
| 38 | } else if (lcase == "localtimestamp" ) { |
| 39 | return "current_localtimestamp" ; |
| 40 | } else if (lcase == "session_user" ) { |
| 41 | return "session_user" ; |
| 42 | } else if (lcase == "user" ) { |
| 43 | return "user" ; |
| 44 | } |
| 45 | return string(); |
| 46 | } |
| 47 | |
| 48 | unique_ptr<ParsedExpression> ExpressionBinder::GetSQLValueFunction(const string &column_name) { |
| 49 | auto value_function = GetSQLValueFunctionName(column_name); |
| 50 | if (value_function.empty()) { |
| 51 | return nullptr; |
| 52 | } |
| 53 | |
| 54 | vector<unique_ptr<ParsedExpression>> children; |
| 55 | return make_uniq<FunctionExpression>(args&: value_function, args: std::move(children)); |
| 56 | } |
| 57 | |
| 58 | unique_ptr<ParsedExpression> ExpressionBinder::QualifyColumnName(const string &column_name, string &error_message) { |
| 59 | auto using_binding = binder.bind_context.GetUsingBinding(column_name); |
| 60 | if (using_binding) { |
| 61 | // we are referencing a USING column |
| 62 | // check if we can refer to one of the base columns directly |
| 63 | unique_ptr<Expression> expression; |
| 64 | if (!using_binding->primary_binding.empty()) { |
| 65 | // we can! just assign the table name and re-bind |
| 66 | return binder.bind_context.CreateColumnReference(table_name: using_binding->primary_binding, column_name); |
| 67 | } else { |
| 68 | // // we cannot! we need to bind this as a coalesce between all the relevant columns |
| 69 | auto coalesce = make_uniq<OperatorExpression>(args: ExpressionType::OPERATOR_COALESCE); |
| 70 | coalesce->children.reserve(n: using_binding->bindings.size()); |
| 71 | for (auto &entry : using_binding->bindings) { |
| 72 | coalesce->children.push_back(x: make_uniq<ColumnRefExpression>(args: column_name, args: entry)); |
| 73 | } |
| 74 | return std::move(coalesce); |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | // find a binding that contains this |
| 79 | string table_name = binder.bind_context.GetMatchingBinding(column_name); |
| 80 | |
| 81 | // throw an error if a macro conflicts with a column name |
| 82 | auto is_macro_column = false; |
| 83 | if (binder.macro_binding != nullptr && binder.macro_binding->HasMatchingBinding(column_name)) { |
| 84 | is_macro_column = true; |
| 85 | if (!table_name.empty()) { |
| 86 | throw BinderException("Conflicting column names for column " + column_name + "!" ); |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | if (lambda_bindings) { |
| 91 | for (idx_t i = 0; i < lambda_bindings->size(); i++) { |
| 92 | if ((*lambda_bindings)[i].HasMatchingBinding(column_name)) { |
| 93 | |
| 94 | // throw an error if a lambda conflicts with a column name or a macro |
| 95 | if (!table_name.empty() || is_macro_column) { |
| 96 | throw BinderException("Conflicting column names for column " + column_name + "!" ); |
| 97 | } |
| 98 | |
| 99 | D_ASSERT(!(*lambda_bindings)[i].alias.empty()); |
| 100 | return make_uniq<ColumnRefExpression>(args: column_name, args&: (*lambda_bindings)[i].alias); |
| 101 | } |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | if (is_macro_column) { |
| 106 | D_ASSERT(!binder.macro_binding->alias.empty()); |
| 107 | return make_uniq<ColumnRefExpression>(args: column_name, args&: binder.macro_binding->alias); |
| 108 | } |
| 109 | // see if it's a column |
| 110 | if (table_name.empty()) { |
| 111 | // column was not found - check if it is a SQL value function |
| 112 | auto value_function = GetSQLValueFunction(column_name); |
| 113 | if (value_function) { |
| 114 | return value_function; |
| 115 | } |
| 116 | // it's not, find candidates and error |
| 117 | auto similar_bindings = binder.bind_context.GetSimilarBindings(column_name); |
| 118 | string candidate_str = StringUtil::CandidatesMessage(candidates: similar_bindings, candidate: "Candidate bindings" ); |
| 119 | error_message = |
| 120 | StringUtil::Format(fmt_str: "Referenced column \"%s\" not found in FROM clause!%s" , params: column_name, params: candidate_str); |
| 121 | return nullptr; |
| 122 | } |
| 123 | return binder.bind_context.CreateColumnReference(table_name, column_name); |
| 124 | } |
| 125 | |
| 126 | void ExpressionBinder::QualifyColumnNames(unique_ptr<ParsedExpression> &expr) { |
| 127 | switch (expr->type) { |
| 128 | case ExpressionType::COLUMN_REF: { |
| 129 | auto &colref = expr->Cast<ColumnRefExpression>(); |
| 130 | string error_message; |
| 131 | auto new_expr = QualifyColumnName(colref, error_message); |
| 132 | if (new_expr) { |
| 133 | if (!expr->alias.empty()) { |
| 134 | new_expr->alias = expr->alias; |
| 135 | } |
| 136 | new_expr->query_location = colref.query_location; |
| 137 | expr = std::move(new_expr); |
| 138 | } |
| 139 | break; |
| 140 | } |
| 141 | case ExpressionType::POSITIONAL_REFERENCE: { |
| 142 | auto &ref = expr->Cast<PositionalReferenceExpression>(); |
| 143 | if (ref.alias.empty()) { |
| 144 | string table_name, column_name; |
| 145 | auto error = binder.bind_context.BindColumn(ref, table_name, column_name); |
| 146 | if (error.empty()) { |
| 147 | ref.alias = column_name; |
| 148 | } |
| 149 | } |
| 150 | break; |
| 151 | } |
| 152 | default: |
| 153 | break; |
| 154 | } |
| 155 | ParsedExpressionIterator::EnumerateChildren( |
| 156 | expr&: *expr, callback: [&](unique_ptr<ParsedExpression> &child) { QualifyColumnNames(expr&: child); }); |
| 157 | } |
| 158 | |
| 159 | void ExpressionBinder::QualifyColumnNames(Binder &binder, unique_ptr<ParsedExpression> &expr) { |
| 160 | WhereBinder where_binder(binder, binder.context); |
| 161 | where_binder.QualifyColumnNames(expr); |
| 162 | } |
| 163 | |
| 164 | unique_ptr<ParsedExpression> ExpressionBinder::(unique_ptr<ParsedExpression> base, |
| 165 | string field_name) { |
| 166 | |
| 167 | // we need to transform the struct extract if it is inside a lambda expression |
| 168 | // because we cannot bind to an existing table, so we remove the dummy table also |
| 169 | if (lambda_bindings && base->type == ExpressionType::COLUMN_REF) { |
| 170 | auto &lambda_column_ref = base->Cast<ColumnRefExpression>(); |
| 171 | D_ASSERT(!lambda_column_ref.column_names.empty()); |
| 172 | |
| 173 | if (lambda_column_ref.column_names[0].find(s: DummyBinding::DUMMY_NAME) != string::npos) { |
| 174 | D_ASSERT(lambda_column_ref.column_names.size() == 2); |
| 175 | auto lambda_param_name = lambda_column_ref.column_names.back(); |
| 176 | lambda_column_ref.column_names.clear(); |
| 177 | lambda_column_ref.column_names.push_back(x: lambda_param_name); |
| 178 | } |
| 179 | } |
| 180 | |
| 181 | vector<unique_ptr<ParsedExpression>> children; |
| 182 | children.push_back(x: std::move(base)); |
| 183 | children.push_back(x: make_uniq_base<ParsedExpression, ConstantExpression>(args: Value(std::move(field_name)))); |
| 184 | auto = make_uniq<OperatorExpression>(args: ExpressionType::STRUCT_EXTRACT, args: std::move(children)); |
| 185 | return std::move(extract_fun); |
| 186 | } |
| 187 | |
| 188 | unique_ptr<ParsedExpression> ExpressionBinder::CreateStructPack(ColumnRefExpression &colref) { |
| 189 | D_ASSERT(colref.column_names.size() <= 3); |
| 190 | string error_message; |
| 191 | auto &table_name = colref.column_names.back(); |
| 192 | auto binding = binder.bind_context.GetBinding(name: table_name, out_error&: error_message); |
| 193 | if (!binding) { |
| 194 | return nullptr; |
| 195 | } |
| 196 | if (colref.column_names.size() >= 2) { |
| 197 | // "schema_name.table_name" |
| 198 | auto catalog_entry = binding->GetStandardEntry(); |
| 199 | if (!catalog_entry) { |
| 200 | return nullptr; |
| 201 | } |
| 202 | if (catalog_entry->name != table_name) { |
| 203 | return nullptr; |
| 204 | } |
| 205 | if (colref.column_names.size() == 2) { |
| 206 | auto &qualifier = colref.column_names[0]; |
| 207 | if (catalog_entry->catalog.GetName() != qualifier && catalog_entry->schema.name != qualifier) { |
| 208 | return nullptr; |
| 209 | } |
| 210 | } else if (colref.column_names.size() == 3) { |
| 211 | auto &catalog_name = colref.column_names[0]; |
| 212 | auto &schema_name = colref.column_names[1]; |
| 213 | if (catalog_entry->catalog.GetName() != catalog_name || catalog_entry->schema.name != schema_name) { |
| 214 | return nullptr; |
| 215 | } |
| 216 | } else { |
| 217 | throw InternalException("Expected 2 or 3 column names for CreateStructPack" ); |
| 218 | } |
| 219 | } |
| 220 | // We found the table, now create the struct_pack expression |
| 221 | vector<unique_ptr<ParsedExpression>> child_expressions; |
| 222 | child_expressions.reserve(n: binding->names.size()); |
| 223 | for (const auto &column_name : binding->names) { |
| 224 | child_expressions.push_back(x: make_uniq<ColumnRefExpression>(args: column_name, args&: table_name)); |
| 225 | } |
| 226 | return make_uniq<FunctionExpression>(args: "struct_pack" , args: std::move(child_expressions)); |
| 227 | } |
| 228 | |
| 229 | unique_ptr<ParsedExpression> ExpressionBinder::QualifyColumnName(ColumnRefExpression &colref, string &error_message) { |
| 230 | idx_t column_parts = colref.column_names.size(); |
| 231 | // column names can have an arbitrary amount of dots |
| 232 | // here is how the resolution works: |
| 233 | if (column_parts == 1) { |
| 234 | // no dots (i.e. "part1") |
| 235 | // -> part1 refers to a column |
| 236 | // check if we can qualify the column name with the table name |
| 237 | auto qualified_colref = QualifyColumnName(column_name: colref.GetColumnName(), error_message); |
| 238 | if (qualified_colref) { |
| 239 | // we could: return it |
| 240 | return qualified_colref; |
| 241 | } |
| 242 | // we could not! Try creating an implicit struct_pack |
| 243 | return CreateStructPack(colref); |
| 244 | } else if (column_parts == 2) { |
| 245 | // one dot (i.e. "part1.part2") |
| 246 | // EITHER: |
| 247 | // -> part1 is a table, part2 is a column |
| 248 | // -> part1 is a column, part2 is a property of that column (i.e. struct_extract) |
| 249 | |
| 250 | // first check if part1 is a table, and part2 is a standard column |
| 251 | if (binder.HasMatchingBinding(table_name: colref.column_names[0], column_name: colref.column_names[1], error_message)) { |
| 252 | // it is! return the colref directly |
| 253 | return binder.bind_context.CreateColumnReference(table_name: colref.column_names[0], column_name: colref.column_names[1]); |
| 254 | } else { |
| 255 | // otherwise check if we can turn this into a struct extract |
| 256 | auto new_colref = make_uniq<ColumnRefExpression>(args&: colref.column_names[0]); |
| 257 | string other_error; |
| 258 | auto qualified_colref = QualifyColumnName(column_name: colref.column_names[0], error_message&: other_error); |
| 259 | if (qualified_colref) { |
| 260 | // we could: create a struct extract |
| 261 | return CreateStructExtract(base: std::move(qualified_colref), field_name: colref.column_names[1]); |
| 262 | } |
| 263 | // we could not! Try creating an implicit struct_pack |
| 264 | return CreateStructPack(colref); |
| 265 | } |
| 266 | } else { |
| 267 | // two or more dots (i.e. "part1.part2.part3.part4...") |
| 268 | // -> part1 is a catalog, part2 is a schema, part3 is a table, part4 is a column name, part 5 and beyond are |
| 269 | // struct fields |
| 270 | // -> part1 is a catalog, part2 is a table, part3 is a column name, part4 and beyond are struct fields |
| 271 | // -> part1 is a schema, part2 is a table, part3 is a column name, part4 and beyond are struct fields |
| 272 | // -> part1 is a table, part2 is a column name, part3 and beyond are struct fields |
| 273 | // -> part1 is a column, part2 and beyond are struct fields |
| 274 | |
| 275 | // we always prefer the most top-level view |
| 276 | // i.e. in case of multiple resolution options, we resolve in order: |
| 277 | // -> 1. resolve "part1" as a catalog |
| 278 | // -> 2. resolve "part1" as a schema |
| 279 | // -> 3. resolve "part1" as a table |
| 280 | // -> 4. resolve "part1" as a column |
| 281 | |
| 282 | unique_ptr<ParsedExpression> result_expr; |
| 283 | idx_t ; |
| 284 | // first check if part1 is a catalog |
| 285 | if (colref.column_names.size() > 3 && |
| 286 | binder.HasMatchingBinding(catalog_name: colref.column_names[0], schema_name: colref.column_names[1], table_name: colref.column_names[2], |
| 287 | column_name: colref.column_names[3], error_message)) { |
| 288 | // part1 is a catalog - the column reference is "catalog.schema.table.column" |
| 289 | result_expr = binder.bind_context.CreateColumnReference(catalog_name: colref.column_names[0], schema_name: colref.column_names[1], |
| 290 | table_name: colref.column_names[2], column_name: colref.column_names[3]); |
| 291 | struct_extract_start = 4; |
| 292 | } else if (binder.HasMatchingBinding(catalog_name: colref.column_names[0], INVALID_SCHEMA, table_name: colref.column_names[1], |
| 293 | column_name: colref.column_names[2], error_message)) { |
| 294 | // part1 is a catalog - the column reference is "catalog.table.column" |
| 295 | result_expr = binder.bind_context.CreateColumnReference(catalog_name: colref.column_names[0], INVALID_SCHEMA, |
| 296 | table_name: colref.column_names[1], column_name: colref.column_names[2]); |
| 297 | struct_extract_start = 3; |
| 298 | } else if (binder.HasMatchingBinding(schema_name: colref.column_names[0], table_name: colref.column_names[1], column_name: colref.column_names[2], |
| 299 | error_message)) { |
| 300 | // part1 is a schema - the column reference is "schema.table.column" |
| 301 | // any additional fields are turned into struct_extract calls |
| 302 | result_expr = binder.bind_context.CreateColumnReference(schema_name: colref.column_names[0], table_name: colref.column_names[1], |
| 303 | column_name: colref.column_names[2]); |
| 304 | struct_extract_start = 3; |
| 305 | } else if (binder.HasMatchingBinding(table_name: colref.column_names[0], column_name: colref.column_names[1], error_message)) { |
| 306 | // part1 is a table |
| 307 | // the column reference is "table.column" |
| 308 | // any additional fields are turned into struct_extract calls |
| 309 | result_expr = binder.bind_context.CreateColumnReference(table_name: colref.column_names[0], column_name: colref.column_names[1]); |
| 310 | struct_extract_start = 2; |
| 311 | } else { |
| 312 | // part1 could be a column |
| 313 | string col_error; |
| 314 | result_expr = QualifyColumnName(column_name: colref.column_names[0], error_message&: col_error); |
| 315 | if (!result_expr) { |
| 316 | // it is not! Try creating an implicit struct_pack |
| 317 | return CreateStructPack(colref); |
| 318 | } |
| 319 | // it is! add the struct extract calls |
| 320 | struct_extract_start = 1; |
| 321 | } |
| 322 | for (idx_t i = struct_extract_start; i < colref.column_names.size(); i++) { |
| 323 | result_expr = CreateStructExtract(base: std::move(result_expr), field_name: colref.column_names[i]); |
| 324 | } |
| 325 | return result_expr; |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | BindResult ExpressionBinder::BindExpression(ColumnRefExpression &colref_p, idx_t depth) { |
| 330 | if (binder.GetBindingMode() == BindingMode::EXTRACT_NAMES) { |
| 331 | return BindResult(make_uniq<BoundConstantExpression>(args: Value(LogicalType::SQLNULL))); |
| 332 | } |
| 333 | string error_message; |
| 334 | auto expr = QualifyColumnName(colref&: colref_p, error_message); |
| 335 | if (!expr) { |
| 336 | return BindResult(binder.FormatError(expr_context&: colref_p, message: error_message)); |
| 337 | } |
| 338 | expr->query_location = colref_p.query_location; |
| 339 | |
| 340 | // a generated column returns a generated expression, a struct on a column returns a struct extract |
| 341 | if (expr->type != ExpressionType::COLUMN_REF) { |
| 342 | auto alias = expr->alias; |
| 343 | auto result = BindExpression(expr_ptr&: expr, depth); |
| 344 | if (result.expression) { |
| 345 | result.expression->alias = std::move(alias); |
| 346 | } |
| 347 | return result; |
| 348 | } |
| 349 | |
| 350 | auto &colref = expr->Cast<ColumnRefExpression>(); |
| 351 | D_ASSERT(colref.IsQualified()); |
| 352 | auto &table_name = colref.GetTableName(); |
| 353 | |
| 354 | // individual column reference |
| 355 | // resolve to either a base table or a subquery expression |
| 356 | // if it was a macro parameter, let macro_binding bind it to the argument |
| 357 | // if it was a lambda parameter, let lambda_bindings bind it to the argument |
| 358 | |
| 359 | BindResult result; |
| 360 | |
| 361 | auto found_lambda_binding = false; |
| 362 | if (lambda_bindings) { |
| 363 | for (idx_t i = 0; i < lambda_bindings->size(); i++) { |
| 364 | if (table_name == (*lambda_bindings)[i].alias) { |
| 365 | result = (*lambda_bindings)[i].Bind(colref, lambda_index: i, depth); |
| 366 | found_lambda_binding = true; |
| 367 | break; |
| 368 | } |
| 369 | } |
| 370 | } |
| 371 | |
| 372 | if (!found_lambda_binding) { |
| 373 | if (binder.macro_binding && table_name == binder.macro_binding->alias) { |
| 374 | result = binder.macro_binding->Bind(colref, depth); |
| 375 | } else { |
| 376 | result = binder.bind_context.BindColumn(colref, depth); |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | if (!result.HasError()) { |
| 381 | BoundColumnReferenceInfo ref; |
| 382 | ref.name = colref.column_names.back(); |
| 383 | ref.query_location = colref.query_location; |
| 384 | bound_columns.push_back(x: std::move(ref)); |
| 385 | } else { |
| 386 | result.error = binder.FormatError(expr_context&: colref_p, message: result.error); |
| 387 | } |
| 388 | return result; |
| 389 | } |
| 390 | |
| 391 | bool ExpressionBinder::QualifyColumnAlias(const ColumnRefExpression &colref) { |
| 392 | // Only BaseSelectBinder will have a valid col alias map, |
| 393 | // otherwise just return false |
| 394 | return false; |
| 395 | } |
| 396 | |
| 397 | } // namespace duckdb |
| 398 | |