| 1 | #include "duckdb/catalog/catalog.hpp" |
| 2 | #include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" |
| 3 | #include "duckdb/common/algorithm.hpp" |
| 4 | #include "duckdb/execution/expression_executor.hpp" |
| 5 | #include "duckdb/parser/expression/columnref_expression.hpp" |
| 6 | #include "duckdb/parser/expression/comparison_expression.hpp" |
| 7 | #include "duckdb/parser/expression/function_expression.hpp" |
| 8 | #include "duckdb/parser/expression/subquery_expression.hpp" |
| 9 | #include "duckdb/parser/query_node/select_node.hpp" |
| 10 | #include "duckdb/parser/tableref/emptytableref.hpp" |
| 11 | #include "duckdb/parser/tableref/table_function_ref.hpp" |
| 12 | #include "duckdb/planner/binder.hpp" |
| 13 | #include "duckdb/planner/expression_binder/table_function_binder.hpp" |
| 14 | #include "duckdb/planner/expression_binder/select_binder.hpp" |
| 15 | #include "duckdb/planner/operator/logical_get.hpp" |
| 16 | #include "duckdb/planner/query_node/bound_select_node.hpp" |
| 17 | #include "duckdb/planner/tableref/bound_subqueryref.hpp" |
| 18 | #include "duckdb/planner/tableref/bound_table_function.hpp" |
| 19 | #include "duckdb/function/function_binder.hpp" |
| 20 | #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp" |
| 21 | #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" |
| 22 | #include "duckdb/function/table/read_csv.hpp" |
| 23 | |
| 24 | namespace duckdb { |
| 25 | |
| 26 | static bool IsTableInTableOutFunction(TableFunctionCatalogEntry &table_function) { |
| 27 | auto fun = table_function.functions.GetFunctionByOffset(offset: 0); |
| 28 | return table_function.functions.Size() == 1 && fun.arguments.size() == 1 && |
| 29 | fun.arguments[0].id() == LogicalTypeId::TABLE; |
| 30 | } |
| 31 | |
| 32 | bool Binder::BindTableInTableOutFunction(vector<unique_ptr<ParsedExpression>> &expressions, |
| 33 | unique_ptr<BoundSubqueryRef> &subquery, string &error) { |
| 34 | auto binder = Binder::CreateBinder(context&: this->context, parent: this, inherit_ctes: true); |
| 35 | unique_ptr<QueryNode> subquery_node; |
| 36 | if (expressions.size() == 1 && expressions[0]->type == ExpressionType::SUBQUERY) { |
| 37 | // general case: argument is a subquery, bind it as part of the node |
| 38 | auto &se = expressions[0]->Cast<SubqueryExpression>(); |
| 39 | subquery_node = std::move(se.subquery->node); |
| 40 | } else { |
| 41 | // special case: non-subquery parameter to table-in table-out function |
| 42 | // generate a subquery and bind that (i.e. UNNEST([1,2,3]) becomes UNNEST((SELECT [1,2,3])) |
| 43 | auto select_node = make_uniq<SelectNode>(); |
| 44 | select_node->select_list = std::move(expressions); |
| 45 | select_node->from_table = make_uniq<EmptyTableRef>(); |
| 46 | subquery_node = std::move(select_node); |
| 47 | } |
| 48 | auto node = binder->BindNode(node&: *subquery_node); |
| 49 | subquery = make_uniq<BoundSubqueryRef>(args: std::move(binder), args: std::move(node)); |
| 50 | MoveCorrelatedExpressions(other&: *subquery->binder); |
| 51 | return true; |
| 52 | } |
| 53 | |
| 54 | bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_function, |
| 55 | vector<unique_ptr<ParsedExpression>> &expressions, |
| 56 | vector<LogicalType> &arguments, vector<Value> ¶meters, |
| 57 | named_parameter_map_t &named_parameters, |
| 58 | unique_ptr<BoundSubqueryRef> &subquery, string &error) { |
| 59 | if (IsTableInTableOutFunction(table_function)) { |
| 60 | // special case binding for table-in table-out function |
| 61 | arguments.emplace_back(args: LogicalTypeId::TABLE); |
| 62 | return BindTableInTableOutFunction(expressions, subquery, error); |
| 63 | } |
| 64 | bool seen_subquery = false; |
| 65 | for (auto &child : expressions) { |
| 66 | string parameter_name; |
| 67 | |
| 68 | // hack to make named parameters work |
| 69 | if (child->type == ExpressionType::COMPARE_EQUAL) { |
| 70 | // comparison, check if the LHS is a columnref |
| 71 | auto &comp = child->Cast<ComparisonExpression>(); |
| 72 | if (comp.left->type == ExpressionType::COLUMN_REF) { |
| 73 | auto &colref = comp.left->Cast<ColumnRefExpression>(); |
| 74 | if (!colref.IsQualified()) { |
| 75 | parameter_name = colref.GetColumnName(); |
| 76 | child = std::move(comp.right); |
| 77 | } |
| 78 | } |
| 79 | } |
| 80 | if (child->type == ExpressionType::SUBQUERY) { |
| 81 | auto fun = table_function.functions.GetFunctionByOffset(offset: 0); |
| 82 | if (table_function.functions.Size() != 1 || fun.arguments.empty() || |
| 83 | fun.arguments[0].id() != LogicalTypeId::TABLE) { |
| 84 | throw BinderException( |
| 85 | "Only table-in-out functions can have subquery parameters - %s only accepts constant parameters" , |
| 86 | fun.name); |
| 87 | } |
| 88 | // this separate subquery binding path is only used by python_map |
| 89 | // FIXME: this should be unified with `BindTableInTableOutFunction` above |
| 90 | if (seen_subquery) { |
| 91 | error = "Table function can have at most one subquery parameter " ; |
| 92 | return false; |
| 93 | } |
| 94 | auto binder = Binder::CreateBinder(context&: this->context, parent: this, inherit_ctes: true); |
| 95 | auto &se = child->Cast<SubqueryExpression>(); |
| 96 | auto node = binder->BindNode(node&: *se.subquery->node); |
| 97 | subquery = make_uniq<BoundSubqueryRef>(args: std::move(binder), args: std::move(node)); |
| 98 | seen_subquery = true; |
| 99 | arguments.emplace_back(args: LogicalTypeId::TABLE); |
| 100 | parameters.emplace_back( |
| 101 | args: Value(LogicalType::INVALID)); // this is a dummy value so the lengths of arguments and parameter match |
| 102 | continue; |
| 103 | } |
| 104 | |
| 105 | TableFunctionBinder binder(*this, context); |
| 106 | LogicalType sql_type; |
| 107 | auto expr = binder.Bind(expr&: child, result_type: &sql_type); |
| 108 | if (expr->HasParameter()) { |
| 109 | throw ParameterNotResolvedException(); |
| 110 | } |
| 111 | if (!expr->IsScalar()) { |
| 112 | // should have been eliminated before |
| 113 | throw InternalException("Table function requires a constant parameter" ); |
| 114 | } |
| 115 | auto constant = ExpressionExecutor::EvaluateScalar(context, expr: *expr, allow_unfoldable: true); |
| 116 | if (parameter_name.empty()) { |
| 117 | // unnamed parameter |
| 118 | if (!named_parameters.empty()) { |
| 119 | error = "Unnamed parameters cannot come after named parameters" ; |
| 120 | return false; |
| 121 | } |
| 122 | arguments.emplace_back(args&: sql_type); |
| 123 | parameters.emplace_back(args: std::move(constant)); |
| 124 | } else { |
| 125 | named_parameters[parameter_name] = std::move(constant); |
| 126 | } |
| 127 | } |
| 128 | return true; |
| 129 | } |
| 130 | |
| 131 | unique_ptr<LogicalOperator> |
| 132 | Binder::BindTableFunctionInternal(TableFunction &table_function, const string &function_name, vector<Value> parameters, |
| 133 | named_parameter_map_t named_parameters, vector<LogicalType> input_table_types, |
| 134 | vector<string> input_table_names, const vector<string> &column_name_alias, |
| 135 | unique_ptr<ExternalDependency> external_dependency) { |
| 136 | auto bind_index = GenerateTableIndex(); |
| 137 | // perform the binding |
| 138 | unique_ptr<FunctionData> bind_data; |
| 139 | vector<LogicalType> return_types; |
| 140 | vector<string> return_names; |
| 141 | if (table_function.bind || table_function.bind_replace) { |
| 142 | TableFunctionBindInput bind_input(parameters, named_parameters, input_table_types, input_table_names, |
| 143 | table_function.function_info.get()); |
| 144 | if (table_function.bind_replace) { |
| 145 | auto new_plan = table_function.bind_replace(context, bind_input); |
| 146 | if (new_plan != nullptr) { |
| 147 | return CreatePlan(ref&: *Bind(ref&: *new_plan)); |
| 148 | } else if (!table_function.bind) { |
| 149 | throw BinderException("Failed to bind \"%s\": nullptr returned from bind_replace without bind function" , |
| 150 | table_function.name); |
| 151 | } |
| 152 | } |
| 153 | bind_data = table_function.bind(context, bind_input, return_types, return_names); |
| 154 | if (table_function.name == "pandas_scan" || table_function.name == "arrow_scan" ) { |
| 155 | auto &arrow_bind = bind_data->Cast<PyTableFunctionData>(); |
| 156 | arrow_bind.external_dependency = std::move(external_dependency); |
| 157 | } |
| 158 | if (table_function.name == "read_csv" || table_function.name == "read_csv_auto" ) { |
| 159 | auto &csv_bind = bind_data->Cast<ReadCSVData>(); |
| 160 | if (csv_bind.single_threaded) { |
| 161 | table_function.extra_info = "(Single-Threaded)" ; |
| 162 | } else { |
| 163 | table_function.extra_info = "(Multi-Threaded)" ; |
| 164 | } |
| 165 | } |
| 166 | } else { |
| 167 | throw InvalidInputException("Cannot call function \"%s\" directly - it has no bind function" , |
| 168 | table_function.name); |
| 169 | } |
| 170 | if (return_types.size() != return_names.size()) { |
| 171 | throw InternalException("Failed to bind \"%s\": return_types/names must have same size" , table_function.name); |
| 172 | } |
| 173 | if (return_types.empty()) { |
| 174 | throw InternalException("Failed to bind \"%s\": Table function must return at least one column" , |
| 175 | table_function.name); |
| 176 | } |
| 177 | // overwrite the names with any supplied aliases |
| 178 | for (idx_t i = 0; i < column_name_alias.size() && i < return_names.size(); i++) { |
| 179 | return_names[i] = column_name_alias[i]; |
| 180 | } |
| 181 | for (idx_t i = 0; i < return_names.size(); i++) { |
| 182 | if (return_names[i].empty()) { |
| 183 | return_names[i] = "C" + to_string(val: i); |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | auto get = make_uniq<LogicalGet>(args&: bind_index, args&: table_function, args: std::move(bind_data), args&: return_types, args&: return_names); |
| 188 | get->parameters = parameters; |
| 189 | get->named_parameters = named_parameters; |
| 190 | get->input_table_types = input_table_types; |
| 191 | get->input_table_names = input_table_names; |
| 192 | if (table_function.in_out_function && !table_function.projection_pushdown) { |
| 193 | get->column_ids.reserve(n: return_types.size()); |
| 194 | for (idx_t i = 0; i < return_types.size(); i++) { |
| 195 | get->column_ids.push_back(x: i); |
| 196 | } |
| 197 | } |
| 198 | // now add the table function to the bind context so its columns can be bound |
| 199 | bind_context.AddTableFunction(index: bind_index, alias: function_name, names: return_names, types: return_types, bound_column_ids&: get->column_ids, |
| 200 | entry: get->GetTable().get()); |
| 201 | return std::move(get); |
| 202 | } |
| 203 | |
| 204 | unique_ptr<LogicalOperator> Binder::BindTableFunction(TableFunction &function, vector<Value> parameters) { |
| 205 | named_parameter_map_t named_parameters; |
| 206 | vector<LogicalType> input_table_types; |
| 207 | vector<string> input_table_names; |
| 208 | vector<string> column_name_aliases; |
| 209 | return BindTableFunctionInternal(table_function&: function, function_name: function.name, parameters: std::move(parameters), named_parameters: std::move(named_parameters), |
| 210 | input_table_types: std::move(input_table_types), input_table_names: std::move(input_table_names), column_name_alias: column_name_aliases, |
| 211 | external_dependency: nullptr); |
| 212 | } |
| 213 | |
| 214 | unique_ptr<BoundTableRef> Binder::Bind(TableFunctionRef &ref) { |
| 215 | QueryErrorContext error_context(root_statement, ref.query_location); |
| 216 | |
| 217 | D_ASSERT(ref.function->type == ExpressionType::FUNCTION); |
| 218 | auto &fexpr = ref.function->Cast<FunctionExpression>(); |
| 219 | |
| 220 | // fetch the function from the catalog |
| 221 | auto &func_catalog = Catalog::GetEntry(context, type: CatalogType::TABLE_FUNCTION_ENTRY, catalog: fexpr.catalog, schema: fexpr.schema, |
| 222 | name: fexpr.function_name, error_context); |
| 223 | |
| 224 | if (func_catalog.type == CatalogType::TABLE_MACRO_ENTRY) { |
| 225 | auto ¯o_func = func_catalog.Cast<TableMacroCatalogEntry>(); |
| 226 | auto query_node = BindTableMacro(function&: fexpr, macro_func, depth: 0); |
| 227 | D_ASSERT(query_node); |
| 228 | |
| 229 | auto binder = Binder::CreateBinder(context, parent: this); |
| 230 | binder->can_contain_nulls = true; |
| 231 | |
| 232 | binder->alias = ref.alias.empty() ? "unnamed_query" : ref.alias; |
| 233 | auto query = binder->BindNode(node&: *query_node); |
| 234 | |
| 235 | idx_t bind_index = query->GetRootIndex(); |
| 236 | // string alias; |
| 237 | string alias = (ref.alias.empty() ? "unnamed_query" + to_string(val: bind_index) : ref.alias); |
| 238 | |
| 239 | auto result = make_uniq<BoundSubqueryRef>(args: std::move(binder), args: std::move(query)); |
| 240 | // remember ref here is TableFunctionRef and NOT base class |
| 241 | bind_context.AddSubquery(index: bind_index, alias, ref, subquery&: *result->subquery); |
| 242 | MoveCorrelatedExpressions(other&: *result->binder); |
| 243 | return std::move(result); |
| 244 | } |
| 245 | D_ASSERT(func_catalog.type == CatalogType::TABLE_FUNCTION_ENTRY); |
| 246 | auto &function = func_catalog.Cast<TableFunctionCatalogEntry>(); |
| 247 | |
| 248 | // evaluate the input parameters to the function |
| 249 | vector<LogicalType> arguments; |
| 250 | vector<Value> parameters; |
| 251 | named_parameter_map_t named_parameters; |
| 252 | unique_ptr<BoundSubqueryRef> subquery; |
| 253 | string error; |
| 254 | if (!BindTableFunctionParameters(table_function&: function, expressions&: fexpr.children, arguments, parameters, named_parameters, subquery, |
| 255 | error)) { |
| 256 | throw BinderException(FormatError(ref_context&: ref, message: error)); |
| 257 | } |
| 258 | |
| 259 | // select the function based on the input parameters |
| 260 | FunctionBinder function_binder(context); |
| 261 | idx_t best_function_idx = function_binder.BindFunction(name: function.name, functions&: function.functions, arguments, error); |
| 262 | if (best_function_idx == DConstants::INVALID_INDEX) { |
| 263 | throw BinderException(FormatError(ref_context&: ref, message: error)); |
| 264 | } |
| 265 | auto table_function = function.functions.GetFunctionByOffset(offset: best_function_idx); |
| 266 | |
| 267 | // now check the named parameters |
| 268 | BindNamedParameters(types&: table_function.named_parameters, values&: named_parameters, error_context, func_name&: table_function.name); |
| 269 | |
| 270 | // cast the parameters to the type of the function |
| 271 | for (idx_t i = 0; i < arguments.size(); i++) { |
| 272 | auto target_type = i < table_function.arguments.size() ? table_function.arguments[i] : table_function.varargs; |
| 273 | |
| 274 | if (target_type != LogicalType::ANY && target_type != LogicalType::TABLE && |
| 275 | target_type != LogicalType::POINTER && target_type.id() != LogicalTypeId::LIST) { |
| 276 | parameters[i] = parameters[i].CastAs(context, target_type); |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | vector<LogicalType> input_table_types; |
| 281 | vector<string> input_table_names; |
| 282 | |
| 283 | if (subquery) { |
| 284 | input_table_types = subquery->subquery->types; |
| 285 | input_table_names = subquery->subquery->names; |
| 286 | } |
| 287 | auto get = BindTableFunctionInternal(table_function, function_name: ref.alias.empty() ? fexpr.function_name : ref.alias, |
| 288 | parameters: std::move(parameters), named_parameters: std::move(named_parameters), |
| 289 | input_table_types: std::move(input_table_types), input_table_names: std::move(input_table_names), |
| 290 | column_name_alias: ref.column_name_alias, external_dependency: std::move(ref.external_dependency)); |
| 291 | if (subquery) { |
| 292 | get->children.push_back(x: Binder::CreatePlan(ref&: *subquery)); |
| 293 | } |
| 294 | |
| 295 | return make_uniq_base<BoundTableRef, BoundTableFunction>(args: std::move(get)); |
| 296 | } |
| 297 | |
| 298 | } // namespace duckdb |
| 299 | |