1#include "duckdb/catalog/catalog.hpp"
2#include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp"
3#include "duckdb/common/algorithm.hpp"
4#include "duckdb/execution/expression_executor.hpp"
5#include "duckdb/parser/expression/columnref_expression.hpp"
6#include "duckdb/parser/expression/comparison_expression.hpp"
7#include "duckdb/parser/expression/function_expression.hpp"
8#include "duckdb/parser/expression/subquery_expression.hpp"
9#include "duckdb/parser/query_node/select_node.hpp"
10#include "duckdb/parser/tableref/emptytableref.hpp"
11#include "duckdb/parser/tableref/table_function_ref.hpp"
12#include "duckdb/planner/binder.hpp"
13#include "duckdb/planner/expression_binder/table_function_binder.hpp"
14#include "duckdb/planner/expression_binder/select_binder.hpp"
15#include "duckdb/planner/operator/logical_get.hpp"
16#include "duckdb/planner/query_node/bound_select_node.hpp"
17#include "duckdb/planner/tableref/bound_subqueryref.hpp"
18#include "duckdb/planner/tableref/bound_table_function.hpp"
19#include "duckdb/function/function_binder.hpp"
20#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
21#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
22#include "duckdb/function/table/read_csv.hpp"
23
24namespace duckdb {
25
26static bool IsTableInTableOutFunction(TableFunctionCatalogEntry &table_function) {
27 auto fun = table_function.functions.GetFunctionByOffset(offset: 0);
28 return table_function.functions.Size() == 1 && fun.arguments.size() == 1 &&
29 fun.arguments[0].id() == LogicalTypeId::TABLE;
30}
31
32bool Binder::BindTableInTableOutFunction(vector<unique_ptr<ParsedExpression>> &expressions,
33 unique_ptr<BoundSubqueryRef> &subquery, string &error) {
34 auto binder = Binder::CreateBinder(context&: this->context, parent: this, inherit_ctes: true);
35 unique_ptr<QueryNode> subquery_node;
36 if (expressions.size() == 1 && expressions[0]->type == ExpressionType::SUBQUERY) {
37 // general case: argument is a subquery, bind it as part of the node
38 auto &se = expressions[0]->Cast<SubqueryExpression>();
39 subquery_node = std::move(se.subquery->node);
40 } else {
41 // special case: non-subquery parameter to table-in table-out function
42 // generate a subquery and bind that (i.e. UNNEST([1,2,3]) becomes UNNEST((SELECT [1,2,3]))
43 auto select_node = make_uniq<SelectNode>();
44 select_node->select_list = std::move(expressions);
45 select_node->from_table = make_uniq<EmptyTableRef>();
46 subquery_node = std::move(select_node);
47 }
48 auto node = binder->BindNode(node&: *subquery_node);
49 subquery = make_uniq<BoundSubqueryRef>(args: std::move(binder), args: std::move(node));
50 MoveCorrelatedExpressions(other&: *subquery->binder);
51 return true;
52}
53
54bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_function,
55 vector<unique_ptr<ParsedExpression>> &expressions,
56 vector<LogicalType> &arguments, vector<Value> &parameters,
57 named_parameter_map_t &named_parameters,
58 unique_ptr<BoundSubqueryRef> &subquery, string &error) {
59 if (IsTableInTableOutFunction(table_function)) {
60 // special case binding for table-in table-out function
61 arguments.emplace_back(args: LogicalTypeId::TABLE);
62 return BindTableInTableOutFunction(expressions, subquery, error);
63 }
64 bool seen_subquery = false;
65 for (auto &child : expressions) {
66 string parameter_name;
67
68 // hack to make named parameters work
69 if (child->type == ExpressionType::COMPARE_EQUAL) {
70 // comparison, check if the LHS is a columnref
71 auto &comp = child->Cast<ComparisonExpression>();
72 if (comp.left->type == ExpressionType::COLUMN_REF) {
73 auto &colref = comp.left->Cast<ColumnRefExpression>();
74 if (!colref.IsQualified()) {
75 parameter_name = colref.GetColumnName();
76 child = std::move(comp.right);
77 }
78 }
79 }
80 if (child->type == ExpressionType::SUBQUERY) {
81 auto fun = table_function.functions.GetFunctionByOffset(offset: 0);
82 if (table_function.functions.Size() != 1 || fun.arguments.empty() ||
83 fun.arguments[0].id() != LogicalTypeId::TABLE) {
84 throw BinderException(
85 "Only table-in-out functions can have subquery parameters - %s only accepts constant parameters",
86 fun.name);
87 }
88 // this separate subquery binding path is only used by python_map
89 // FIXME: this should be unified with `BindTableInTableOutFunction` above
90 if (seen_subquery) {
91 error = "Table function can have at most one subquery parameter ";
92 return false;
93 }
94 auto binder = Binder::CreateBinder(context&: this->context, parent: this, inherit_ctes: true);
95 auto &se = child->Cast<SubqueryExpression>();
96 auto node = binder->BindNode(node&: *se.subquery->node);
97 subquery = make_uniq<BoundSubqueryRef>(args: std::move(binder), args: std::move(node));
98 seen_subquery = true;
99 arguments.emplace_back(args: LogicalTypeId::TABLE);
100 parameters.emplace_back(
101 args: Value(LogicalType::INVALID)); // this is a dummy value so the lengths of arguments and parameter match
102 continue;
103 }
104
105 TableFunctionBinder binder(*this, context);
106 LogicalType sql_type;
107 auto expr = binder.Bind(expr&: child, result_type: &sql_type);
108 if (expr->HasParameter()) {
109 throw ParameterNotResolvedException();
110 }
111 if (!expr->IsScalar()) {
112 // should have been eliminated before
113 throw InternalException("Table function requires a constant parameter");
114 }
115 auto constant = ExpressionExecutor::EvaluateScalar(context, expr: *expr, allow_unfoldable: true);
116 if (parameter_name.empty()) {
117 // unnamed parameter
118 if (!named_parameters.empty()) {
119 error = "Unnamed parameters cannot come after named parameters";
120 return false;
121 }
122 arguments.emplace_back(args&: sql_type);
123 parameters.emplace_back(args: std::move(constant));
124 } else {
125 named_parameters[parameter_name] = std::move(constant);
126 }
127 }
128 return true;
129}
130
131unique_ptr<LogicalOperator>
132Binder::BindTableFunctionInternal(TableFunction &table_function, const string &function_name, vector<Value> parameters,
133 named_parameter_map_t named_parameters, vector<LogicalType> input_table_types,
134 vector<string> input_table_names, const vector<string> &column_name_alias,
135 unique_ptr<ExternalDependency> external_dependency) {
136 auto bind_index = GenerateTableIndex();
137 // perform the binding
138 unique_ptr<FunctionData> bind_data;
139 vector<LogicalType> return_types;
140 vector<string> return_names;
141 if (table_function.bind || table_function.bind_replace) {
142 TableFunctionBindInput bind_input(parameters, named_parameters, input_table_types, input_table_names,
143 table_function.function_info.get());
144 if (table_function.bind_replace) {
145 auto new_plan = table_function.bind_replace(context, bind_input);
146 if (new_plan != nullptr) {
147 return CreatePlan(ref&: *Bind(ref&: *new_plan));
148 } else if (!table_function.bind) {
149 throw BinderException("Failed to bind \"%s\": nullptr returned from bind_replace without bind function",
150 table_function.name);
151 }
152 }
153 bind_data = table_function.bind(context, bind_input, return_types, return_names);
154 if (table_function.name == "pandas_scan" || table_function.name == "arrow_scan") {
155 auto &arrow_bind = bind_data->Cast<PyTableFunctionData>();
156 arrow_bind.external_dependency = std::move(external_dependency);
157 }
158 if (table_function.name == "read_csv" || table_function.name == "read_csv_auto") {
159 auto &csv_bind = bind_data->Cast<ReadCSVData>();
160 if (csv_bind.single_threaded) {
161 table_function.extra_info = "(Single-Threaded)";
162 } else {
163 table_function.extra_info = "(Multi-Threaded)";
164 }
165 }
166 } else {
167 throw InvalidInputException("Cannot call function \"%s\" directly - it has no bind function",
168 table_function.name);
169 }
170 if (return_types.size() != return_names.size()) {
171 throw InternalException("Failed to bind \"%s\": return_types/names must have same size", table_function.name);
172 }
173 if (return_types.empty()) {
174 throw InternalException("Failed to bind \"%s\": Table function must return at least one column",
175 table_function.name);
176 }
177 // overwrite the names with any supplied aliases
178 for (idx_t i = 0; i < column_name_alias.size() && i < return_names.size(); i++) {
179 return_names[i] = column_name_alias[i];
180 }
181 for (idx_t i = 0; i < return_names.size(); i++) {
182 if (return_names[i].empty()) {
183 return_names[i] = "C" + to_string(val: i);
184 }
185 }
186
187 auto get = make_uniq<LogicalGet>(args&: bind_index, args&: table_function, args: std::move(bind_data), args&: return_types, args&: return_names);
188 get->parameters = parameters;
189 get->named_parameters = named_parameters;
190 get->input_table_types = input_table_types;
191 get->input_table_names = input_table_names;
192 if (table_function.in_out_function && !table_function.projection_pushdown) {
193 get->column_ids.reserve(n: return_types.size());
194 for (idx_t i = 0; i < return_types.size(); i++) {
195 get->column_ids.push_back(x: i);
196 }
197 }
198 // now add the table function to the bind context so its columns can be bound
199 bind_context.AddTableFunction(index: bind_index, alias: function_name, names: return_names, types: return_types, bound_column_ids&: get->column_ids,
200 entry: get->GetTable().get());
201 return std::move(get);
202}
203
204unique_ptr<LogicalOperator> Binder::BindTableFunction(TableFunction &function, vector<Value> parameters) {
205 named_parameter_map_t named_parameters;
206 vector<LogicalType> input_table_types;
207 vector<string> input_table_names;
208 vector<string> column_name_aliases;
209 return BindTableFunctionInternal(table_function&: function, function_name: function.name, parameters: std::move(parameters), named_parameters: std::move(named_parameters),
210 input_table_types: std::move(input_table_types), input_table_names: std::move(input_table_names), column_name_alias: column_name_aliases,
211 external_dependency: nullptr);
212}
213
214unique_ptr<BoundTableRef> Binder::Bind(TableFunctionRef &ref) {
215 QueryErrorContext error_context(root_statement, ref.query_location);
216
217 D_ASSERT(ref.function->type == ExpressionType::FUNCTION);
218 auto &fexpr = ref.function->Cast<FunctionExpression>();
219
220 // fetch the function from the catalog
221 auto &func_catalog = Catalog::GetEntry(context, type: CatalogType::TABLE_FUNCTION_ENTRY, catalog: fexpr.catalog, schema: fexpr.schema,
222 name: fexpr.function_name, error_context);
223
224 if (func_catalog.type == CatalogType::TABLE_MACRO_ENTRY) {
225 auto &macro_func = func_catalog.Cast<TableMacroCatalogEntry>();
226 auto query_node = BindTableMacro(function&: fexpr, macro_func, depth: 0);
227 D_ASSERT(query_node);
228
229 auto binder = Binder::CreateBinder(context, parent: this);
230 binder->can_contain_nulls = true;
231
232 binder->alias = ref.alias.empty() ? "unnamed_query" : ref.alias;
233 auto query = binder->BindNode(node&: *query_node);
234
235 idx_t bind_index = query->GetRootIndex();
236 // string alias;
237 string alias = (ref.alias.empty() ? "unnamed_query" + to_string(val: bind_index) : ref.alias);
238
239 auto result = make_uniq<BoundSubqueryRef>(args: std::move(binder), args: std::move(query));
240 // remember ref here is TableFunctionRef and NOT base class
241 bind_context.AddSubquery(index: bind_index, alias, ref, subquery&: *result->subquery);
242 MoveCorrelatedExpressions(other&: *result->binder);
243 return std::move(result);
244 }
245 D_ASSERT(func_catalog.type == CatalogType::TABLE_FUNCTION_ENTRY);
246 auto &function = func_catalog.Cast<TableFunctionCatalogEntry>();
247
248 // evaluate the input parameters to the function
249 vector<LogicalType> arguments;
250 vector<Value> parameters;
251 named_parameter_map_t named_parameters;
252 unique_ptr<BoundSubqueryRef> subquery;
253 string error;
254 if (!BindTableFunctionParameters(table_function&: function, expressions&: fexpr.children, arguments, parameters, named_parameters, subquery,
255 error)) {
256 throw BinderException(FormatError(ref_context&: ref, message: error));
257 }
258
259 // select the function based on the input parameters
260 FunctionBinder function_binder(context);
261 idx_t best_function_idx = function_binder.BindFunction(name: function.name, functions&: function.functions, arguments, error);
262 if (best_function_idx == DConstants::INVALID_INDEX) {
263 throw BinderException(FormatError(ref_context&: ref, message: error));
264 }
265 auto table_function = function.functions.GetFunctionByOffset(offset: best_function_idx);
266
267 // now check the named parameters
268 BindNamedParameters(types&: table_function.named_parameters, values&: named_parameters, error_context, func_name&: table_function.name);
269
270 // cast the parameters to the type of the function
271 for (idx_t i = 0; i < arguments.size(); i++) {
272 auto target_type = i < table_function.arguments.size() ? table_function.arguments[i] : table_function.varargs;
273
274 if (target_type != LogicalType::ANY && target_type != LogicalType::TABLE &&
275 target_type != LogicalType::POINTER && target_type.id() != LogicalTypeId::LIST) {
276 parameters[i] = parameters[i].CastAs(context, target_type);
277 }
278 }
279
280 vector<LogicalType> input_table_types;
281 vector<string> input_table_names;
282
283 if (subquery) {
284 input_table_types = subquery->subquery->types;
285 input_table_names = subquery->subquery->names;
286 }
287 auto get = BindTableFunctionInternal(table_function, function_name: ref.alias.empty() ? fexpr.function_name : ref.alias,
288 parameters: std::move(parameters), named_parameters: std::move(named_parameters),
289 input_table_types: std::move(input_table_types), input_table_names: std::move(input_table_names),
290 column_name_alias: ref.column_name_alias, external_dependency: std::move(ref.external_dependency));
291 if (subquery) {
292 get->children.push_back(x: Binder::CreatePlan(ref&: *subquery));
293 }
294
295 return make_uniq_base<BoundTableRef, BoundTableFunction>(args: std::move(get));
296}
297
298} // namespace duckdb
299