1 | #include "duckdb/catalog/catalog.hpp" |
2 | #include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" |
3 | #include "duckdb/common/algorithm.hpp" |
4 | #include "duckdb/execution/expression_executor.hpp" |
5 | #include "duckdb/parser/expression/columnref_expression.hpp" |
6 | #include "duckdb/parser/expression/comparison_expression.hpp" |
7 | #include "duckdb/parser/expression/function_expression.hpp" |
8 | #include "duckdb/parser/expression/subquery_expression.hpp" |
9 | #include "duckdb/parser/query_node/select_node.hpp" |
10 | #include "duckdb/parser/tableref/emptytableref.hpp" |
11 | #include "duckdb/parser/tableref/table_function_ref.hpp" |
12 | #include "duckdb/planner/binder.hpp" |
13 | #include "duckdb/planner/expression_binder/table_function_binder.hpp" |
14 | #include "duckdb/planner/expression_binder/select_binder.hpp" |
15 | #include "duckdb/planner/operator/logical_get.hpp" |
16 | #include "duckdb/planner/query_node/bound_select_node.hpp" |
17 | #include "duckdb/planner/tableref/bound_subqueryref.hpp" |
18 | #include "duckdb/planner/tableref/bound_table_function.hpp" |
19 | #include "duckdb/function/function_binder.hpp" |
20 | #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp" |
21 | #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" |
22 | #include "duckdb/function/table/read_csv.hpp" |
23 | |
24 | namespace duckdb { |
25 | |
26 | static bool IsTableInTableOutFunction(TableFunctionCatalogEntry &table_function) { |
27 | auto fun = table_function.functions.GetFunctionByOffset(offset: 0); |
28 | return table_function.functions.Size() == 1 && fun.arguments.size() == 1 && |
29 | fun.arguments[0].id() == LogicalTypeId::TABLE; |
30 | } |
31 | |
32 | bool Binder::BindTableInTableOutFunction(vector<unique_ptr<ParsedExpression>> &expressions, |
33 | unique_ptr<BoundSubqueryRef> &subquery, string &error) { |
34 | auto binder = Binder::CreateBinder(context&: this->context, parent: this, inherit_ctes: true); |
35 | unique_ptr<QueryNode> subquery_node; |
36 | if (expressions.size() == 1 && expressions[0]->type == ExpressionType::SUBQUERY) { |
37 | // general case: argument is a subquery, bind it as part of the node |
38 | auto &se = expressions[0]->Cast<SubqueryExpression>(); |
39 | subquery_node = std::move(se.subquery->node); |
40 | } else { |
41 | // special case: non-subquery parameter to table-in table-out function |
42 | // generate a subquery and bind that (i.e. UNNEST([1,2,3]) becomes UNNEST((SELECT [1,2,3])) |
43 | auto select_node = make_uniq<SelectNode>(); |
44 | select_node->select_list = std::move(expressions); |
45 | select_node->from_table = make_uniq<EmptyTableRef>(); |
46 | subquery_node = std::move(select_node); |
47 | } |
48 | auto node = binder->BindNode(node&: *subquery_node); |
49 | subquery = make_uniq<BoundSubqueryRef>(args: std::move(binder), args: std::move(node)); |
50 | MoveCorrelatedExpressions(other&: *subquery->binder); |
51 | return true; |
52 | } |
53 | |
54 | bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_function, |
55 | vector<unique_ptr<ParsedExpression>> &expressions, |
56 | vector<LogicalType> &arguments, vector<Value> ¶meters, |
57 | named_parameter_map_t &named_parameters, |
58 | unique_ptr<BoundSubqueryRef> &subquery, string &error) { |
59 | if (IsTableInTableOutFunction(table_function)) { |
60 | // special case binding for table-in table-out function |
61 | arguments.emplace_back(args: LogicalTypeId::TABLE); |
62 | return BindTableInTableOutFunction(expressions, subquery, error); |
63 | } |
64 | bool seen_subquery = false; |
65 | for (auto &child : expressions) { |
66 | string parameter_name; |
67 | |
68 | // hack to make named parameters work |
69 | if (child->type == ExpressionType::COMPARE_EQUAL) { |
70 | // comparison, check if the LHS is a columnref |
71 | auto &comp = child->Cast<ComparisonExpression>(); |
72 | if (comp.left->type == ExpressionType::COLUMN_REF) { |
73 | auto &colref = comp.left->Cast<ColumnRefExpression>(); |
74 | if (!colref.IsQualified()) { |
75 | parameter_name = colref.GetColumnName(); |
76 | child = std::move(comp.right); |
77 | } |
78 | } |
79 | } |
80 | if (child->type == ExpressionType::SUBQUERY) { |
81 | auto fun = table_function.functions.GetFunctionByOffset(offset: 0); |
82 | if (table_function.functions.Size() != 1 || fun.arguments.empty() || |
83 | fun.arguments[0].id() != LogicalTypeId::TABLE) { |
84 | throw BinderException( |
85 | "Only table-in-out functions can have subquery parameters - %s only accepts constant parameters" , |
86 | fun.name); |
87 | } |
88 | // this separate subquery binding path is only used by python_map |
89 | // FIXME: this should be unified with `BindTableInTableOutFunction` above |
90 | if (seen_subquery) { |
91 | error = "Table function can have at most one subquery parameter " ; |
92 | return false; |
93 | } |
94 | auto binder = Binder::CreateBinder(context&: this->context, parent: this, inherit_ctes: true); |
95 | auto &se = child->Cast<SubqueryExpression>(); |
96 | auto node = binder->BindNode(node&: *se.subquery->node); |
97 | subquery = make_uniq<BoundSubqueryRef>(args: std::move(binder), args: std::move(node)); |
98 | seen_subquery = true; |
99 | arguments.emplace_back(args: LogicalTypeId::TABLE); |
100 | parameters.emplace_back( |
101 | args: Value(LogicalType::INVALID)); // this is a dummy value so the lengths of arguments and parameter match |
102 | continue; |
103 | } |
104 | |
105 | TableFunctionBinder binder(*this, context); |
106 | LogicalType sql_type; |
107 | auto expr = binder.Bind(expr&: child, result_type: &sql_type); |
108 | if (expr->HasParameter()) { |
109 | throw ParameterNotResolvedException(); |
110 | } |
111 | if (!expr->IsScalar()) { |
112 | // should have been eliminated before |
113 | throw InternalException("Table function requires a constant parameter" ); |
114 | } |
115 | auto constant = ExpressionExecutor::EvaluateScalar(context, expr: *expr, allow_unfoldable: true); |
116 | if (parameter_name.empty()) { |
117 | // unnamed parameter |
118 | if (!named_parameters.empty()) { |
119 | error = "Unnamed parameters cannot come after named parameters" ; |
120 | return false; |
121 | } |
122 | arguments.emplace_back(args&: sql_type); |
123 | parameters.emplace_back(args: std::move(constant)); |
124 | } else { |
125 | named_parameters[parameter_name] = std::move(constant); |
126 | } |
127 | } |
128 | return true; |
129 | } |
130 | |
131 | unique_ptr<LogicalOperator> |
132 | Binder::BindTableFunctionInternal(TableFunction &table_function, const string &function_name, vector<Value> parameters, |
133 | named_parameter_map_t named_parameters, vector<LogicalType> input_table_types, |
134 | vector<string> input_table_names, const vector<string> &column_name_alias, |
135 | unique_ptr<ExternalDependency> external_dependency) { |
136 | auto bind_index = GenerateTableIndex(); |
137 | // perform the binding |
138 | unique_ptr<FunctionData> bind_data; |
139 | vector<LogicalType> return_types; |
140 | vector<string> return_names; |
141 | if (table_function.bind || table_function.bind_replace) { |
142 | TableFunctionBindInput bind_input(parameters, named_parameters, input_table_types, input_table_names, |
143 | table_function.function_info.get()); |
144 | if (table_function.bind_replace) { |
145 | auto new_plan = table_function.bind_replace(context, bind_input); |
146 | if (new_plan != nullptr) { |
147 | return CreatePlan(ref&: *Bind(ref&: *new_plan)); |
148 | } else if (!table_function.bind) { |
149 | throw BinderException("Failed to bind \"%s\": nullptr returned from bind_replace without bind function" , |
150 | table_function.name); |
151 | } |
152 | } |
153 | bind_data = table_function.bind(context, bind_input, return_types, return_names); |
154 | if (table_function.name == "pandas_scan" || table_function.name == "arrow_scan" ) { |
155 | auto &arrow_bind = bind_data->Cast<PyTableFunctionData>(); |
156 | arrow_bind.external_dependency = std::move(external_dependency); |
157 | } |
158 | if (table_function.name == "read_csv" || table_function.name == "read_csv_auto" ) { |
159 | auto &csv_bind = bind_data->Cast<ReadCSVData>(); |
160 | if (csv_bind.single_threaded) { |
161 | table_function.extra_info = "(Single-Threaded)" ; |
162 | } else { |
163 | table_function.extra_info = "(Multi-Threaded)" ; |
164 | } |
165 | } |
166 | } else { |
167 | throw InvalidInputException("Cannot call function \"%s\" directly - it has no bind function" , |
168 | table_function.name); |
169 | } |
170 | if (return_types.size() != return_names.size()) { |
171 | throw InternalException("Failed to bind \"%s\": return_types/names must have same size" , table_function.name); |
172 | } |
173 | if (return_types.empty()) { |
174 | throw InternalException("Failed to bind \"%s\": Table function must return at least one column" , |
175 | table_function.name); |
176 | } |
177 | // overwrite the names with any supplied aliases |
178 | for (idx_t i = 0; i < column_name_alias.size() && i < return_names.size(); i++) { |
179 | return_names[i] = column_name_alias[i]; |
180 | } |
181 | for (idx_t i = 0; i < return_names.size(); i++) { |
182 | if (return_names[i].empty()) { |
183 | return_names[i] = "C" + to_string(val: i); |
184 | } |
185 | } |
186 | |
187 | auto get = make_uniq<LogicalGet>(args&: bind_index, args&: table_function, args: std::move(bind_data), args&: return_types, args&: return_names); |
188 | get->parameters = parameters; |
189 | get->named_parameters = named_parameters; |
190 | get->input_table_types = input_table_types; |
191 | get->input_table_names = input_table_names; |
192 | if (table_function.in_out_function && !table_function.projection_pushdown) { |
193 | get->column_ids.reserve(n: return_types.size()); |
194 | for (idx_t i = 0; i < return_types.size(); i++) { |
195 | get->column_ids.push_back(x: i); |
196 | } |
197 | } |
198 | // now add the table function to the bind context so its columns can be bound |
199 | bind_context.AddTableFunction(index: bind_index, alias: function_name, names: return_names, types: return_types, bound_column_ids&: get->column_ids, |
200 | entry: get->GetTable().get()); |
201 | return std::move(get); |
202 | } |
203 | |
204 | unique_ptr<LogicalOperator> Binder::BindTableFunction(TableFunction &function, vector<Value> parameters) { |
205 | named_parameter_map_t named_parameters; |
206 | vector<LogicalType> input_table_types; |
207 | vector<string> input_table_names; |
208 | vector<string> column_name_aliases; |
209 | return BindTableFunctionInternal(table_function&: function, function_name: function.name, parameters: std::move(parameters), named_parameters: std::move(named_parameters), |
210 | input_table_types: std::move(input_table_types), input_table_names: std::move(input_table_names), column_name_alias: column_name_aliases, |
211 | external_dependency: nullptr); |
212 | } |
213 | |
214 | unique_ptr<BoundTableRef> Binder::Bind(TableFunctionRef &ref) { |
215 | QueryErrorContext error_context(root_statement, ref.query_location); |
216 | |
217 | D_ASSERT(ref.function->type == ExpressionType::FUNCTION); |
218 | auto &fexpr = ref.function->Cast<FunctionExpression>(); |
219 | |
220 | // fetch the function from the catalog |
221 | auto &func_catalog = Catalog::GetEntry(context, type: CatalogType::TABLE_FUNCTION_ENTRY, catalog: fexpr.catalog, schema: fexpr.schema, |
222 | name: fexpr.function_name, error_context); |
223 | |
224 | if (func_catalog.type == CatalogType::TABLE_MACRO_ENTRY) { |
225 | auto ¯o_func = func_catalog.Cast<TableMacroCatalogEntry>(); |
226 | auto query_node = BindTableMacro(function&: fexpr, macro_func, depth: 0); |
227 | D_ASSERT(query_node); |
228 | |
229 | auto binder = Binder::CreateBinder(context, parent: this); |
230 | binder->can_contain_nulls = true; |
231 | |
232 | binder->alias = ref.alias.empty() ? "unnamed_query" : ref.alias; |
233 | auto query = binder->BindNode(node&: *query_node); |
234 | |
235 | idx_t bind_index = query->GetRootIndex(); |
236 | // string alias; |
237 | string alias = (ref.alias.empty() ? "unnamed_query" + to_string(val: bind_index) : ref.alias); |
238 | |
239 | auto result = make_uniq<BoundSubqueryRef>(args: std::move(binder), args: std::move(query)); |
240 | // remember ref here is TableFunctionRef and NOT base class |
241 | bind_context.AddSubquery(index: bind_index, alias, ref, subquery&: *result->subquery); |
242 | MoveCorrelatedExpressions(other&: *result->binder); |
243 | return std::move(result); |
244 | } |
245 | D_ASSERT(func_catalog.type == CatalogType::TABLE_FUNCTION_ENTRY); |
246 | auto &function = func_catalog.Cast<TableFunctionCatalogEntry>(); |
247 | |
248 | // evaluate the input parameters to the function |
249 | vector<LogicalType> arguments; |
250 | vector<Value> parameters; |
251 | named_parameter_map_t named_parameters; |
252 | unique_ptr<BoundSubqueryRef> subquery; |
253 | string error; |
254 | if (!BindTableFunctionParameters(table_function&: function, expressions&: fexpr.children, arguments, parameters, named_parameters, subquery, |
255 | error)) { |
256 | throw BinderException(FormatError(ref_context&: ref, message: error)); |
257 | } |
258 | |
259 | // select the function based on the input parameters |
260 | FunctionBinder function_binder(context); |
261 | idx_t best_function_idx = function_binder.BindFunction(name: function.name, functions&: function.functions, arguments, error); |
262 | if (best_function_idx == DConstants::INVALID_INDEX) { |
263 | throw BinderException(FormatError(ref_context&: ref, message: error)); |
264 | } |
265 | auto table_function = function.functions.GetFunctionByOffset(offset: best_function_idx); |
266 | |
267 | // now check the named parameters |
268 | BindNamedParameters(types&: table_function.named_parameters, values&: named_parameters, error_context, func_name&: table_function.name); |
269 | |
270 | // cast the parameters to the type of the function |
271 | for (idx_t i = 0; i < arguments.size(); i++) { |
272 | auto target_type = i < table_function.arguments.size() ? table_function.arguments[i] : table_function.varargs; |
273 | |
274 | if (target_type != LogicalType::ANY && target_type != LogicalType::TABLE && |
275 | target_type != LogicalType::POINTER && target_type.id() != LogicalTypeId::LIST) { |
276 | parameters[i] = parameters[i].CastAs(context, target_type); |
277 | } |
278 | } |
279 | |
280 | vector<LogicalType> input_table_types; |
281 | vector<string> input_table_names; |
282 | |
283 | if (subquery) { |
284 | input_table_types = subquery->subquery->types; |
285 | input_table_names = subquery->subquery->names; |
286 | } |
287 | auto get = BindTableFunctionInternal(table_function, function_name: ref.alias.empty() ? fexpr.function_name : ref.alias, |
288 | parameters: std::move(parameters), named_parameters: std::move(named_parameters), |
289 | input_table_types: std::move(input_table_types), input_table_names: std::move(input_table_names), |
290 | column_name_alias: ref.column_name_alias, external_dependency: std::move(ref.external_dependency)); |
291 | if (subquery) { |
292 | get->children.push_back(x: Binder::CreatePlan(ref&: *subquery)); |
293 | } |
294 | |
295 | return make_uniq_base<BoundTableRef, BoundTableFunction>(args: std::move(get)); |
296 | } |
297 | |
298 | } // namespace duckdb |
299 | |