1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/function/table_function.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/enums/operator_result_type.hpp"
12#include "duckdb/common/optional_ptr.hpp"
13#include "duckdb/execution/execution_context.hpp"
14#include "duckdb/function/function.hpp"
15#include "duckdb/planner/bind_context.hpp"
16#include "duckdb/planner/logical_operator.hpp"
17#include "duckdb/storage/statistics/node_statistics.hpp"
18
19#include <functional>
20
21namespace duckdb {
22
23class BaseStatistics;
24class DependencyList;
25class LogicalGet;
26class TableFilterSet;
27
28struct TableFunctionInfo {
29 DUCKDB_API virtual ~TableFunctionInfo();
30
31 template <class TARGET>
32 TARGET &Cast() {
33 D_ASSERT(dynamic_cast<TARGET *>(this));
34 return reinterpret_cast<TARGET &>(*this);
35 }
36 template <class TARGET>
37 const TARGET &Cast() const {
38 D_ASSERT(dynamic_cast<const TARGET *>(this));
39 return reinterpret_cast<const TARGET &>(*this);
40 }
41};
42
43struct GlobalTableFunctionState {
44public:
45 // value returned from MaxThreads when as many threads as possible should be used
46 constexpr static const int64_t MAX_THREADS = 999999999;
47
48public:
49 DUCKDB_API virtual ~GlobalTableFunctionState();
50
51 virtual idx_t MaxThreads() const {
52 return 1;
53 }
54
55 template <class TARGET>
56 TARGET &Cast() {
57 D_ASSERT(dynamic_cast<TARGET *>(this));
58 return reinterpret_cast<TARGET &>(*this);
59 }
60 template <class TARGET>
61 const TARGET &Cast() const {
62 D_ASSERT(dynamic_cast<const TARGET *>(this));
63 return reinterpret_cast<const TARGET &>(*this);
64 }
65};
66
67struct LocalTableFunctionState {
68 DUCKDB_API virtual ~LocalTableFunctionState();
69
70 template <class TARGET>
71 TARGET &Cast() {
72 D_ASSERT(dynamic_cast<TARGET *>(this));
73 return reinterpret_cast<TARGET &>(*this);
74 }
75 template <class TARGET>
76 const TARGET &Cast() const {
77 D_ASSERT(dynamic_cast<const TARGET *>(this));
78 return reinterpret_cast<const TARGET &>(*this);
79 }
80};
81
82struct TableFunctionBindInput {
83 TableFunctionBindInput(vector<Value> &inputs, named_parameter_map_t &named_parameters,
84 vector<LogicalType> &input_table_types, vector<string> &input_table_names,
85 optional_ptr<TableFunctionInfo> info)
86 : inputs(inputs), named_parameters(named_parameters), input_table_types(input_table_types),
87 input_table_names(input_table_names), info(info) {
88 }
89
90 vector<Value> &inputs;
91 named_parameter_map_t &named_parameters;
92 vector<LogicalType> &input_table_types;
93 vector<string> &input_table_names;
94 optional_ptr<TableFunctionInfo> info;
95};
96
97struct TableFunctionInitInput {
98 TableFunctionInitInput(optional_ptr<const FunctionData> bind_data_p, const vector<column_t> &column_ids_p,
99 const vector<idx_t> &projection_ids_p, optional_ptr<TableFilterSet> filters_p)
100 : bind_data(bind_data_p), column_ids(column_ids_p), projection_ids(projection_ids_p), filters(filters_p) {
101 }
102
103 optional_ptr<const FunctionData> bind_data;
104 const vector<column_t> &column_ids;
105 const vector<idx_t> projection_ids;
106 optional_ptr<TableFilterSet> filters;
107
108 bool CanRemoveFilterColumns() const {
109 if (projection_ids.empty()) {
110 // Not set, can't remove filter columns
111 return false;
112 } else if (projection_ids.size() == column_ids.size()) {
113 // Filter column is used in remainder of plan, can't remove
114 return false;
115 } else {
116 // Less columns need to be projected out than that we scan
117 return true;
118 }
119 }
120};
121
122struct TableFunctionInput {
123public:
124 TableFunctionInput(optional_ptr<const FunctionData> bind_data_p,
125 optional_ptr<LocalTableFunctionState> local_state_p,
126 optional_ptr<GlobalTableFunctionState> global_state_p)
127 : bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p) {
128 }
129
130public:
131 optional_ptr<const FunctionData> bind_data;
132 optional_ptr<LocalTableFunctionState> local_state;
133 optional_ptr<GlobalTableFunctionState> global_state;
134};
135
//! The kind of scan stored in a BindInfo
enum ScanType {
	TABLE,
	PARQUET
};
137
138struct BindInfo {
139public:
140 explicit BindInfo(ScanType type_p) : type(type_p) {};
141 unordered_map<string, Value> options;
142 ScanType type;
143 void InsertOption(const string &name, Value value) {
144 if (options.find(x: name) != options.end()) {
145 throw InternalException("This option already exists");
146 }
147 options[name] = std::move(value);
148 }
149 template <class T>
150 T GetOption(const string &name) {
151 if (options.find(x: name) == options.end()) {
152 throw InternalException("This option does not exist");
153 }
154 return options[name].GetValue<T>();
155 }
156 template <class T>
157 vector<T> GetOptionList(const string &name) {
158 if (options.find(x: name) == options.end()) {
159 throw InternalException("This option does not exist");
160 }
161 auto option = options[name];
162 if (option.type().id() != LogicalTypeId::LIST) {
163 throw InternalException("This option is not a list");
164 }
165 vector<T> result;
166 auto list_children = ListValue::GetChildren(value: option);
167 for (auto &child : list_children) {
168 result.emplace_back(child.GetValue<T>());
169 }
170 return result;
171 }
172};
173
174typedef unique_ptr<FunctionData> (*table_function_bind_t)(ClientContext &context, TableFunctionBindInput &input,
175 vector<LogicalType> &return_types, vector<string> &names);
176typedef unique_ptr<TableRef> (*table_function_bind_replace_t)(ClientContext &context, TableFunctionBindInput &input);
177typedef unique_ptr<GlobalTableFunctionState> (*table_function_init_global_t)(ClientContext &context,
178 TableFunctionInitInput &input);
179typedef unique_ptr<LocalTableFunctionState> (*table_function_init_local_t)(ExecutionContext &context,
180 TableFunctionInitInput &input,
181 GlobalTableFunctionState *global_state);
182typedef unique_ptr<BaseStatistics> (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data,
183 column_t column_index);
184typedef void (*table_function_t)(ClientContext &context, TableFunctionInput &data, DataChunk &output);
185typedef OperatorResultType (*table_in_out_function_t)(ExecutionContext &context, TableFunctionInput &data,
186 DataChunk &input, DataChunk &output);
187typedef OperatorFinalizeResultType (*table_in_out_function_final_t)(ExecutionContext &context, TableFunctionInput &data,
188 DataChunk &output);
189typedef idx_t (*table_function_get_batch_index_t)(ClientContext &context, const FunctionData *bind_data,
190 LocalTableFunctionState *local_state,
191 GlobalTableFunctionState *global_state);
192
193typedef BindInfo (*table_function_get_bind_info)(const FunctionData *bind_data);
194
195typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data,
196 const GlobalTableFunctionState *global_state);
197typedef void (*table_function_dependency_t)(DependencyList &dependencies, const FunctionData *bind_data);
198typedef unique_ptr<NodeStatistics> (*table_function_cardinality_t)(ClientContext &context,
199 const FunctionData *bind_data);
200typedef void (*table_function_pushdown_complex_filter_t)(ClientContext &context, LogicalGet &get,
201 FunctionData *bind_data,
202 vector<unique_ptr<Expression>> &filters);
203typedef string (*table_function_to_string_t)(const FunctionData *bind_data);
204
205typedef void (*table_function_serialize_t)(FieldWriter &writer, const FunctionData *bind_data,
206 const TableFunction &function);
207typedef unique_ptr<FunctionData> (*table_function_deserialize_t)(PlanDeserializationState &context, FieldReader &reader,
208 TableFunction &function);
209
210class TableFunction : public SimpleNamedParameterFunction {
211public:
212 DUCKDB_API
213 TableFunction(string name, vector<LogicalType> arguments, table_function_t function,
214 table_function_bind_t bind = nullptr, table_function_init_global_t init_global = nullptr,
215 table_function_init_local_t init_local = nullptr);
216 DUCKDB_API
217 TableFunction(const vector<LogicalType> &arguments, table_function_t function, table_function_bind_t bind = nullptr,
218 table_function_init_global_t init_global = nullptr, table_function_init_local_t init_local = nullptr);
219 DUCKDB_API TableFunction();
220
221 //! Bind function
222 //! This function is used for determining the return type of a table producing function and returning bind data
223 //! The returned FunctionData object should be constant and should not be changed during execution.
224 table_function_bind_t bind;
225 //! (Optional) Bind replace function
226 //! This function is called before the regular bind function. It allows returning a TableRef will be used to
227 //! to generate a logical plan that replaces the LogicalGet of a regularly bound TableFunction. The BindReplace can
228 //! also return a nullptr to indicate a regular bind needs to be performed instead.
229 table_function_bind_replace_t bind_replace;
230 //! (Optional) global init function
231 //! Initialize the global operator state of the function.
232 //! The global operator state is used to keep track of the progress in the table function and is shared between
233 //! all threads working on the table function.
234 table_function_init_global_t init_global;
235 //! (Optional) local init function
236 //! Initialize the local operator state of the function.
237 //! The local operator state is used to keep track of the progress in the table function and is thread-local.
238 table_function_init_local_t init_local;
239 //! The main function
240 table_function_t function;
241 //! The table in-out function (if this is an in-out function)
242 table_in_out_function_t in_out_function;
243 //! The table in-out final function (if this is an in-out function)
244 table_in_out_function_final_t in_out_function_final;
245 //! (Optional) statistics function
246 //! Returns the statistics of a specified column
247 table_statistics_t statistics;
248 //! (Optional) dependency function
249 //! Sets up which catalog entries this table function depend on
250 table_function_dependency_t dependency;
251 //! (Optional) cardinality function
252 //! Returns the expected cardinality of this scan
253 table_function_cardinality_t cardinality;
254 //! (Optional) pushdown a set of arbitrary filter expressions, rather than only simple comparisons with a constant
255 //! Any functions remaining in the expression list will be pushed as a regular filter after the scan
256 table_function_pushdown_complex_filter_t pushdown_complex_filter;
257 //! (Optional) function for rendering the operator to a string in profiling output
258 table_function_to_string_t to_string;
259 //! (Optional) return how much of the table we have scanned up to this point (% of the data)
260 table_function_progress_t table_scan_progress;
261 //! (Optional) returns the current batch index of the current scan operator
262 table_function_get_batch_index_t get_batch_index;
263 //! (Optional) returns the extra batch info, currently only used for the substrait extension
264 table_function_get_bind_info get_batch_info;
265
266 table_function_serialize_t serialize;
267 table_function_deserialize_t deserialize;
268 bool verify_serialization = true;
269
270 //! Whether or not the table function supports projection pushdown. If not supported a projection will be added
271 //! that filters out unused columns.
272 bool projection_pushdown;
273 //! Whether or not the table function supports filter pushdown. If not supported a filter will be added
274 //! that applies the table filter directly.
275 bool filter_pushdown;
276 //! Whether or not the table function can immediately prune out filter columns that are unused in the remainder of
277 //! the query plan, e.g., "SELECT i FROM tbl WHERE j = 42;" - j does not need to leave the table function at all
278 bool filter_prune;
279 //! Additional function info, passed to the bind
280 shared_ptr<TableFunctionInfo> function_info;
281
282 DUCKDB_API bool Equal(const TableFunction &rhs) const;
283};
284
285} // namespace duckdb
286