| 1 | //===----------------------------------------------------------------------===// | 
| 2 | //                         DuckDB | 
| 3 | // | 
| 4 | // duckdb/function/table_function.hpp | 
| 5 | // | 
| 6 | // | 
| 7 | //===----------------------------------------------------------------------===// | 
| 8 |  | 
| 9 | #pragma once | 
| 10 |  | 
| 11 | #include "duckdb/common/enums/operator_result_type.hpp" | 
| 12 | #include "duckdb/common/optional_ptr.hpp" | 
| 13 | #include "duckdb/execution/execution_context.hpp" | 
| 14 | #include "duckdb/function/function.hpp" | 
| 15 | #include "duckdb/planner/bind_context.hpp" | 
| 16 | #include "duckdb/planner/logical_operator.hpp" | 
| 17 | #include "duckdb/storage/statistics/node_statistics.hpp" | 
| 18 |  | 
| 19 | #include <functional> | 
| 20 |  | 
| 21 | namespace duckdb { | 
| 22 |  | 
// Forward declarations. Within this header these types appear only in callback signatures
// (BaseStatistics, DependencyList, LogicalGet) or wrapped in optional_ptr (TableFilterSet).
class BaseStatistics;
class DependencyList;
class LogicalGet;
class TableFilterSet;
| 27 |  | 
| 28 | struct TableFunctionInfo { | 
| 29 | 	DUCKDB_API virtual ~TableFunctionInfo(); | 
| 30 |  | 
| 31 | 	template <class TARGET> | 
| 32 | 	TARGET &Cast() { | 
| 33 | 		D_ASSERT(dynamic_cast<TARGET *>(this)); | 
| 34 | 		return reinterpret_cast<TARGET &>(*this); | 
| 35 | 	} | 
| 36 | 	template <class TARGET> | 
| 37 | 	const TARGET &Cast() const { | 
| 38 | 		D_ASSERT(dynamic_cast<const TARGET *>(this)); | 
| 39 | 		return reinterpret_cast<const TARGET &>(*this); | 
| 40 | 	} | 
| 41 | }; | 
| 42 |  | 
| 43 | struct GlobalTableFunctionState { | 
| 44 | public: | 
| 45 | 	// value returned from MaxThreads when as many threads as possible should be used | 
| 46 | 	constexpr static const int64_t MAX_THREADS = 999999999; | 
| 47 |  | 
| 48 | public: | 
| 49 | 	DUCKDB_API virtual ~GlobalTableFunctionState(); | 
| 50 |  | 
| 51 | 	virtual idx_t MaxThreads() const { | 
| 52 | 		return 1; | 
| 53 | 	} | 
| 54 |  | 
| 55 | 	template <class TARGET> | 
| 56 | 	TARGET &Cast() { | 
| 57 | 		D_ASSERT(dynamic_cast<TARGET *>(this)); | 
| 58 | 		return reinterpret_cast<TARGET &>(*this); | 
| 59 | 	} | 
| 60 | 	template <class TARGET> | 
| 61 | 	const TARGET &Cast() const { | 
| 62 | 		D_ASSERT(dynamic_cast<const TARGET *>(this)); | 
| 63 | 		return reinterpret_cast<const TARGET &>(*this); | 
| 64 | 	} | 
| 65 | }; | 
| 66 |  | 
| 67 | struct LocalTableFunctionState { | 
| 68 | 	DUCKDB_API virtual ~LocalTableFunctionState(); | 
| 69 |  | 
| 70 | 	template <class TARGET> | 
| 71 | 	TARGET &Cast() { | 
| 72 | 		D_ASSERT(dynamic_cast<TARGET *>(this)); | 
| 73 | 		return reinterpret_cast<TARGET &>(*this); | 
| 74 | 	} | 
| 75 | 	template <class TARGET> | 
| 76 | 	const TARGET &Cast() const { | 
| 77 | 		D_ASSERT(dynamic_cast<const TARGET *>(this)); | 
| 78 | 		return reinterpret_cast<const TARGET &>(*this); | 
| 79 | 	} | 
| 80 | }; | 
| 81 |  | 
| 82 | struct TableFunctionBindInput { | 
| 83 | 	TableFunctionBindInput(vector<Value> &inputs, named_parameter_map_t &named_parameters, | 
| 84 | 	                       vector<LogicalType> &input_table_types, vector<string> &input_table_names, | 
| 85 | 	                       optional_ptr<TableFunctionInfo> info) | 
| 86 | 	    : inputs(inputs), named_parameters(named_parameters), input_table_types(input_table_types), | 
| 87 | 	      input_table_names(input_table_names), info(info) { | 
| 88 | 	} | 
| 89 |  | 
| 90 | 	vector<Value> &inputs; | 
| 91 | 	named_parameter_map_t &named_parameters; | 
| 92 | 	vector<LogicalType> &input_table_types; | 
| 93 | 	vector<string> &input_table_names; | 
| 94 | 	optional_ptr<TableFunctionInfo> info; | 
| 95 | }; | 
| 96 |  | 
| 97 | struct TableFunctionInitInput { | 
| 98 | 	TableFunctionInitInput(optional_ptr<const FunctionData> bind_data_p, const vector<column_t> &column_ids_p, | 
| 99 | 	                       const vector<idx_t> &projection_ids_p, optional_ptr<TableFilterSet> filters_p) | 
| 100 | 	    : bind_data(bind_data_p), column_ids(column_ids_p), projection_ids(projection_ids_p), filters(filters_p) { | 
| 101 | 	} | 
| 102 |  | 
| 103 | 	optional_ptr<const FunctionData> bind_data; | 
| 104 | 	const vector<column_t> &column_ids; | 
| 105 | 	const vector<idx_t> projection_ids; | 
| 106 | 	optional_ptr<TableFilterSet> filters; | 
| 107 |  | 
| 108 | 	bool CanRemoveFilterColumns() const { | 
| 109 | 		if (projection_ids.empty()) { | 
| 110 | 			// Not set, can't remove filter columns | 
| 111 | 			return false; | 
| 112 | 		} else if (projection_ids.size() == column_ids.size()) { | 
| 113 | 			// Filter column is used in remainder of plan, can't remove | 
| 114 | 			return false; | 
| 115 | 		} else { | 
| 116 | 			// Less columns need to be projected out than that we scan | 
| 117 | 			return true; | 
| 118 | 		} | 
| 119 | 	} | 
| 120 | }; | 
| 121 |  | 
| 122 | struct TableFunctionInput { | 
| 123 | public: | 
| 124 | 	TableFunctionInput(optional_ptr<const FunctionData> bind_data_p, | 
| 125 | 	                   optional_ptr<LocalTableFunctionState> local_state_p, | 
| 126 | 	                   optional_ptr<GlobalTableFunctionState> global_state_p) | 
| 127 | 	    : bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p) { | 
| 128 | 	} | 
| 129 |  | 
| 130 | public: | 
| 131 | 	optional_ptr<const FunctionData> bind_data; | 
| 132 | 	optional_ptr<LocalTableFunctionState> local_state; | 
| 133 | 	optional_ptr<GlobalTableFunctionState> global_state; | 
| 134 | }; | 
| 135 |  | 
//! Kind of scan a BindInfo describes.
//! Plain (unscoped) enum: enumerators are also reachable unqualified and convert implicitly to int.
enum ScanType {
	TABLE,
	PARQUET
};
| 137 |  | 
| 138 | struct BindInfo { | 
| 139 | public: | 
| 140 | 	explicit BindInfo(ScanType type_p) : type(type_p) {}; | 
| 141 | 	unordered_map<string, Value> options; | 
| 142 | 	ScanType type; | 
| 143 | 	void InsertOption(const string &name, Value value) { | 
| 144 | 		if (options.find(x: name) != options.end()) { | 
| 145 | 			throw InternalException("This option already exists" ); | 
| 146 | 		} | 
| 147 | 		options[name] = std::move(value); | 
| 148 | 	} | 
| 149 | 	template <class T> | 
| 150 | 	T GetOption(const string &name) { | 
| 151 | 		if (options.find(x: name) == options.end()) { | 
| 152 | 			throw InternalException("This option does not exist" ); | 
| 153 | 		} | 
| 154 | 		return options[name].GetValue<T>(); | 
| 155 | 	} | 
| 156 | 	template <class T> | 
| 157 | 	vector<T> GetOptionList(const string &name) { | 
| 158 | 		if (options.find(x: name) == options.end()) { | 
| 159 | 			throw InternalException("This option does not exist" ); | 
| 160 | 		} | 
| 161 | 		auto option = options[name]; | 
| 162 | 		if (option.type().id() != LogicalTypeId::LIST) { | 
| 163 | 			throw InternalException("This option is not a list" ); | 
| 164 | 		} | 
| 165 | 		vector<T> result; | 
| 166 | 		auto list_children = ListValue::GetChildren(value: option); | 
| 167 | 		for (auto &child : list_children) { | 
| 168 | 			result.emplace_back(child.GetValue<T>()); | 
| 169 | 		} | 
| 170 | 		return result; | 
| 171 | 	} | 
| 172 | }; | 
| 173 |  | 
| 174 | typedef unique_ptr<FunctionData> (*table_function_bind_t)(ClientContext &context, TableFunctionBindInput &input, | 
| 175 |                                                           vector<LogicalType> &return_types, vector<string> &names); | 
| 176 | typedef unique_ptr<TableRef> (*table_function_bind_replace_t)(ClientContext &context, TableFunctionBindInput &input); | 
| 177 | typedef unique_ptr<GlobalTableFunctionState> (*table_function_init_global_t)(ClientContext &context, | 
| 178 |                                                                              TableFunctionInitInput &input); | 
| 179 | typedef unique_ptr<LocalTableFunctionState> (*table_function_init_local_t)(ExecutionContext &context, | 
| 180 |                                                                            TableFunctionInitInput &input, | 
| 181 |                                                                            GlobalTableFunctionState *global_state); | 
| 182 | typedef unique_ptr<BaseStatistics> (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data, | 
| 183 |                                                          column_t column_index); | 
| 184 | typedef void (*table_function_t)(ClientContext &context, TableFunctionInput &data, DataChunk &output); | 
| 185 | typedef OperatorResultType (*table_in_out_function_t)(ExecutionContext &context, TableFunctionInput &data, | 
| 186 |                                                       DataChunk &input, DataChunk &output); | 
| 187 | typedef OperatorFinalizeResultType (*table_in_out_function_final_t)(ExecutionContext &context, TableFunctionInput &data, | 
| 188 |                                                                     DataChunk &output); | 
| 189 | typedef idx_t (*table_function_get_batch_index_t)(ClientContext &context, const FunctionData *bind_data, | 
| 190 |                                                   LocalTableFunctionState *local_state, | 
| 191 |                                                   GlobalTableFunctionState *global_state); | 
| 192 |  | 
| 193 | typedef BindInfo (*table_function_get_bind_info)(const FunctionData *bind_data); | 
| 194 |  | 
| 195 | typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data, | 
| 196 |                                             const GlobalTableFunctionState *global_state); | 
| 197 | typedef void (*table_function_dependency_t)(DependencyList &dependencies, const FunctionData *bind_data); | 
| 198 | typedef unique_ptr<NodeStatistics> (*table_function_cardinality_t)(ClientContext &context, | 
| 199 |                                                                    const FunctionData *bind_data); | 
| 200 | typedef void (*table_function_pushdown_complex_filter_t)(ClientContext &context, LogicalGet &get, | 
| 201 |                                                          FunctionData *bind_data, | 
| 202 |                                                          vector<unique_ptr<Expression>> &filters); | 
| 203 | typedef string (*table_function_to_string_t)(const FunctionData *bind_data); | 
| 204 |  | 
| 205 | typedef void (*table_function_serialize_t)(FieldWriter &writer, const FunctionData *bind_data, | 
| 206 |                                            const TableFunction &function); | 
| 207 | typedef unique_ptr<FunctionData> (*table_function_deserialize_t)(PlanDeserializationState &context, FieldReader &reader, | 
| 208 |                                                                  TableFunction &function); | 
| 209 |  | 
| 210 | class TableFunction : public SimpleNamedParameterFunction { | 
| 211 | public: | 
| 212 | 	DUCKDB_API | 
| 213 | 	TableFunction(string name, vector<LogicalType> arguments, table_function_t function, | 
| 214 | 	              table_function_bind_t bind = nullptr, table_function_init_global_t init_global = nullptr, | 
| 215 | 	              table_function_init_local_t init_local = nullptr); | 
| 216 | 	DUCKDB_API | 
| 217 | 	TableFunction(const vector<LogicalType> &arguments, table_function_t function, table_function_bind_t bind = nullptr, | 
| 218 | 	              table_function_init_global_t init_global = nullptr, table_function_init_local_t init_local = nullptr); | 
| 219 | 	DUCKDB_API TableFunction(); | 
| 220 |  | 
| 221 | 	//! Bind function | 
| 222 | 	//! This function is used for determining the return type of a table producing function and returning bind data | 
| 223 | 	//! The returned FunctionData object should be constant and should not be changed during execution. | 
| 224 | 	table_function_bind_t bind; | 
| 225 | 	//! (Optional) Bind replace function | 
| 226 | 	//! This function is called before the regular bind function. It allows returning a TableRef will be used to | 
| 227 | 	//! to generate a logical plan that replaces the LogicalGet of a regularly bound TableFunction. The BindReplace can | 
| 228 | 	//! also return a nullptr to indicate a regular bind needs to be performed instead. | 
| 229 | 	table_function_bind_replace_t bind_replace; | 
| 230 | 	//! (Optional) global init function | 
| 231 | 	//! Initialize the global operator state of the function. | 
| 232 | 	//! The global operator state is used to keep track of the progress in the table function and is shared between | 
| 233 | 	//! all threads working on the table function. | 
| 234 | 	table_function_init_global_t init_global; | 
| 235 | 	//! (Optional) local init function | 
| 236 | 	//! Initialize the local operator state of the function. | 
| 237 | 	//! The local operator state is used to keep track of the progress in the table function and is thread-local. | 
| 238 | 	table_function_init_local_t init_local; | 
| 239 | 	//! The main function | 
| 240 | 	table_function_t function; | 
| 241 | 	//! The table in-out function (if this is an in-out function) | 
| 242 | 	table_in_out_function_t in_out_function; | 
| 243 | 	//! The table in-out final function (if this is an in-out function) | 
| 244 | 	table_in_out_function_final_t in_out_function_final; | 
| 245 | 	//! (Optional) statistics function | 
| 246 | 	//! Returns the statistics of a specified column | 
| 247 | 	table_statistics_t statistics; | 
| 248 | 	//! (Optional) dependency function | 
| 249 | 	//! Sets up which catalog entries this table function depend on | 
| 250 | 	table_function_dependency_t dependency; | 
| 251 | 	//! (Optional) cardinality function | 
| 252 | 	//! Returns the expected cardinality of this scan | 
| 253 | 	table_function_cardinality_t cardinality; | 
| 254 | 	//! (Optional) pushdown a set of arbitrary filter expressions, rather than only simple comparisons with a constant | 
| 255 | 	//! Any functions remaining in the expression list will be pushed as a regular filter after the scan | 
| 256 | 	table_function_pushdown_complex_filter_t pushdown_complex_filter; | 
| 257 | 	//! (Optional) function for rendering the operator to a string in profiling output | 
| 258 | 	table_function_to_string_t to_string; | 
| 259 | 	//! (Optional) return how much of the table we have scanned up to this point (% of the data) | 
| 260 | 	table_function_progress_t table_scan_progress; | 
| 261 | 	//! (Optional) returns the current batch index of the current scan operator | 
| 262 | 	table_function_get_batch_index_t get_batch_index; | 
| 263 | 	//! (Optional) returns the extra batch info, currently only used for the substrait extension | 
| 264 | 	table_function_get_bind_info get_batch_info; | 
| 265 |  | 
| 266 | 	table_function_serialize_t serialize; | 
| 267 | 	table_function_deserialize_t deserialize; | 
| 268 | 	bool verify_serialization = true; | 
| 269 |  | 
| 270 | 	//! Whether or not the table function supports projection pushdown. If not supported a projection will be added | 
| 271 | 	//! that filters out unused columns. | 
| 272 | 	bool projection_pushdown; | 
| 273 | 	//! Whether or not the table function supports filter pushdown. If not supported a filter will be added | 
| 274 | 	//! that applies the table filter directly. | 
| 275 | 	bool filter_pushdown; | 
| 276 | 	//! Whether or not the table function can immediately prune out filter columns that are unused in the remainder of | 
| 277 | 	//! the query plan, e.g., "SELECT i FROM tbl WHERE j = 42;" - j does not need to leave the table function at all | 
| 278 | 	bool filter_prune; | 
| 279 | 	//! Additional function info, passed to the bind | 
| 280 | 	shared_ptr<TableFunctionInfo> function_info; | 
| 281 |  | 
| 282 | 	DUCKDB_API bool Equal(const TableFunction &rhs) const; | 
| 283 | }; | 
| 284 |  | 
| 285 | } // namespace duckdb | 
| 286 |  |