| 1 | //===----------------------------------------------------------------------===// |
| 2 | // DuckDB |
| 3 | // |
| 4 | // duckdb/planner/binder.hpp |
| 5 | // |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include "duckdb/common/case_insensitive_map.hpp" |
| 12 | #include "duckdb/common/enums/join_type.hpp" |
| 13 | #include "duckdb/common/enums/statement_type.hpp" |
| 14 | #include "duckdb/common/unordered_map.hpp" |
| 15 | #include "duckdb/parser/column_definition.hpp" |
| 16 | #include "duckdb/parser/query_node.hpp" |
| 17 | #include "duckdb/parser/result_modifier.hpp" |
| 18 | #include "duckdb/parser/tokens.hpp" |
| 19 | #include "duckdb/planner/bind_context.hpp" |
| 20 | #include "duckdb/planner/bound_statement.hpp" |
| 21 | #include "duckdb/planner/bound_tokens.hpp" |
| 22 | #include "duckdb/planner/expression/bound_columnref_expression.hpp" |
| 23 | #include "duckdb/planner/logical_operator.hpp" |
| 24 | #include "duckdb/common/reference_map.hpp" |
| 25 | |
| 26 | namespace duckdb { |
// Forward declarations of types that this header only refers to by reference, pointer or
// smart pointer. Sorted alphabetically within each class-key group.

// -- declared with `class` --
class BoundResultModifier;
class BoundSelectNode;
class ClientContext;
class ColumnList;
class ExpressionBinder;
class ExternalDependency;
class LimitModifier;
class LogicalProjection;
class OrderBinder;
class TableCatalogEntry;
class TableFunction;
class TableMacroCatalogEntry;
class TableStorageInfo;
class UpdateSetInfo;
class ViewCatalogEntry;

// -- declared with `struct` --
struct BoundCreateFunctionInfo;
struct BoundCreateTableInfo;
struct BoundParameterMap;
struct CommonTableExpressionInfo;
struct CreateInfo;
| 49 | |
//! The mode a Binder operates in (see Binder::SetBindingMode / Binder::GetBindingMode)
enum class BindingMode : uint8_t {
	//! Default mode
	STANDARD_BINDING,
	//! Mode associated with the table names gathered through Binder::AddTableName / Binder::GetTableNames
	//! NOTE(review): exact semantics are defined by the binder implementation - confirm there
	EXTRACT_NAMES
};
| 51 | |
| 52 | struct CorrelatedColumnInfo { |
| 53 | ColumnBinding binding; |
| 54 | LogicalType type; |
| 55 | string name; |
| 56 | idx_t depth; |
| 57 | |
| 58 | CorrelatedColumnInfo(ColumnBinding binding, LogicalType type_p, string name_p, idx_t depth) |
| 59 | : binding(binding), type(std::move(type_p)), name(std::move(name_p)), depth(depth) { |
| 60 | } |
| 61 | explicit CorrelatedColumnInfo(BoundColumnRefExpression &expr) |
| 62 | : CorrelatedColumnInfo(expr.binding, expr.return_type, expr.GetName(), expr.depth) { |
| 63 | } |
| 64 | |
| 65 | bool operator==(const CorrelatedColumnInfo &rhs) const { |
| 66 | return binding == rhs.binding; |
| 67 | } |
| 68 | }; |
| 69 | |
| 70 | //! Bind the parsed query tree to the actual columns present in the catalog. |
| 71 | /*! |
| 72 | The binder is responsible for binding tables and columns to actual physical |
| 73 | tables and columns in the catalog. In the process, it also resolves types of |
| 74 | all expressions. |
| 75 | */ |
| 76 | class Binder : public std::enable_shared_from_this<Binder> { |
| 77 | friend class ExpressionBinder; |
| 78 | friend class RecursiveSubqueryPlanner; |
| 79 | |
| 80 | public: |
| 81 | DUCKDB_API static shared_ptr<Binder> CreateBinder(ClientContext &context, optional_ptr<Binder> parent = nullptr, |
| 82 | bool inherit_ctes = true); |
| 83 | |
| 84 | //! The client context |
| 85 | ClientContext &context; |
| 86 | //! A mapping of names to common table expressions |
| 87 | case_insensitive_map_t<reference<CommonTableExpressionInfo>> CTE_bindings; // NOLINT |
| 88 | //! The CTEs that have already been bound |
| 89 | reference_set_t<CommonTableExpressionInfo> bound_ctes; |
| 90 | //! The bind context |
| 91 | BindContext bind_context; |
| 92 | //! The set of correlated columns bound by this binder (FIXME: this should probably be an unordered_set and not a |
| 93 | //! vector) |
| 94 | vector<CorrelatedColumnInfo> correlated_columns; |
| 95 | //! The set of parameter expressions bound by this binder |
| 96 | optional_ptr<BoundParameterMap> parameters; |
| 97 | //! Statement properties |
| 98 | StatementProperties properties; |
| 99 | //! The alias for the currently processing subquery, if it exists |
| 100 | string alias; |
| 101 | //! Macro parameter bindings (if any) |
| 102 | optional_ptr<DummyBinding> macro_binding; |
| 103 | //! The intermediate lambda bindings to bind nested lambdas (if any) |
| 104 | optional_ptr<vector<DummyBinding>> lambda_bindings; |
| 105 | |
| 106 | public: |
| 107 | DUCKDB_API BoundStatement Bind(SQLStatement &statement); |
| 108 | DUCKDB_API BoundStatement Bind(QueryNode &node); |
| 109 | |
| 110 | unique_ptr<BoundCreateTableInfo> BindCreateTableInfo(unique_ptr<CreateInfo> info); |
| 111 | unique_ptr<BoundCreateTableInfo> BindCreateTableInfo(unique_ptr<CreateInfo> info, SchemaCatalogEntry &schema); |
| 112 | |
| 113 | vector<unique_ptr<Expression>> BindCreateIndexExpressions(TableCatalogEntry &table, CreateIndexInfo &info); |
| 114 | |
| 115 | void BindCreateViewInfo(CreateViewInfo &base); |
| 116 | SchemaCatalogEntry &BindSchema(CreateInfo &info); |
| 117 | SchemaCatalogEntry &BindCreateFunctionInfo(CreateInfo &info); |
| 118 | |
| 119 | //! Check usage, and cast named parameters to their types |
| 120 | static void BindNamedParameters(named_parameter_type_map_t &types, named_parameter_map_t &values, |
| 121 | QueryErrorContext &error_context, string &func_name); |
| 122 | |
| 123 | unique_ptr<BoundTableRef> Bind(TableRef &ref); |
| 124 | unique_ptr<LogicalOperator> CreatePlan(BoundTableRef &ref); |
| 125 | |
| 126 | //! Generates an unused index for a table |
| 127 | idx_t GenerateTableIndex(); |
| 128 | |
| 129 | //! Add a common table expression to the binder |
| 130 | void AddCTE(const string &name, CommonTableExpressionInfo &cte); |
| 131 | //! Find a common table expression by name; returns nullptr if none exists |
| 132 | optional_ptr<CommonTableExpressionInfo> FindCTE(const string &name, bool skip = false); |
| 133 | |
| 134 | bool CTEIsAlreadyBound(CommonTableExpressionInfo &cte); |
| 135 | |
| 136 | //! Add the view to the set of currently bound views - used for detecting recursive view definitions |
| 137 | void AddBoundView(ViewCatalogEntry &view); |
| 138 | |
| 139 | void PushExpressionBinder(ExpressionBinder &binder); |
| 140 | void PopExpressionBinder(); |
| 141 | void SetActiveBinder(ExpressionBinder &binder); |
| 142 | ExpressionBinder &GetActiveBinder(); |
| 143 | bool HasActiveBinder(); |
| 144 | |
| 145 | vector<reference<ExpressionBinder>> &GetActiveBinders(); |
| 146 | |
| 147 | void MergeCorrelatedColumns(vector<CorrelatedColumnInfo> &other); |
| 148 | //! Add a correlated column to this binder (if it does not exist) |
| 149 | void AddCorrelatedColumn(const CorrelatedColumnInfo &info); |
| 150 | |
| 151 | string FormatError(ParsedExpression &expr_context, const string &message); |
| 152 | string FormatError(TableRef &ref_context, const string &message); |
| 153 | |
| 154 | string FormatErrorRecursive(idx_t query_location, const string &message, vector<ExceptionFormatValue> &values); |
| 155 | template <class T, typename... ARGS> |
| 156 | string FormatErrorRecursive(idx_t query_location, const string &msg, vector<ExceptionFormatValue> &values, T param, |
| 157 | ARGS... params) { |
| 158 | values.push_back(ExceptionFormatValue::CreateFormatValue<T>(param)); |
| 159 | return FormatErrorRecursive(query_location, msg, values, params...); |
| 160 | } |
| 161 | |
| 162 | template <typename... ARGS> |
| 163 | string FormatError(idx_t query_location, const string &msg, ARGS... params) { |
| 164 | vector<ExceptionFormatValue> values; |
| 165 | return FormatErrorRecursive(query_location, msg, values, params...); |
| 166 | } |
| 167 | |
| 168 | unique_ptr<LogicalOperator> BindUpdateSet(LogicalOperator &op, unique_ptr<LogicalOperator> root, |
| 169 | UpdateSetInfo &set_info, TableCatalogEntry &table, |
| 170 | vector<PhysicalIndex> &columns); |
| 171 | void BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info, |
| 172 | TableCatalogEntry &table, TableStorageInfo &storage_info); |
| 173 | void BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &table, InsertStatement &stmt); |
| 174 | |
| 175 | static void BindSchemaOrCatalog(ClientContext &context, string &catalog, string &schema); |
| 176 | static void BindLogicalType(ClientContext &context, LogicalType &type, optional_ptr<Catalog> catalog = nullptr, |
| 177 | const string &schema = INVALID_SCHEMA); |
| 178 | |
| 179 | bool HasMatchingBinding(const string &table_name, const string &column_name, string &error_message); |
| 180 | bool HasMatchingBinding(const string &schema_name, const string &table_name, const string &column_name, |
| 181 | string &error_message); |
| 182 | bool HasMatchingBinding(const string &catalog_name, const string &schema_name, const string &table_name, |
| 183 | const string &column_name, string &error_message); |
| 184 | |
| 185 | void SetBindingMode(BindingMode mode); |
| 186 | BindingMode GetBindingMode(); |
| 187 | void AddTableName(string table_name); |
| 188 | const unordered_set<string> &GetTableNames(); |
| 189 | optional_ptr<SQLStatement> GetRootStatement() { |
| 190 | return root_statement; |
| 191 | } |
| 192 | |
| 193 | void SetCanContainNulls(bool can_contain_nulls); |
| 194 | |
| 195 | private: |
| 196 | //! The parent binder (if any) |
| 197 | shared_ptr<Binder> parent; |
| 198 | //! The vector of active binders |
| 199 | vector<reference<ExpressionBinder>> active_binders; |
| 200 | //! The count of bound_tables |
| 201 | idx_t bound_tables; |
| 202 | //! Whether or not the binder has any unplanned subqueries that still need to be planned |
| 203 | bool has_unplanned_subqueries = false; |
| 204 | //! Whether or not subqueries should be planned already |
| 205 | bool plan_subquery = true; |
| 206 | //! Whether CTEs should reference the parent binder (if it exists) |
| 207 | bool inherit_ctes = true; |
| 208 | //! Whether or not the binder can contain NULLs as the root of expressions |
| 209 | bool can_contain_nulls = false; |
| 210 | //! The root statement of the query that is currently being parsed |
| 211 | optional_ptr<SQLStatement> root_statement; |
| 212 | //! Binding mode |
| 213 | BindingMode mode = BindingMode::STANDARD_BINDING; |
| 214 | //! Table names extracted for BindingMode::EXTRACT_NAMES |
| 215 | unordered_set<string> table_names; |
| 216 | //! The set of bound views |
| 217 | reference_set_t<ViewCatalogEntry> bound_views; |
| 218 | |
| 219 | private: |
| 220 | //! Bind the expressions of generated columns to check for errors |
| 221 | void BindGeneratedColumns(BoundCreateTableInfo &info); |
| 222 | //! Bind the default values of the columns of a table |
| 223 | void BindDefaultValues(const ColumnList &columns, vector<unique_ptr<Expression>> &bound_defaults); |
| 224 | //! Bind a limit value (LIMIT or OFFSET) |
| 225 | unique_ptr<Expression> BindDelimiter(ClientContext &context, OrderBinder &order_binder, |
| 226 | unique_ptr<ParsedExpression> delimiter, const LogicalType &type, |
| 227 | Value &delimiter_value); |
| 228 | |
| 229 | //! Move correlated expressions from the child binder to this binder |
| 230 | void MoveCorrelatedExpressions(Binder &other); |
| 231 | |
| 232 | BoundStatement Bind(SelectStatement &stmt); |
| 233 | BoundStatement Bind(InsertStatement &stmt); |
| 234 | BoundStatement Bind(CopyStatement &stmt); |
| 235 | BoundStatement Bind(DeleteStatement &stmt); |
| 236 | BoundStatement Bind(UpdateStatement &stmt); |
| 237 | BoundStatement Bind(CreateStatement &stmt); |
| 238 | BoundStatement Bind(DropStatement &stmt); |
| 239 | BoundStatement Bind(AlterStatement &stmt); |
| 240 | BoundStatement Bind(PrepareStatement &stmt); |
| 241 | BoundStatement Bind(ExecuteStatement &stmt); |
| 242 | BoundStatement Bind(TransactionStatement &stmt); |
| 243 | BoundStatement Bind(PragmaStatement &stmt); |
| 244 | BoundStatement Bind(ExplainStatement &stmt); |
| 245 | BoundStatement Bind(VacuumStatement &stmt); |
| 246 | BoundStatement Bind(RelationStatement &stmt); |
| 247 | BoundStatement Bind(ShowStatement &stmt); |
| 248 | BoundStatement Bind(CallStatement &stmt); |
| 249 | BoundStatement Bind(ExportStatement &stmt); |
| 250 | BoundStatement Bind(ExtensionStatement &stmt); |
| 251 | BoundStatement Bind(SetStatement &stmt); |
| 252 | BoundStatement Bind(SetVariableStatement &stmt); |
| 253 | BoundStatement Bind(ResetVariableStatement &stmt); |
| 254 | BoundStatement Bind(LoadStatement &stmt); |
| 255 | BoundStatement Bind(LogicalPlanStatement &stmt); |
| 256 | BoundStatement Bind(AttachStatement &stmt); |
| 257 | BoundStatement Bind(DetachStatement &stmt); |
| 258 | |
| 259 | BoundStatement BindReturning(vector<unique_ptr<ParsedExpression>> returning_list, TableCatalogEntry &table, |
| 260 | const string &alias, idx_t update_table_index, |
| 261 | unique_ptr<LogicalOperator> child_operator, BoundStatement result); |
| 262 | |
| 263 | unique_ptr<QueryNode> BindTableMacro(FunctionExpression &function, TableMacroCatalogEntry ¯o_func, idx_t depth); |
| 264 | |
| 265 | unique_ptr<BoundQueryNode> BindNode(SelectNode &node); |
| 266 | unique_ptr<BoundQueryNode> BindNode(SetOperationNode &node); |
| 267 | unique_ptr<BoundQueryNode> BindNode(RecursiveCTENode &node); |
| 268 | unique_ptr<BoundQueryNode> BindNode(QueryNode &node); |
| 269 | |
| 270 | unique_ptr<LogicalOperator> VisitQueryNode(BoundQueryNode &node, unique_ptr<LogicalOperator> root); |
| 271 | unique_ptr<LogicalOperator> CreatePlan(BoundRecursiveCTENode &node); |
| 272 | unique_ptr<LogicalOperator> CreatePlan(BoundSelectNode &statement); |
| 273 | unique_ptr<LogicalOperator> CreatePlan(BoundSetOperationNode &node); |
| 274 | unique_ptr<LogicalOperator> CreatePlan(BoundQueryNode &node); |
| 275 | |
| 276 | unique_ptr<BoundTableRef> Bind(BaseTableRef &ref); |
| 277 | unique_ptr<BoundTableRef> Bind(JoinRef &ref); |
| 278 | unique_ptr<BoundTableRef> Bind(SubqueryRef &ref, optional_ptr<CommonTableExpressionInfo> cte = nullptr); |
| 279 | unique_ptr<BoundTableRef> Bind(TableFunctionRef &ref); |
| 280 | unique_ptr<BoundTableRef> Bind(EmptyTableRef &ref); |
| 281 | unique_ptr<BoundTableRef> Bind(ExpressionListRef &ref); |
| 282 | unique_ptr<BoundTableRef> Bind(PivotRef &expr); |
| 283 | |
| 284 | unique_ptr<SelectNode> BindPivot(PivotRef &expr, vector<unique_ptr<ParsedExpression>> all_columns); |
| 285 | unique_ptr<SelectNode> BindUnpivot(Binder &child_binder, PivotRef &expr, |
| 286 | vector<unique_ptr<ParsedExpression>> all_columns, |
| 287 | unique_ptr<ParsedExpression> &where_clause); |
| 288 | unique_ptr<BoundTableRef> BindBoundPivot(PivotRef &expr); |
| 289 | |
| 290 | bool BindTableFunctionParameters(TableFunctionCatalogEntry &table_function, |
| 291 | vector<unique_ptr<ParsedExpression>> &expressions, vector<LogicalType> &arguments, |
| 292 | vector<Value> ¶meters, named_parameter_map_t &named_parameters, |
| 293 | unique_ptr<BoundSubqueryRef> &subquery, string &error); |
| 294 | bool BindTableInTableOutFunction(vector<unique_ptr<ParsedExpression>> &expressions, |
| 295 | unique_ptr<BoundSubqueryRef> &subquery, string &error); |
| 296 | unique_ptr<LogicalOperator> BindTableFunction(TableFunction &function, vector<Value> parameters); |
| 297 | unique_ptr<LogicalOperator> |
| 298 | BindTableFunctionInternal(TableFunction &table_function, const string &function_name, vector<Value> parameters, |
| 299 | named_parameter_map_t named_parameters, vector<LogicalType> input_table_types, |
| 300 | vector<string> input_table_names, const vector<string> &column_name_alias, |
| 301 | unique_ptr<ExternalDependency> external_dependency); |
| 302 | |
| 303 | unique_ptr<LogicalOperator> CreatePlan(BoundBaseTableRef &ref); |
| 304 | unique_ptr<LogicalOperator> CreatePlan(BoundJoinRef &ref); |
| 305 | unique_ptr<LogicalOperator> CreatePlan(BoundSubqueryRef &ref); |
| 306 | unique_ptr<LogicalOperator> CreatePlan(BoundTableFunction &ref); |
| 307 | unique_ptr<LogicalOperator> CreatePlan(BoundEmptyTableRef &ref); |
| 308 | unique_ptr<LogicalOperator> CreatePlan(BoundExpressionListRef &ref); |
| 309 | unique_ptr<LogicalOperator> CreatePlan(BoundCTERef &ref); |
| 310 | unique_ptr<LogicalOperator> CreatePlan(BoundPivotRef &ref); |
| 311 | |
| 312 | BoundStatement BindCopyTo(CopyStatement &stmt); |
| 313 | BoundStatement BindCopyFrom(CopyStatement &stmt); |
| 314 | |
| 315 | void BindModifiers(OrderBinder &order_binder, QueryNode &statement, BoundQueryNode &result); |
| 316 | void BindModifierTypes(BoundQueryNode &result, const vector<LogicalType> &sql_types, idx_t projection_index); |
| 317 | |
| 318 | BoundStatement BindSummarize(ShowStatement &stmt); |
| 319 | unique_ptr<BoundResultModifier> BindLimit(OrderBinder &order_binder, LimitModifier &limit_mod); |
| 320 | unique_ptr<BoundResultModifier> BindLimitPercent(OrderBinder &order_binder, LimitPercentModifier &limit_mod); |
| 321 | unique_ptr<Expression> BindOrderExpression(OrderBinder &order_binder, unique_ptr<ParsedExpression> expr); |
| 322 | |
| 323 | unique_ptr<LogicalOperator> PlanFilter(unique_ptr<Expression> condition, unique_ptr<LogicalOperator> root); |
| 324 | |
| 325 | void PlanSubqueries(unique_ptr<Expression> &expr, unique_ptr<LogicalOperator> &root); |
| 326 | unique_ptr<Expression> PlanSubquery(BoundSubqueryExpression &expr, unique_ptr<LogicalOperator> &root); |
| 327 | unique_ptr<LogicalOperator> PlanLateralJoin(unique_ptr<LogicalOperator> left, unique_ptr<LogicalOperator> right, |
| 328 | vector<CorrelatedColumnInfo> &correlated_columns, |
| 329 | JoinType join_type = JoinType::INNER, |
| 330 | unique_ptr<Expression> condition = nullptr); |
| 331 | |
| 332 | unique_ptr<LogicalOperator> CastLogicalOperatorToTypes(vector<LogicalType> &source_types, |
| 333 | vector<LogicalType> &target_types, |
| 334 | unique_ptr<LogicalOperator> op); |
| 335 | |
| 336 | string FindBinding(const string &using_column, const string &join_side); |
| 337 | bool TryFindBinding(const string &using_column, const string &join_side, string &result); |
| 338 | |
| 339 | void AddUsingBindingSet(unique_ptr<UsingColumnSet> set); |
| 340 | string RetrieveUsingBinding(Binder ¤t_binder, optional_ptr<UsingColumnSet> current_set, |
| 341 | const string &column_name, const string &join_side); |
| 342 | |
| 343 | void AddCTEMap(CommonTableExpressionMap &cte_map); |
| 344 | |
| 345 | void ExpandStarExpressions(vector<unique_ptr<ParsedExpression>> &select_list, |
| 346 | vector<unique_ptr<ParsedExpression>> &new_select_list); |
| 347 | void ExpandStarExpression(unique_ptr<ParsedExpression> expr, vector<unique_ptr<ParsedExpression>> &new_select_list); |
| 348 | bool FindStarExpression(unique_ptr<ParsedExpression> &expr, StarExpression **star, bool is_root, bool in_columns); |
| 349 | void ReplaceStarExpression(unique_ptr<ParsedExpression> &expr, unique_ptr<ParsedExpression> &replacement); |
| 350 | void BindWhereStarExpression(unique_ptr<ParsedExpression> &expr); |
| 351 | |
| 352 | //! If only a schema name is provided (e.g. "a.b") then figure out if "a" is a schema or a catalog name |
| 353 | void BindSchemaOrCatalog(string &catalog_name, string &schema_name); |
| 354 | SchemaCatalogEntry &BindCreateSchema(CreateInfo &info); |
| 355 | |
| 356 | unique_ptr<BoundQueryNode> BindSelectNode(SelectNode &statement, unique_ptr<BoundTableRef> from_table); |
| 357 | |
| 358 | public: |
| 359 | // This should really be a private constructor, but make_shared does not allow it... |
| 360 | // If you are thinking about calling this, you should probably call Binder::CreateBinder |
| 361 | Binder(bool i_know_what_i_am_doing, ClientContext &context, shared_ptr<Binder> parent, bool inherit_ctes); |
| 362 | }; |
| 363 | |
| 364 | } // namespace duckdb |
| 365 | |