| 1 | #pragma once |
| 2 | |
| 3 | #include <Core/Settings.h> |
| 4 | #include <DataStreams/IBlockStream_fwd.h> |
| 5 | #include <Interpreters/AggregateDescription.h> |
| 6 | #include <Interpreters/SyntaxAnalyzer.h> |
| 7 | #include <Interpreters/SubqueryForSet.h> |
| 8 | #include <Parsers/IAST_fwd.h> |
| 9 | #include <Storages/IStorage_fwd.h> |
| 10 | |
| 11 | |
| 12 | namespace DB |
| 13 | { |
| 14 | |
/// Forward declarations of types that are defined elsewhere, plus the smart-pointer aliases built on them.
class Block;
class Context;

struct ExpressionActionsChain;
class ExpressionActions;
/// Shared handle to an ExpressionActions object.
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
/// A sequence of ExpressionActions handles (used below for the per-element ORDER BY actions).
using ManyExpressionActions = std::vector<ExpressionActionsPtr>;

struct ASTTableJoin;
class IJoin;
/// Shared handle to an IJoin implementation.
using JoinPtr = std::shared_ptr<IJoin>;

class ASTFunction;
class ASTExpressionList;
class ASTSelectQuery;
struct ASTTablesInSelectQueryElement;
| 31 | |
| 32 | /// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately. |
| 33 | struct ExpressionAnalyzerData |
| 34 | { |
| 35 | SubqueriesForSets subqueries_for_sets; |
| 36 | PreparedSets prepared_sets; |
| 37 | |
| 38 | /// Columns after ARRAY JOIN, JOIN, and/or aggregation. |
| 39 | NamesAndTypesList aggregated_columns; |
| 40 | NamesAndTypesList array_join_columns; |
| 41 | |
| 42 | bool has_aggregation = false; |
| 43 | NamesAndTypesList aggregation_keys; |
| 44 | AggregateDescriptions aggregate_descriptions; |
| 45 | |
| 46 | bool has_global_subqueries = false; |
| 47 | |
| 48 | /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. |
| 49 | Tables external_tables; |
| 50 | |
| 51 | /// Actions by every element of ORDER BY |
| 52 | ManyExpressionActions order_by_elements_actions; |
| 53 | }; |
| 54 | |
| 55 | |
| 56 | /** Transforms an expression from a syntax tree into a sequence of actions to execute it. |
| 57 | * |
| 58 | * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. |
| 59 | */ |
| 60 | class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::noncopyable |
| 61 | { |
| 62 | private: |
| 63 | /// Extracts settings to enlight which are used (and avoid copy of others). |
| 64 | struct |
| 65 | { |
| 66 | const bool ; |
| 67 | const SizeLimits ; |
| 68 | |
| 69 | (const Settings & settings_) |
| 70 | : use_index_for_in_with_subqueries(settings_.use_index_for_in_with_subqueries), |
| 71 | size_limits_for_set(settings_.max_rows_in_set, settings_.max_bytes_in_set, settings_.set_overflow_mode) |
| 72 | {} |
| 73 | }; |
| 74 | |
| 75 | public: |
| 76 | /// Ctor for non-select queries. Generally its usage is: |
| 77 | /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions(); |
| 78 | ExpressionAnalyzer( |
| 79 | const ASTPtr & query_, |
| 80 | const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, |
| 81 | const Context & context_) |
| 82 | : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false) |
| 83 | {} |
| 84 | |
| 85 | void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types); |
| 86 | |
| 87 | /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression. |
| 88 | /// If add_aliases, only the calculated values in the desired order and add aliases. |
| 89 | /// If also project_result, than only aliases remain in the output block. |
| 90 | /// Otherwise, only temporary columns will be deleted from the block. |
| 91 | ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true); |
| 92 | |
| 93 | /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants. |
| 94 | /// Does not execute subqueries. |
| 95 | ExpressionActionsPtr getConstActions(); |
| 96 | |
| 97 | /** Sets that require a subquery to be create. |
| 98 | * Only the sets needed to perform actions returned from already executed `append*` or `getActions`. |
| 99 | * That is, you need to call getSetsWithSubqueries after all calls of `append*` or `getActions` |
| 100 | * and create all the returned sets before performing the actions. |
| 101 | */ |
| 102 | const SubqueriesForSets & getSubqueriesForSets() const { return subqueries_for_sets; } |
| 103 | |
| 104 | /// Get intermediates for tests |
| 105 | const ExpressionAnalyzerData & getAnalyzedData() const { return *this; } |
| 106 | |
| 107 | protected: |
| 108 | ExpressionAnalyzer( |
| 109 | const ASTPtr & query_, |
| 110 | const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, |
| 111 | const Context & context_, |
| 112 | size_t subquery_depth_, |
| 113 | bool do_global_); |
| 114 | |
| 115 | ASTPtr query; |
| 116 | const Context & context; |
| 117 | const ExtractedSettings settings; |
| 118 | size_t subquery_depth; |
| 119 | |
| 120 | SyntaxAnalyzerResultPtr syntax; |
| 121 | |
| 122 | const StoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists. |
| 123 | const AnalyzedJoin & analyzedJoin() const { return *syntax->analyzed_join; } |
| 124 | const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; } |
| 125 | const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; } |
| 126 | NamesAndTypesList sourceWithJoinedColumns() const; |
| 127 | |
| 128 | /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. |
| 129 | void initGlobalSubqueriesAndExternalTables(bool do_global); |
| 130 | |
| 131 | void addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const; |
| 132 | |
| 133 | void addJoinAction(ExpressionActionsPtr & actions, JoinPtr = {}) const; |
| 134 | |
| 135 | void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false); |
| 136 | |
| 137 | /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, |
| 138 | * Create a set of columns aggregated_columns resulting after the aggregation, if any, |
| 139 | * or after all the actions that are normally performed before aggregation. |
| 140 | * Set has_aggregation = true if there is GROUP BY or at least one aggregate function. |
| 141 | */ |
| 142 | void analyzeAggregation(); |
| 143 | bool makeAggregateDescriptions(ExpressionActionsPtr & actions); |
| 144 | |
| 145 | /// columns - the columns that are present before the transformations begin. |
| 146 | void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const; |
| 147 | |
| 148 | const ASTSelectQuery * getSelectQuery() const; |
| 149 | |
| 150 | bool isRemoteStorage() const; |
| 151 | }; |
| 152 | |
| 153 | /// SelectQuery specific ExpressionAnalyzer part. |
| 154 | class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer |
| 155 | { |
| 156 | public: |
| 157 | SelectQueryExpressionAnalyzer( |
| 158 | const ASTPtr & query_, |
| 159 | const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, |
| 160 | const Context & context_, |
| 161 | const NameSet & required_result_columns_ = {}, |
| 162 | size_t subquery_depth_ = 0, |
| 163 | bool do_global_ = false) |
| 164 | : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, subquery_depth_, do_global_) |
| 165 | , required_result_columns(required_result_columns_) |
| 166 | {} |
| 167 | |
| 168 | /// Does the expression have aggregate functions or a GROUP BY or HAVING section. |
| 169 | bool hasAggregation() const { return has_aggregation; } |
| 170 | bool hasGlobalSubqueries() { return has_global_subqueries; } |
| 171 | |
| 172 | /// Get a list of aggregation keys and descriptions of aggregate functions if the query contains GROUP BY. |
| 173 | void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const; |
| 174 | |
| 175 | const PreparedSets & getPreparedSets() const { return prepared_sets; } |
| 176 | |
| 177 | const ManyExpressionActions & getOrderByActions() const { return order_by_elements_actions; } |
| 178 | |
| 179 | /// Tables that will need to be sent to remote servers for distributed query processing. |
| 180 | const Tables & getExternalTables() const { return external_tables; } |
| 181 | |
| 182 | /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query. |
| 183 | * |
| 184 | * Example usage: |
| 185 | * ExpressionActionsChain chain; |
| 186 | * analyzer.appendWhere(chain); |
| 187 | * chain.addStep(); |
| 188 | * analyzer.appendSelect(chain); |
| 189 | * analyzer.appendOrderBy(chain); |
| 190 | * chain.finalize(); |
| 191 | * |
| 192 | * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way |
| 193 | * shouldn't be executed, they are only needed to get a list of columns with their types. |
| 194 | */ |
| 195 | |
| 196 | /// Before aggregation: |
| 197 | bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types); |
| 198 | bool appendJoin(ExpressionActionsChain & chain, bool only_types); |
| 199 | /// Add preliminary rows filtration. Actions are created in other expression analyzer to prevent any possible alias injection. |
| 200 | void appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name); |
| 201 | /// remove_filter is set in ExpressionActionsChain::finalize(); |
| 202 | /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). |
| 203 | bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); |
| 204 | bool appendWhere(ExpressionActionsChain & chain, bool only_types); |
| 205 | bool appendGroupBy(ExpressionActionsChain & chain, bool only_types); |
| 206 | void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); |
| 207 | |
| 208 | /// After aggregation: |
| 209 | bool appendHaving(ExpressionActionsChain & chain, bool only_types); |
| 210 | void appendSelect(ExpressionActionsChain & chain, bool only_types); |
| 211 | bool appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order); |
| 212 | bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); |
| 213 | /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. |
| 214 | void appendProjectResult(ExpressionActionsChain & chain) const; |
| 215 | |
| 216 | /// Create Set-s that we can from IN section to use the index on them. |
| 217 | void makeSetsForIndex(const ASTPtr & node); |
| 218 | |
| 219 | private: |
| 220 | /// If non-empty, ignore all expressions not from this list. |
| 221 | NameSet required_result_columns; |
| 222 | |
| 223 | /** |
| 224 | * Create Set from a subquery or a table expression in the query. The created set is suitable for using the index. |
| 225 | * The set will not be created if its size hits the limit. |
| 226 | */ |
| 227 | void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name); |
| 228 | |
| 229 | /** |
| 230 | * Checks if subquery is not a plain StorageSet. |
| 231 | * Because while making set we will read data from StorageSet which is not allowed. |
| 232 | * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise. |
| 233 | */ |
| 234 | SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_of_table_name); |
| 235 | |
| 236 | JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element); |
| 237 | void makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, NamesWithAliases && required_columns_with_aliases, |
| 238 | SubqueryForSet & subquery_for_set) const; |
| 239 | |
| 240 | const ASTSelectQuery * getAggregatingQuery() const; |
| 241 | }; |
| 242 | |
| 243 | } |
| 244 | |