| 1 | #pragma once | 
|---|
| 2 |  | 
|---|
| 3 | #include <Core/Block.h> | 
|---|
| 4 | #include <Core/ColumnWithTypeAndName.h> | 
|---|
| 5 | #include <Core/Names.h> | 
|---|
| 6 | #include <Core/Settings.h> | 
|---|
| 7 | #include <Interpreters/Context.h> | 
|---|
| 8 | #include <Common/SipHash.h> | 
|---|
| 9 | #include "config_core.h" | 
|---|
| 10 | #include <unordered_map> | 
|---|
| 11 | #include <unordered_set> | 
|---|
| 12 | #include <Parsers/ASTTablesInSelectQuery.h> | 
|---|
| 13 |  | 
|---|
| 14 |  | 
|---|
| 15 | namespace DB | 
|---|
| 16 | { | 
|---|
| 17 |  | 
|---|
| 18 | namespace ErrorCodes | 
|---|
| 19 | { | 
|---|
| 20 | extern const int LOGICAL_ERROR; | 
|---|
| 21 | } | 
|---|
| 22 |  | 
|---|
| 23 | class AnalyzedJoin; | 
|---|
| 24 | class IJoin; | 
|---|
| 25 | using JoinPtr = std::shared_ptr<IJoin>; | 
|---|
| 26 |  | 
|---|
| 27 | class IExecutableFunction; | 
|---|
| 28 | using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>; | 
|---|
| 29 |  | 
|---|
| 30 | class IFunctionBase; | 
|---|
| 31 | using FunctionBasePtr = std::shared_ptr<IFunctionBase>; | 
|---|
| 32 |  | 
|---|
| 33 | class IFunctionOverloadResolver; | 
|---|
| 34 | using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>; | 
|---|
| 35 |  | 
|---|
| 36 | class IDataType; | 
|---|
| 37 | using DataTypePtr = std::shared_ptr<const IDataType>; | 
|---|
| 38 |  | 
|---|
| 39 | class ExpressionActions; | 
|---|
| 40 |  | 
|---|
| 41 | /** Action on the block. | 
|---|
| 42 | */ | 
|---|
| 43 | struct ExpressionAction | 
|---|
| 44 | { | 
|---|
| 45 | private: | 
|---|
| 46 | using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; | 
|---|
| 47 | public: | 
|---|
| 48 | enum Type | 
|---|
| 49 | { | 
|---|
| 50 | ADD_COLUMN, | 
|---|
| 51 | REMOVE_COLUMN, | 
|---|
| 52 | COPY_COLUMN, | 
|---|
| 53 |  | 
|---|
| 54 | APPLY_FUNCTION, | 
|---|
| 55 |  | 
|---|
| 56 | /** Replaces the specified columns with arrays into columns with elements. | 
|---|
| 57 | * Duplicates the values in the remaining columns by the number of elements in the arrays. | 
|---|
| 58 | * Arrays must be parallel (have the same lengths). | 
|---|
| 59 | */ | 
|---|
| 60 | ARRAY_JOIN, | 
|---|
| 61 |  | 
|---|
| 62 | JOIN, | 
|---|
| 63 |  | 
|---|
| 64 | /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result. | 
|---|
| 65 | PROJECT, | 
|---|
| 66 | /// Add columns with alias names. This columns are the same as non-aliased. PROJECT columns if you need to modify them. | 
|---|
| 67 | ADD_ALIASES, | 
|---|
| 68 | }; | 
|---|
| 69 |  | 
|---|
| 70 | Type type{}; | 
|---|
| 71 |  | 
|---|
| 72 | /// For ADD/REMOVE/COPY_COLUMN. | 
|---|
| 73 | std::string source_name; | 
|---|
| 74 | std::string result_name; | 
|---|
| 75 | DataTypePtr result_type; | 
|---|
| 76 |  | 
|---|
| 77 | /// If COPY_COLUMN can replace the result column. | 
|---|
| 78 | bool can_replace = false; | 
|---|
| 79 |  | 
|---|
| 80 | /// For ADD_COLUMN. | 
|---|
| 81 | ColumnPtr added_column; | 
|---|
| 82 |  | 
|---|
| 83 | /// For APPLY_FUNCTION and LEFT ARRAY JOIN. | 
|---|
| 84 | /// OverloadResolver is used before action was added to ExpressionActions (when we don't know types of arguments). | 
|---|
| 85 | FunctionOverloadResolverPtr function_builder; | 
|---|
| 86 |  | 
|---|
| 87 | /// For unaligned [LEFT] ARRAY JOIN | 
|---|
| 88 | FunctionOverloadResolverPtr function_length; | 
|---|
| 89 | FunctionOverloadResolverPtr function_greatest; | 
|---|
| 90 | FunctionOverloadResolverPtr function_arrayResize; | 
|---|
| 91 |  | 
|---|
| 92 | /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. | 
|---|
| 93 | FunctionBasePtr function_base; | 
|---|
| 94 | /// Prepared function which is used in function execution. | 
|---|
| 95 | ExecutableFunctionPtr function; | 
|---|
| 96 | Names argument_names; | 
|---|
| 97 | bool is_function_compiled = false; | 
|---|
| 98 |  | 
|---|
| 99 | /// For ARRAY_JOIN | 
|---|
| 100 | NameSet array_joined_columns; | 
|---|
| 101 | bool array_join_is_left = false; | 
|---|
| 102 | bool unaligned_array_join = false; | 
|---|
| 103 |  | 
|---|
| 104 | /// For JOIN | 
|---|
| 105 | std::shared_ptr<const AnalyzedJoin> table_join; | 
|---|
| 106 | JoinPtr join; | 
|---|
| 107 |  | 
|---|
| 108 | /// For PROJECT. | 
|---|
| 109 | NamesWithAliases projection; | 
|---|
| 110 |  | 
|---|
| 111 | /// If result_name_ == "", as name "function_name(arguments separated by commas) is used". | 
|---|
| 112 | static ExpressionAction applyFunction( | 
|---|
| 113 | const FunctionOverloadResolverPtr & function_, const std::vector<std::string> & argument_names_, std::string result_name_ = ""); | 
|---|
| 114 |  | 
|---|
| 115 | static ExpressionAction addColumn(const ColumnWithTypeAndName & added_column_); | 
|---|
| 116 | static ExpressionAction removeColumn(const std::string & removed_name); | 
|---|
| 117 | static ExpressionAction copyColumn(const std::string & from_name, const std::string & to_name, bool can_replace = false); | 
|---|
| 118 | static ExpressionAction project(const NamesWithAliases & projected_columns_); | 
|---|
| 119 | static ExpressionAction project(const Names & projected_columns_); | 
|---|
| 120 | static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); | 
|---|
| 121 | static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); | 
|---|
| 122 | static ExpressionAction ordinaryJoin(std::shared_ptr<AnalyzedJoin> table_join, JoinPtr join); | 
|---|
| 123 |  | 
|---|
| 124 | /// Which columns necessary to perform this action. | 
|---|
| 125 | Names getNeededColumns() const; | 
|---|
| 126 |  | 
|---|
| 127 | std::string toString() const; | 
|---|
| 128 |  | 
|---|
| 129 | bool operator==(const ExpressionAction & other) const; | 
|---|
| 130 |  | 
|---|
| 131 | struct ActionHash | 
|---|
| 132 | { | 
|---|
| 133 | UInt128 operator()(const ExpressionAction & action) const; | 
|---|
| 134 | }; | 
|---|
| 135 |  | 
|---|
| 136 | private: | 
|---|
| 137 | friend class ExpressionActions; | 
|---|
| 138 |  | 
|---|
| 139 | void prepare(Block & sample_block, const Settings & settings, NameSet & names_not_for_constant_folding); | 
|---|
| 140 | void execute(Block & block, bool dry_run) const; | 
|---|
| 141 | void executeOnTotals(Block & block) const; | 
|---|
| 142 | }; | 
|---|
| 143 |  | 
|---|
| 144 |  | 
|---|
| 145 | /** Contains a sequence of actions on the block. | 
|---|
| 146 | */ | 
|---|
| 147 | class ExpressionActions | 
|---|
| 148 | { | 
|---|
| 149 | public: | 
|---|
| 150 | using Actions = std::vector<ExpressionAction>; | 
|---|
| 151 |  | 
|---|
| 152 | ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_) | 
|---|
| 153 | : input_columns(input_columns_), settings(context_.getSettingsRef()) | 
|---|
| 154 | { | 
|---|
| 155 | for (const auto & input_elem : input_columns) | 
|---|
| 156 | sample_block.insert(ColumnWithTypeAndName(nullptr, input_elem.type, input_elem.name)); | 
|---|
| 157 |  | 
|---|
| 158 | #if USE_EMBEDDED_COMPILER | 
|---|
| 159 | compilation_cache = context_.getCompiledExpressionCache(); | 
|---|
| 160 | #endif | 
|---|
| 161 | } | 
|---|
| 162 |  | 
|---|
| 163 | /// For constant columns the columns themselves can be contained in `input_columns_`. | 
|---|
| 164 | ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_) | 
|---|
| 165 | : settings(context_.getSettingsRef()) | 
|---|
| 166 | { | 
|---|
| 167 | for (const auto & input_elem : input_columns_) | 
|---|
| 168 | { | 
|---|
| 169 | input_columns.emplace_back(input_elem.name, input_elem.type); | 
|---|
| 170 | sample_block.insert(input_elem); | 
|---|
| 171 | } | 
|---|
| 172 | #if USE_EMBEDDED_COMPILER | 
|---|
| 173 | compilation_cache = context_.getCompiledExpressionCache(); | 
|---|
| 174 | #endif | 
|---|
| 175 | } | 
|---|
| 176 |  | 
|---|
| 177 | /// Add the input column. | 
|---|
| 178 | /// The name of the column must not match the names of the intermediate columns that occur when evaluating the expression. | 
|---|
| 179 | /// The expression must not have any PROJECT actions. | 
|---|
| 180 | void addInput(const ColumnWithTypeAndName & column); | 
|---|
| 181 | void addInput(const NameAndTypePair & column); | 
|---|
| 182 |  | 
|---|
| 183 | void add(const ExpressionAction & action); | 
|---|
| 184 |  | 
|---|
| 185 | /// Adds new column names to out_new_columns (formed as a result of the added action). | 
|---|
| 186 | void add(const ExpressionAction & action, Names & out_new_columns); | 
|---|
| 187 |  | 
|---|
| 188 | /// Adds to the beginning the removal of all extra columns. | 
|---|
| 189 | void prependProjectInput(); | 
|---|
| 190 |  | 
|---|
| 191 | /// Add the specified ARRAY JOIN action to the beginning. Change the appropriate input types to arrays. | 
|---|
| 192 | /// If there are unknown columns in the ARRAY JOIN list, take their types from sample_block, and immediately after ARRAY JOIN remove them. | 
|---|
| 193 | void prependArrayJoin(const ExpressionAction & action, const Block & sample_block_before); | 
|---|
| 194 |  | 
|---|
| 195 | /// If the last action is ARRAY JOIN, and it does not affect the columns from required_columns, discard and return it. | 
|---|
| 196 | /// Change the corresponding output types to arrays. | 
|---|
| 197 | bool popUnusedArrayJoin(const Names & required_columns, ExpressionAction & out_action); | 
|---|
| 198 |  | 
|---|
| 199 | /// - Adds actions to delete all but the specified columns. | 
|---|
| 200 | /// - Removes unused input columns. | 
|---|
| 201 | /// - Can somehow optimize the expression. | 
|---|
| 202 | /// - Does not reorder the columns. | 
|---|
| 203 | /// - Does not remove "unexpected" columns (for example, added by functions). | 
|---|
| 204 | /// - If output_columns is empty, leaves one arbitrary column (so that the number of rows in the block is not lost). | 
|---|
| 205 | void finalize(const Names & output_columns); | 
|---|
| 206 |  | 
|---|
| 207 | const Actions & getActions() const { return actions; } | 
|---|
| 208 |  | 
|---|
| 209 | /// Get a list of input columns. | 
|---|
| 210 | Names getRequiredColumns() const | 
|---|
| 211 | { | 
|---|
| 212 | Names names; | 
|---|
| 213 | for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it) | 
|---|
| 214 | names.push_back(it->name); | 
|---|
| 215 | return names; | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 | const NamesAndTypesList & getRequiredColumnsWithTypes() const { return input_columns; } | 
|---|
| 219 |  | 
|---|
| 220 | /// Execute the expression on the block. The block must contain all the columns returned by getRequiredColumns. | 
|---|
| 221 | void execute(Block & block, bool dry_run = false) const; | 
|---|
| 222 |  | 
|---|
| 223 | /// Check if joined subquery has totals. | 
|---|
| 224 | bool hasTotalsInJoin() const; | 
|---|
| 225 |  | 
|---|
| 226 | /** Execute the expression on the block of total values. | 
|---|
| 227 | * Almost the same as `execute`. The difference is only when JOIN is executed. | 
|---|
| 228 | */ | 
|---|
| 229 | void executeOnTotals(Block & block) const; | 
|---|
| 230 |  | 
|---|
| 231 | /// Obtain a sample block that contains the names and types of result columns. | 
|---|
| 232 | const Block & getSampleBlock() const { return sample_block; } | 
|---|
| 233 |  | 
|---|
| 234 | std::string dumpActions() const; | 
|---|
| 235 |  | 
|---|
| 236 | static std::string getSmallestColumn(const NamesAndTypesList & columns); | 
|---|
| 237 |  | 
|---|
| 238 | JoinPtr getTableJoinAlgo() const; | 
|---|
| 239 |  | 
|---|
| 240 | const Settings & getSettings() const { return settings; } | 
|---|
| 241 |  | 
|---|
| 242 | /// Check if result block has no rows. True if it's definite, false if we can't say for sure. | 
|---|
| 243 | /// Call it only after subqueries for join were executed. | 
|---|
| 244 | bool resultIsAlwaysEmpty() const; | 
|---|
| 245 |  | 
|---|
| 246 | /// Check if column is always zero. True if it's definite, false if we can't say for sure. | 
|---|
| 247 | /// Call it only after subqueries for sets were executed. | 
|---|
| 248 | bool checkColumnIsAlwaysFalse(const String & column_name) const; | 
|---|
| 249 |  | 
|---|
| 250 | struct ActionsHash | 
|---|
| 251 | { | 
|---|
| 252 | UInt128 operator()(const ExpressionActions::Actions & elems) const | 
|---|
| 253 | { | 
|---|
| 254 | SipHash hash; | 
|---|
| 255 | for (const ExpressionAction & act : elems) | 
|---|
| 256 | hash.update(ExpressionAction::ActionHash{}(act)); | 
|---|
| 257 | UInt128 result; | 
|---|
| 258 | hash.get128(result.low, result.high); | 
|---|
| 259 | return result; | 
|---|
| 260 | } | 
|---|
| 261 | }; | 
|---|
| 262 |  | 
|---|
| 263 | private: | 
|---|
| 264 | /// These columns have to be in input blocks (arguments of execute* methods) | 
|---|
| 265 | NamesAndTypesList input_columns; | 
|---|
| 266 | /// These actions will be executed on input blocks | 
|---|
| 267 | Actions actions; | 
|---|
| 268 | /// The example of result (output) block. | 
|---|
| 269 | Block sample_block; | 
|---|
| 270 | /// Columns which can't be used for constant folding. | 
|---|
| 271 | NameSet names_not_for_constant_folding; | 
|---|
| 272 |  | 
|---|
| 273 | Settings settings; | 
|---|
| 274 | #if USE_EMBEDDED_COMPILER | 
|---|
| 275 | std::shared_ptr<CompiledExpressionCache> compilation_cache; | 
|---|
| 276 | #endif | 
|---|
| 277 |  | 
|---|
| 278 | void checkLimits(Block & block) const; | 
|---|
| 279 |  | 
|---|
| 280 | void addImpl(ExpressionAction action, Names & new_names); | 
|---|
| 281 |  | 
|---|
| 282 | /// Move all arrayJoin as close as possible to the end. | 
|---|
| 283 | void optimizeArrayJoin(); | 
|---|
| 284 | }; | 
|---|
| 285 |  | 
|---|
| 286 | using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; | 
|---|
| 287 |  | 
|---|
| 288 |  | 
|---|
| 289 | /** The sequence of transformations over the block. | 
|---|
| 290 | * It is assumed that the result of each step is fed to the input of the next step. | 
|---|
| 291 | * Used to execute parts of the query individually. | 
|---|
| 292 | * | 
|---|
| 293 | * For example, you can create a chain of two steps: | 
|---|
| 294 | *     1) evaluate the expression in the WHERE clause, | 
|---|
| 295 | *     2) calculate the expression in the SELECT section, | 
|---|
| 296 | * and between the two steps do the filtering by value in the WHERE clause. | 
|---|
| 297 | */ | 
|---|
| 298 | struct ExpressionActionsChain | 
|---|
| 299 | { | 
|---|
| 300 | ExpressionActionsChain(const Context & context_) | 
|---|
| 301 | : context(context_) {} | 
|---|
| 302 | struct Step | 
|---|
| 303 | { | 
|---|
| 304 | ExpressionActionsPtr actions; | 
|---|
| 305 | /// Columns were added to the block before current step in addition to prev step output. | 
|---|
| 306 | NameSet additional_input; | 
|---|
| 307 | /// Columns which are required in the result of current step. | 
|---|
| 308 | Names required_output; | 
|---|
| 309 | /// True if column from required_output is needed only for current step and not used in next actions | 
|---|
| 310 | /// (and can be removed from block). Example: filter column for where actions. | 
|---|
| 311 | /// If not empty, has the same size with required_output; is filled in finalize(). | 
|---|
| 312 | std::vector<bool> can_remove_required_output; | 
|---|
| 313 |  | 
|---|
| 314 | Step(const ExpressionActionsPtr & actions_ = nullptr, const Names & required_output_ = Names()) | 
|---|
| 315 | : actions(actions_), required_output(required_output_) {} | 
|---|
| 316 | }; | 
|---|
| 317 |  | 
|---|
| 318 | using Steps = std::vector<Step>; | 
|---|
| 319 |  | 
|---|
| 320 | const Context & context; | 
|---|
| 321 | Steps steps; | 
|---|
| 322 |  | 
|---|
| 323 | void addStep(); | 
|---|
| 324 |  | 
|---|
| 325 | void finalize(); | 
|---|
| 326 |  | 
|---|
| 327 | void clear() | 
|---|
| 328 | { | 
|---|
| 329 | steps.clear(); | 
|---|
| 330 | } | 
|---|
| 331 |  | 
|---|
| 332 | ExpressionActionsPtr getLastActions() | 
|---|
| 333 | { | 
|---|
| 334 | if (steps.empty()) | 
|---|
| 335 | throw Exception( "Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); | 
|---|
| 336 |  | 
|---|
| 337 | return steps.back().actions; | 
|---|
| 338 | } | 
|---|
| 339 |  | 
|---|
| 340 | Step & getLastStep() | 
|---|
| 341 | { | 
|---|
| 342 | if (steps.empty()) | 
|---|
| 343 | throw Exception( "Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); | 
|---|
| 344 |  | 
|---|
| 345 | return steps.back(); | 
|---|
| 346 | } | 
|---|
| 347 |  | 
|---|
| 348 | std::string dumpChain(); | 
|---|
| 349 | }; | 
|---|
| 350 |  | 
|---|
| 351 | } | 
|---|
| 352 |  | 
|---|