| 1 | /* |
| 2 | Copyright (c) 2010, 2015, MariaDB |
| 3 | |
| 4 | This program is free software; you can redistribute it and/or modify |
| 5 | it under the terms of the GNU General Public License as published by |
| 6 | the Free Software Foundation; version 2 of the License. |
| 7 | |
| 8 | This program is distributed in the hope that it will be useful, |
| 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License |
| 14 | along with this program; if not, write to the Free Software |
| 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
| 16 | |
| 17 | /* |
| 18 | Semi-join subquery optimization code definitions |
| 19 | */ |
| 20 | |
| 21 | #ifdef USE_PRAGMA_INTERFACE |
| 22 | #pragma interface /* gcc class implementation */ |
| 23 | #endif |
| 24 | |
| 25 | int check_and_do_in_subquery_rewrites(JOIN *join); |
| 26 | bool convert_join_subqueries_to_semijoins(JOIN *join); |
| 27 | int pull_out_semijoin_tables(JOIN *join); |
| 28 | bool optimize_semijoin_nests(JOIN *join, table_map all_table_map); |
| 29 | bool setup_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list, |
| 30 | Item **join_where); |
| 31 | void cleanup_empty_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list); |
| 32 | |
| 33 | // used by Loose_scan_opt |
| 34 | ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, |
| 35 | table_map remaining_tables); |
| 36 | |
| 37 | /* |
| 38 | This is a class for considering possible loose index scan optimizations. |
| 39 | It's usage pattern is as follows: |
| 40 | best_access_path() |
| 41 | { |
| 42 | Loose_scan_opt opt; |
| 43 | |
| 44 | opt.init() |
| 45 | for each index we can do ref access with |
| 46 | { |
| 47 | opt.next_ref_key(); |
| 48 | for each keyuse |
| 49 | opt.add_keyuse(); |
| 50 | opt.check_ref_access(); |
| 51 | } |
| 52 | |
| 53 | if (some criteria for range scans) |
| 54 | opt.check_range_access(); |
| 55 | |
| 56 | opt.get_best_option(); |
| 57 | } |
| 58 | */ |
| 59 | |
| 60 | class Loose_scan_opt |
| 61 | { |
| 62 | /* All methods must check this before doing anything else */ |
| 63 | bool try_loosescan; |
| 64 | |
| 65 | /* |
| 66 | If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is |
| 67 | called sj-equality. If oeK depends only on preceding tables then such |
| 68 | equality is called 'bound'. |
| 69 | */ |
| 70 | ulonglong bound_sj_equalities; |
| 71 | |
| 72 | /* Accumulated properties of ref access we're now considering: */ |
| 73 | ulonglong handled_sj_equalities; |
| 74 | key_part_map loose_scan_keyparts; |
| 75 | uint max_loose_keypart; |
| 76 | bool part1_conds_met; |
| 77 | |
| 78 | /* |
| 79 | Use of quick select is a special case. Some of its properties: |
| 80 | */ |
| 81 | uint quick_uses_applicable_index; |
| 82 | uint quick_max_loose_keypart; |
| 83 | |
| 84 | /* Best loose scan method so far */ |
| 85 | uint best_loose_scan_key; |
| 86 | double best_loose_scan_cost; |
| 87 | double best_loose_scan_records; |
| 88 | KEYUSE *best_loose_scan_start_key; |
| 89 | |
| 90 | uint best_max_loose_keypart; |
| 91 | |
| 92 | public: |
| 93 | Loose_scan_opt(): |
| 94 | try_loosescan(FALSE), |
| 95 | bound_sj_equalities(0), |
| 96 | quick_uses_applicable_index(FALSE) |
| 97 | { |
| 98 | /* Protected by quick_uses_applicable_index */ |
| 99 | LINT_INIT(quick_max_loose_keypart); |
| 100 | /* The following are protected by best_loose_scan_cost!= DBL_MAX */ |
| 101 | LINT_INIT(best_loose_scan_key); |
| 102 | LINT_INIT(best_loose_scan_records); |
| 103 | LINT_INIT(best_max_loose_keypart); |
| 104 | LINT_INIT(best_loose_scan_start_key); |
| 105 | } |
| 106 | |
| 107 | void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables) |
| 108 | { |
| 109 | /* |
| 110 | Discover the bound equalities. We need to do this if |
| 111 | 1. The next table is an SJ-inner table, and |
| 112 | 2. It is the first table from that semijoin, and |
| 113 | 3. We're not within a semi-join range (i.e. all semi-joins either have |
| 114 | all or none of their tables in join_table_map), except |
| 115 | s->emb_sj_nest (which we've just entered, see #2). |
| 116 | 4. All non-IN-equality correlation references from this sj-nest are |
| 117 | bound |
| 118 | 5. But some of the IN-equalities aren't (so this can't be handled by |
| 119 | FirstMatch strategy) |
| 120 | */ |
| 121 | best_loose_scan_cost= DBL_MAX; |
| 122 | if (!join->emb_sjm_nest && s->emb_sj_nest && // (1) |
| 123 | s->emb_sj_nest->sj_in_exprs < 64 && |
| 124 | ((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (2) |
| 125 | s->emb_sj_nest->sj_inner_tables) && // (2) |
| 126 | join->cur_sj_inner_tables == 0 && // (3) |
| 127 | !(remaining_tables & |
| 128 | s->emb_sj_nest->nested_join->sj_corr_tables) && // (4) |
| 129 | remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5) |
| 130 | optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN)) |
| 131 | { |
| 132 | /* This table is an LooseScan scan candidate */ |
| 133 | bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest, |
| 134 | remaining_tables); |
| 135 | try_loosescan= TRUE; |
| 136 | DBUG_PRINT("info" , ("Will try LooseScan scan, bound_map=%llx" , |
| 137 | (longlong)bound_sj_equalities)); |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | void next_ref_key() |
| 142 | { |
| 143 | handled_sj_equalities=0; |
| 144 | loose_scan_keyparts= 0; |
| 145 | max_loose_keypart= 0; |
| 146 | part1_conds_met= FALSE; |
| 147 | } |
| 148 | |
| 149 | void add_keyuse(table_map remaining_tables, KEYUSE *keyuse) |
| 150 | { |
| 151 | if (try_loosescan && keyuse->sj_pred_no != UINT_MAX && |
| 152 | (keyuse->table->file->index_flags(keyuse->key, 0, 1 ) & HA_READ_ORDER)) |
| 153 | |
| 154 | { |
| 155 | if (!(remaining_tables & keyuse->used_tables)) |
| 156 | { |
| 157 | /* |
| 158 | This allows to use equality propagation to infer that some |
| 159 | sj-equalities are bound. |
| 160 | */ |
| 161 | bound_sj_equalities |= 1ULL << keyuse->sj_pred_no; |
| 162 | } |
| 163 | else |
| 164 | { |
| 165 | handled_sj_equalities |= 1ULL << keyuse->sj_pred_no; |
| 166 | loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart; |
| 167 | set_if_bigger(max_loose_keypart, keyuse->keypart); |
| 168 | } |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | bool have_a_case() { return MY_TEST(handled_sj_equalities); } |
| 173 | |
| 174 | void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key, |
| 175 | table_map found_part) |
| 176 | { |
| 177 | /* |
| 178 | Check if we can use LooseScan semi-join strategy. We can if |
| 179 | 1. This is the right table at right location |
| 180 | 2. All IN-equalities are either |
| 181 | - "bound", ie. the outer_expr part refers to the preceding tables |
| 182 | - "handled", ie. covered by the index we're considering |
| 183 | 3. Index order allows to enumerate subquery's duplicate groups in |
| 184 | order. This happens when the index definition matches this |
| 185 | pattern: |
| 186 | |
| 187 | (handled_col|bound_col)* (other_col|bound_col) |
| 188 | |
| 189 | */ |
| 190 | if (try_loosescan && // (1) |
| 191 | (handled_sj_equalities | bound_sj_equalities) == // (2) |
| 192 | PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) && // (2) |
| 193 | (PREV_BITS(key_part_map, max_loose_keypart+1) & // (3) |
| 194 | (found_part | loose_scan_keyparts)) == // (3) |
| 195 | PREV_BITS(key_part_map, max_loose_keypart+1) && // (3) |
| 196 | !key_uses_partial_cols(s->table->s, key)) |
| 197 | { |
| 198 | if (s->quick && s->quick->index == key && |
| 199 | s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) |
| 200 | { |
| 201 | quick_uses_applicable_index= TRUE; |
| 202 | quick_max_loose_keypart= max_loose_keypart; |
| 203 | } |
| 204 | DBUG_PRINT("info" , ("Can use LooseScan scan" )); |
| 205 | |
| 206 | if (found_part & 1) |
| 207 | { |
| 208 | /* Can use LooseScan on ref access if the first key part is bound */ |
| 209 | part1_conds_met= TRUE; |
| 210 | } |
| 211 | |
| 212 | /* |
| 213 | Check if this is a special case where there are no usable bound |
| 214 | IN-equalities, i.e. we have |
| 215 | |
| 216 | outer_expr IN (SELECT innertbl.key FROM ...) |
| 217 | |
| 218 | and outer_expr cannot be evaluated yet, so it's actually full |
| 219 | index scan and not a ref access. |
| 220 | We can do full index scan if it uses index-only. |
| 221 | */ |
| 222 | if (!(found_part & 1 ) && /* no usable ref access for 1st key part */ |
| 223 | s->table->covering_keys.is_set(key)) |
| 224 | { |
| 225 | part1_conds_met= TRUE; |
| 226 | DBUG_PRINT("info" , ("Can use full index scan for LooseScan" )); |
| 227 | |
| 228 | /* Calculate the cost of complete loose index scan. */ |
| 229 | double records= rows2double(s->table->file->stats.records); |
| 230 | |
| 231 | /* The cost is entire index scan cost (divided by 2) */ |
| 232 | double read_time= s->table->file->keyread_time(key, 1, |
| 233 | (ha_rows) records); |
| 234 | |
| 235 | /* |
| 236 | Now find out how many different keys we will get (for now we |
| 237 | ignore the fact that we have "keypart_i=const" restriction for |
| 238 | some key components, that may make us think think that loose |
| 239 | scan will produce more distinct records than it actually will) |
| 240 | */ |
| 241 | ulong rpc; |
| 242 | if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart])) |
| 243 | records= records / rpc; |
| 244 | |
| 245 | // TODO: previous version also did /2 |
| 246 | if (read_time < best_loose_scan_cost) |
| 247 | { |
| 248 | best_loose_scan_key= key; |
| 249 | best_loose_scan_cost= read_time; |
| 250 | best_loose_scan_records= records; |
| 251 | best_max_loose_keypart= max_loose_keypart; |
| 252 | best_loose_scan_start_key= start_key; |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | void check_ref_access_part2(uint key, KEYUSE *start_key, double records, |
| 259 | double read_time) |
| 260 | { |
| 261 | if (part1_conds_met && read_time < best_loose_scan_cost) |
| 262 | { |
| 263 | /* TODO use rec-per-key-based fanout calculations */ |
| 264 | best_loose_scan_key= key; |
| 265 | best_loose_scan_cost= read_time; |
| 266 | best_loose_scan_records= records; |
| 267 | best_max_loose_keypart= max_loose_keypart; |
| 268 | best_loose_scan_start_key= start_key; |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick) |
| 273 | { |
| 274 | /* TODO: this the right part restriction: */ |
| 275 | if (quick_uses_applicable_index && idx == join->const_tables && |
| 276 | quick->read_time < best_loose_scan_cost) |
| 277 | { |
| 278 | best_loose_scan_key= quick->index; |
| 279 | best_loose_scan_cost= quick->read_time; |
| 280 | /* this is ok because idx == join->const_tables */ |
| 281 | best_loose_scan_records= rows2double(quick->records); |
| 282 | best_max_loose_keypart= quick_max_loose_keypart; |
| 283 | best_loose_scan_start_key= NULL; |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | void save_to_position(JOIN_TAB *tab, POSITION *pos) |
| 288 | { |
| 289 | pos->read_time= best_loose_scan_cost; |
| 290 | if (best_loose_scan_cost != DBL_MAX) |
| 291 | { |
| 292 | pos->records_read= best_loose_scan_records; |
| 293 | pos->key= best_loose_scan_start_key; |
| 294 | pos->cond_selectivity= 1.0; |
| 295 | pos->loosescan_picker.loosescan_key= best_loose_scan_key; |
| 296 | pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1; |
| 297 | pos->use_join_buffer= FALSE; |
| 298 | pos->table= tab; |
| 299 | // todo need ref_depend_map ? |
| 300 | DBUG_PRINT("info" , ("Produced a LooseScan plan, key %s, %s" , |
| 301 | tab->table->key_info[best_loose_scan_key].name.str, |
| 302 | best_loose_scan_start_key? "(ref access)" : |
| 303 | "(range/index access)" )); |
| 304 | } |
| 305 | } |
| 306 | }; |
| 307 | |
| 308 | |
| 309 | void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, |
| 310 | double *current_record_count, double *current_read_time, |
| 311 | POSITION *loose_scan_pos); |
| 312 | void restore_prev_sj_state(const table_map remaining_tables, |
| 313 | const JOIN_TAB *tab, uint idx); |
| 314 | |
| 315 | void fix_semijoin_strategies_for_picked_join_order(JOIN *join); |
| 316 | |
| 317 | bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab); |
| 318 | bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab); |
| 319 | |
| 320 | |
| 321 | /* |
| 322 | Temporary table used by semi-join DuplicateElimination strategy |
| 323 | |
| 324 | This consists of the temptable itself and data needed to put records |
| 325 | into it. The table's DDL is as follows: |
| 326 | |
| 327 | CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col)); |
| 328 | |
| 329 | where the primary key can be replaced with unique constraint if n exceeds |
| 330 | the limit (as it is always done for query execution-time temptables). |
| 331 | |
| 332 | The record value is a concatenation of rowids of tables from the join we're |
| 333 | executing. If a join table is on the inner side of the outer join, we |
| 334 | assume that its rowid can be NULL and provide means to store this rowid in |
| 335 | the tuple. |
| 336 | */ |
| 337 | |
| 338 | class SJ_TMP_TABLE : public Sql_alloc |
| 339 | { |
| 340 | public: |
| 341 | /* |
| 342 | Array of pointers to tables whose rowids compose the temporary table |
| 343 | record. |
| 344 | */ |
| 345 | class TAB |
| 346 | { |
| 347 | public: |
| 348 | JOIN_TAB *join_tab; |
| 349 | uint rowid_offset; |
| 350 | ushort null_byte; |
| 351 | uchar null_bit; |
| 352 | }; |
| 353 | TAB *tabs; |
| 354 | TAB *tabs_end; |
| 355 | |
| 356 | /* |
| 357 | is_degenerate==TRUE means this is a special case where the temptable record |
| 358 | has zero length (and presence of a unique key means that the temptable can |
| 359 | have either 0 or 1 records). |
| 360 | In this case we don't create the physical temptable but instead record |
| 361 | its state in SJ_TMP_TABLE::have_degenerate_row. |
| 362 | */ |
| 363 | bool is_degenerate; |
| 364 | |
| 365 | /* |
| 366 | When is_degenerate==TRUE: the contents of the table (whether it has the |
| 367 | record or not). |
| 368 | */ |
| 369 | bool have_degenerate_row; |
| 370 | |
| 371 | /* table record parameters */ |
| 372 | uint null_bits; |
| 373 | uint null_bytes; |
| 374 | uint rowid_len; |
| 375 | |
| 376 | /* The temporary table itself (NULL means not created yet) */ |
| 377 | TABLE *tmp_table; |
| 378 | |
| 379 | /* |
| 380 | These are the members we got from temptable creation code. We'll need |
| 381 | them if we'll need to convert table from HEAP to MyISAM/Maria. |
| 382 | */ |
| 383 | TMP_ENGINE_COLUMNDEF *start_recinfo; |
| 384 | TMP_ENGINE_COLUMNDEF *recinfo; |
| 385 | |
| 386 | SJ_TMP_TABLE *next_flush_table; |
| 387 | |
| 388 | int sj_weedout_delete_rows(); |
| 389 | int sj_weedout_check_row(THD *thd); |
| 390 | bool create_sj_weedout_tmp_table(THD *thd); |
| 391 | }; |
| 392 | |
| 393 | int setup_semijoin_loosescan(JOIN *join); |
| 394 | int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, |
| 395 | uint no_jbuf_after); |
| 396 | void destroy_sj_tmp_tables(JOIN *join); |
| 397 | int clear_sj_tmp_tables(JOIN *join); |
| 398 | int rewrite_to_index_subquery_engine(JOIN *join); |
| 399 | |
| 400 | |
| 401 | void get_delayed_table_estimates(TABLE *table, |
| 402 | ha_rows *out_rows, |
| 403 | double *scan_time, |
| 404 | double *startup_cost); |
| 405 | |
| 406 | enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab); |
| 407 | |
| 408 | |