| 1 | /* Copyright (c) 2016 MariaDB corporation |
| 2 | |
| 3 | This program is free software; you can redistribute it and/or modify |
| 4 | it under the terms of the GNU General Public License as published by |
| 5 | the Free Software Foundation; version 2 of the License. |
| 6 | |
| 7 | This program is distributed in the hope that it will be useful, |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | GNU General Public License for more details. |
| 11 | |
| 12 | You should have received a copy of the GNU General Public License |
| 13 | along with this program; if not, write to the Free Software |
| 14 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
| 15 | |
| 16 | #ifndef UNIQUE_INCLUDED |
| 17 | #define UNIQUE_INCLUDED |
| 18 | |
| 19 | #include "filesort.h" |
| 20 | |
| 21 | /* |
| 22 | Unique -- class for unique (removing of duplicates). |
| 23 | Puts all values to the TREE. If the tree becomes too big, |
| 24 | it's dumped to the file. User can request sorted values, or |
| 25 | just iterate through them. In the last case tree merging is performed in |
| 26 | memory simultaneously with iteration, so it should be ~2-3x faster. |
| 27 | */ |
| 28 | |
| 29 | class Unique :public Sql_alloc |
| 30 | { |
| 31 | DYNAMIC_ARRAY file_ptrs; |
| 32 | ulong max_elements; |
| 33 | size_t max_in_memory_size; |
| 34 | IO_CACHE file; |
| 35 | TREE tree; |
| 36 | ulong filtered_out_elems; |
| 37 | uint size; |
| 38 | uint full_size; |
| 39 | uint min_dupl_count; /* always 0 for unions, > 0 for intersections */ |
| 40 | bool with_counters; |
| 41 | |
| 42 | bool merge(TABLE *table, uchar *buff, bool without_last_merge); |
| 43 | bool flush(); |
| 44 | |
| 45 | public: |
| 46 | ulong elements; |
| 47 | SORT_INFO sort; |
| 48 | Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, |
| 49 | uint size_arg, size_t max_in_memory_size_arg, |
| 50 | uint min_dupl_count_arg= 0); |
| 51 | ~Unique(); |
| 52 | ulong elements_in_tree() { return tree.elements_in_tree; } |
| 53 | inline bool unique_add(void *ptr) |
| 54 | { |
| 55 | DBUG_ENTER("unique_add" ); |
| 56 | DBUG_PRINT("info" , ("tree %u - %lu" , tree.elements_in_tree, max_elements)); |
| 57 | if (!(tree.flag & TREE_ONLY_DUPS) && |
| 58 | tree.elements_in_tree >= max_elements && flush()) |
| 59 | DBUG_RETURN(1); |
| 60 | DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg)); |
| 61 | } |
| 62 | |
| 63 | bool is_in_memory() { return (my_b_tell(&file) == 0); } |
| 64 | void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; } |
| 65 | |
| 66 | bool get(TABLE *table); |
| 67 | |
| 68 | /* Cost of searching for an element in the tree */ |
| 69 | inline static double get_search_cost(ulonglong tree_elems, uint compare_factor) |
| 70 | { |
| 71 | return log((double) tree_elems) / (compare_factor * M_LN2); |
| 72 | } |
| 73 | |
| 74 | static double get_use_cost(uint *buffer, size_t nkeys, uint key_size, |
| 75 | size_t max_in_memory_size, uint compare_factor, |
| 76 | bool intersect_fl, bool *in_memory); |
| 77 | inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size, |
| 78 | size_t max_in_memory_size) |
| 79 | { |
| 80 | size_t max_elems_in_tree= |
| 81 | max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size); |
| 82 | return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree)); |
| 83 | } |
| 84 | |
| 85 | void reset(); |
| 86 | bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg); |
| 87 | |
| 88 | uint get_size() const { return size; } |
| 89 | size_t get_max_in_memory_size() const { return max_in_memory_size; } |
| 90 | |
| 91 | friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); |
| 92 | friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique); |
| 93 | |
| 94 | friend int unique_write_to_file_with_count(uchar* key, element_count count, |
| 95 | Unique *unique); |
| 96 | friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, |
| 97 | Unique *unique); |
| 98 | }; |
| 99 | |
| 100 | #endif /* UNIQUE_INCLUDED */ |
| 101 | |