1 | /* Copyright (c) 2016 MariaDB corporation |
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; version 2 of the License. |
6 | |
7 | This program is distributed in the hope that it will be useful, |
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | GNU General Public License for more details. |
11 | |
12 | You should have received a copy of the GNU General Public License |
13 | along with this program; if not, write to the Free Software |
14 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
15 | |
16 | #ifndef UNIQUE_INCLUDED |
17 | #define UNIQUE_INCLUDED |
18 | |
19 | #include "filesort.h" |
20 | |
21 | /* |
22 | Unique -- class for unique (removing of duplicates). |
23 | Puts all values to the TREE. If the tree becomes too big, |
24 | it's dumped to the file. User can request sorted values, or |
25 | just iterate through them. In the last case tree merging is performed in |
26 | memory simultaneously with iteration, so it should be ~2-3x faster. |
27 | */ |
28 | |
29 | class Unique :public Sql_alloc |
30 | { |
31 | DYNAMIC_ARRAY file_ptrs; |
32 | ulong max_elements; |
33 | size_t max_in_memory_size; |
34 | IO_CACHE file; |
35 | TREE tree; |
36 | ulong filtered_out_elems; |
37 | uint size; |
38 | uint full_size; |
39 | uint min_dupl_count; /* always 0 for unions, > 0 for intersections */ |
40 | bool with_counters; |
41 | |
42 | bool merge(TABLE *table, uchar *buff, bool without_last_merge); |
43 | bool flush(); |
44 | |
45 | public: |
46 | ulong elements; |
47 | SORT_INFO sort; |
48 | Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, |
49 | uint size_arg, size_t max_in_memory_size_arg, |
50 | uint min_dupl_count_arg= 0); |
51 | ~Unique(); |
52 | ulong elements_in_tree() { return tree.elements_in_tree; } |
53 | inline bool unique_add(void *ptr) |
54 | { |
55 | DBUG_ENTER("unique_add" ); |
56 | DBUG_PRINT("info" , ("tree %u - %lu" , tree.elements_in_tree, max_elements)); |
57 | if (!(tree.flag & TREE_ONLY_DUPS) && |
58 | tree.elements_in_tree >= max_elements && flush()) |
59 | DBUG_RETURN(1); |
60 | DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg)); |
61 | } |
62 | |
63 | bool is_in_memory() { return (my_b_tell(&file) == 0); } |
64 | void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; } |
65 | |
66 | bool get(TABLE *table); |
67 | |
68 | /* Cost of searching for an element in the tree */ |
69 | inline static double get_search_cost(ulonglong tree_elems, uint compare_factor) |
70 | { |
71 | return log((double) tree_elems) / (compare_factor * M_LN2); |
72 | } |
73 | |
74 | static double get_use_cost(uint *buffer, size_t nkeys, uint key_size, |
75 | size_t max_in_memory_size, uint compare_factor, |
76 | bool intersect_fl, bool *in_memory); |
77 | inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size, |
78 | size_t max_in_memory_size) |
79 | { |
80 | size_t max_elems_in_tree= |
81 | max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size); |
82 | return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree)); |
83 | } |
84 | |
85 | void reset(); |
86 | bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg); |
87 | |
88 | uint get_size() const { return size; } |
89 | size_t get_max_in_memory_size() const { return max_in_memory_size; } |
90 | |
91 | friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); |
92 | friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique); |
93 | |
94 | friend int unique_write_to_file_with_count(uchar* key, element_count count, |
95 | Unique *unique); |
96 | friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, |
97 | Unique *unique); |
98 | }; |
99 | |
100 | #endif /* UNIQUE_INCLUDED */ |
101 | |