1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | /*====== |
4 | This file is part of PerconaFT. |
5 | |
6 | |
7 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
8 | |
9 | PerconaFT is free software: you can redistribute it and/or modify |
10 | it under the terms of the GNU General Public License, version 2, |
11 | as published by the Free Software Foundation. |
12 | |
13 | PerconaFT is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
20 | |
21 | ---------------------------------------- |
22 | |
23 | PerconaFT is free software: you can redistribute it and/or modify |
24 | it under the terms of the GNU Affero General Public License, version 3, |
25 | as published by the Free Software Foundation. |
26 | |
27 | PerconaFT is distributed in the hope that it will be useful, |
28 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
29 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
30 | GNU Affero General Public License for more details. |
31 | |
32 | You should have received a copy of the GNU Affero General Public License |
33 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
34 | ======= */ |
35 | |
36 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
37 | |
38 | #pragma once |
39 | |
40 | #include <db.h> |
41 | |
42 | #include "ft/ft-internal.h" |
43 | |
44 | /* an ft cursor is represented as a kv pair in a tree */ |
45 | struct ft_cursor { |
46 | FT_HANDLE ft_handle; |
47 | DBT key, val; // The key-value pair that the cursor currently points to |
48 | DBT range_lock_left_key, range_lock_right_key; |
49 | bool prefetching; |
50 | bool left_is_neg_infty, right_is_pos_infty; |
51 | enum cursor_read_type read_type; // true if query is reading from a snapshot, false otherwise |
52 | bool is_leaf_mode; |
53 | bool disable_prefetching; |
54 | bool is_temporary; |
55 | int out_of_range_error; |
56 | int direction; |
57 | TOKUTXN ttxn; |
58 | FT_CHECK_INTERRUPT_CALLBACK interrupt_cb; |
59 | void *; |
60 | }; |
61 | typedef struct ft_cursor *FT_CURSOR; |
62 | |
63 | enum ft_search_direction_e { |
64 | FT_SEARCH_LEFT = 1, /* search left -> right, finds min xy as defined by the compare function */ |
65 | FT_SEARCH_RIGHT = 2, /* search right -> left, finds max xy as defined by the compare function */ |
66 | }; |
67 | |
68 | struct ft_search; |
69 | |
70 | /* the search compare function should return 0 for all xy < kv and 1 for all xy >= kv |
71 | the compare function should be a step function from 0 to 1 for a left to right search |
72 | and 1 to 0 for a right to left search */ |
73 | |
74 | typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *); |
75 | |
76 | /* the search object contains the compare function, search direction, and the kv pair that |
77 | is used in the compare function. the context is the user's private data */ |
78 | |
79 | struct ft_search { |
80 | ft_search_compare_func_t compare; |
81 | enum ft_search_direction_e direction; |
82 | const DBT *k; |
83 | void *context; |
84 | |
85 | // To fix #3522, we need to remember the pivots that we have searched unsuccessfully. |
86 | // For example, when searching right (left), we call search->compare() on the ith pivot key. If search->compare(0 returns |
87 | // nonzero, then we search the ith subtree. If that subsearch returns DB_NOTFOUND then maybe the key isn't present in the |
88 | // tree. But maybe we are doing a DB_NEXT (DB_PREV), and everything was deleted. So we remember the pivot, and later we |
89 | // will only search subtrees which contain keys that are bigger than (less than) the pivot. |
90 | // The code is a kludge (even before this fix), and interacts strangely with the TOKUDB_FOUND_BUT_REJECTED (which is there |
91 | // because a failed DB_GET we would keep searching the rest of the tree). We probably should write the various lookup |
92 | // codes (NEXT, PREV, CURRENT, etc) more directly, and we should probably use a binary search within a node to search the |
93 | // pivots so that we can support a larger fanout. |
94 | // These changes (3312+3522) also (probably) introduce an isolation error (#3529). |
95 | // We must make sure we lock the right range for proper isolation level. |
96 | // There's probably a bug in which the following could happen. |
97 | // Thread A: Searches through deleted keys A,B,D,E and finds nothing, so searches the next leaf, releasing the YDB lock. |
98 | // Thread B: Inserts key C, and acquires the write lock, then commits. |
99 | // Thread A: Resumes, searching F,G,H and return success. Thread A then read-locks the range A-H, and doesn't notice |
100 | // the value C inserted by thread B. Thus a failure of serialization. |
101 | // See #3529. |
102 | // There also remains a potential thrashing problem. When we get a TOKUDB_TRY_AGAIN, we unpin everything. There's |
103 | // no guarantee that we will get everything pinned again. We ought to keep nodes pinned when we retry, except that on the |
104 | // way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528. |
105 | DBT pivot_bound; |
106 | const DBT *k_bound; |
107 | }; |
108 | |
109 | /* initialize the search compare object */ |
110 | static inline ft_search *ft_search_init(ft_search *search, ft_search_compare_func_t compare, |
111 | enum ft_search_direction_e direction, |
112 | const DBT *k, const DBT *k_bound, void *context) { |
113 | search->compare = compare; |
114 | search->direction = direction; |
115 | search->k = k; |
116 | search->context = context; |
117 | toku_init_dbt(&search->pivot_bound); |
118 | search->k_bound = k_bound; |
119 | return search; |
120 | } |
121 | |
122 | static inline void ft_search_finish(ft_search *search) { |
123 | toku_destroy_dbt(&search->pivot_bound); |
124 | } |
125 | |
126 | |
127 | int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN txn, |
128 | enum cursor_read_type read_type, |
129 | bool disable_prefetching, |
130 | bool is_temporary); |
131 | |
132 | void toku_ft_cursor_destroy(FT_CURSOR cursor); |
133 | |
134 | int toku_ft_lookup(FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
135 | |
136 | void toku_ft_cursor_set_prefetching(FT_CURSOR cursor); |
137 | |
138 | bool toku_ft_cursor_prefetching(FT_CURSOR cursor); |
139 | |
140 | bool toku_ft_cursor_not_set(FT_CURSOR cursor); |
141 | |
142 | void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor); |
143 | |
144 | void toku_ft_cursor_remove_restriction(FT_CURSOR cursor); |
145 | |
146 | void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *); |
147 | |
148 | int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor); |
149 | |
150 | void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int); |
151 | |
152 | int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
153 | |
154 | int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
155 | |
156 | int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
157 | |
158 | int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
159 | |
160 | int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
161 | |
162 | int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
163 | |
164 | int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
165 | |
166 | int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
167 | |
168 | bool toku_ft_cursor_uninitialized(FT_CURSOR cursor) __attribute__ ((warn_unused_result)); |
169 | |
170 | void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); |
171 | |
172 | int toku_ft_cursor_check_restricted_range(FT_CURSOR cursor, const void *key, uint32_t keylen); |
173 | |
174 | int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, |
175 | FT_GET_CALLBACK_FUNCTION getf, void *getf_v, |
176 | uint32_t *keylen, void **key, uint32_t *vallen, void **val); |
177 | |
178 | // used by get_key_after_bytes |
179 | int toku_ft_cursor_compare_one(const ft_search &search, const DBT *x); |
180 | int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x); |
181 | |
182 | // deprecated, should only be used by tests, and eventually removed |
183 | int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *ftcursor_p, TOKUTXN txn, bool, bool) __attribute__ ((warn_unused_result)); |
184 | void toku_ft_cursor_close(FT_CURSOR cursor); |
185 | int toku_ft_cursor_get(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags); |
186 | int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn); |
187 | |