| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | /*====== |
| 4 | This file is part of PerconaFT. |
| 5 | |
| 6 | |
| 7 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 8 | |
| 9 | PerconaFT is free software: you can redistribute it and/or modify |
| 10 | it under the terms of the GNU General Public License, version 2, |
| 11 | as published by the Free Software Foundation. |
| 12 | |
| 13 | PerconaFT is distributed in the hope that it will be useful, |
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | GNU General Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU General Public License |
| 19 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 20 | |
| 21 | ---------------------------------------- |
| 22 | |
| 23 | PerconaFT is free software: you can redistribute it and/or modify |
| 24 | it under the terms of the GNU Affero General Public License, version 3, |
| 25 | as published by the Free Software Foundation. |
| 26 | |
| 27 | PerconaFT is distributed in the hope that it will be useful, |
| 28 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 29 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 30 | GNU Affero General Public License for more details. |
| 31 | |
| 32 | You should have received a copy of the GNU Affero General Public License |
| 33 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 34 | ======= */ |
| 35 | |
| 36 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 37 | |
| 38 | #pragma once |
| 39 | |
| 40 | #include <db.h> |
| 41 | |
| 42 | #include "ft/ft-internal.h" |
| 43 | |
| 44 | /* an ft cursor is represented as a kv pair in a tree */ |
| 45 | struct ft_cursor { |
| 46 | FT_HANDLE ft_handle; |
| 47 | DBT key, val; // The key-value pair that the cursor currently points to |
| 48 | DBT range_lock_left_key, range_lock_right_key; |
| 49 | bool prefetching; |
| 50 | bool left_is_neg_infty, right_is_pos_infty; |
| 51 | enum cursor_read_type read_type; // true if query is reading from a snapshot, false otherwise |
| 52 | bool is_leaf_mode; |
| 53 | bool disable_prefetching; |
| 54 | bool is_temporary; |
| 55 | int out_of_range_error; |
| 56 | int direction; |
| 57 | TOKUTXN ttxn; |
| 58 | FT_CHECK_INTERRUPT_CALLBACK interrupt_cb; |
| 59 | void *; |
| 60 | }; |
| 61 | typedef struct ft_cursor *FT_CURSOR; |
| 62 | |
| 63 | enum ft_search_direction_e { |
| 64 | FT_SEARCH_LEFT = 1, /* search left -> right, finds min xy as defined by the compare function */ |
| 65 | FT_SEARCH_RIGHT = 2, /* search right -> left, finds max xy as defined by the compare function */ |
| 66 | }; |
| 67 | |
| 68 | struct ft_search; |
| 69 | |
| 70 | /* the search compare function should return 0 for all xy < kv and 1 for all xy >= kv |
| 71 | the compare function should be a step function from 0 to 1 for a left to right search |
| 72 | and 1 to 0 for a right to left search */ |
| 73 | |
| 74 | typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *); |
| 75 | |
| 76 | /* the search object contains the compare function, search direction, and the kv pair that |
| 77 | is used in the compare function. the context is the user's private data */ |
| 78 | |
| 79 | struct ft_search { |
| 80 | ft_search_compare_func_t compare; |
| 81 | enum ft_search_direction_e direction; |
| 82 | const DBT *k; |
| 83 | void *context; |
| 84 | |
| 85 | // To fix #3522, we need to remember the pivots that we have searched unsuccessfully. |
| 86 | // For example, when searching right (left), we call search->compare() on the ith pivot key. If search->compare(0 returns |
| 87 | // nonzero, then we search the ith subtree. If that subsearch returns DB_NOTFOUND then maybe the key isn't present in the |
| 88 | // tree. But maybe we are doing a DB_NEXT (DB_PREV), and everything was deleted. So we remember the pivot, and later we |
| 89 | // will only search subtrees which contain keys that are bigger than (less than) the pivot. |
| 90 | // The code is a kludge (even before this fix), and interacts strangely with the TOKUDB_FOUND_BUT_REJECTED (which is there |
| 91 | // because a failed DB_GET we would keep searching the rest of the tree). We probably should write the various lookup |
| 92 | // codes (NEXT, PREV, CURRENT, etc) more directly, and we should probably use a binary search within a node to search the |
| 93 | // pivots so that we can support a larger fanout. |
| 94 | // These changes (3312+3522) also (probably) introduce an isolation error (#3529). |
| 95 | // We must make sure we lock the right range for proper isolation level. |
| 96 | // There's probably a bug in which the following could happen. |
| 97 | // Thread A: Searches through deleted keys A,B,D,E and finds nothing, so searches the next leaf, releasing the YDB lock. |
| 98 | // Thread B: Inserts key C, and acquires the write lock, then commits. |
| 99 | // Thread A: Resumes, searching F,G,H and return success. Thread A then read-locks the range A-H, and doesn't notice |
| 100 | // the value C inserted by thread B. Thus a failure of serialization. |
| 101 | // See #3529. |
| 102 | // There also remains a potential thrashing problem. When we get a TOKUDB_TRY_AGAIN, we unpin everything. There's |
| 103 | // no guarantee that we will get everything pinned again. We ought to keep nodes pinned when we retry, except that on the |
| 104 | // way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528. |
| 105 | DBT pivot_bound; |
| 106 | const DBT *k_bound; |
| 107 | }; |
| 108 | |
| 109 | /* initialize the search compare object */ |
| 110 | static inline ft_search *ft_search_init(ft_search *search, ft_search_compare_func_t compare, |
| 111 | enum ft_search_direction_e direction, |
| 112 | const DBT *k, const DBT *k_bound, void *context) { |
| 113 | search->compare = compare; |
| 114 | search->direction = direction; |
| 115 | search->k = k; |
| 116 | search->context = context; |
| 117 | toku_init_dbt(&search->pivot_bound); |
| 118 | search->k_bound = k_bound; |
| 119 | return search; |
| 120 | } |
| 121 | |
| 122 | static inline void ft_search_finish(ft_search *search) { |
| 123 | toku_destroy_dbt(&search->pivot_bound); |
| 124 | } |
| 125 | |
| 126 | |
| 127 | int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN txn, |
| 128 | enum cursor_read_type read_type, |
| 129 | bool disable_prefetching, |
| 130 | bool is_temporary); |
| 131 | |
| 132 | void toku_ft_cursor_destroy(FT_CURSOR cursor); |
| 133 | |
| 134 | int toku_ft_lookup(FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
| 135 | |
| 136 | void toku_ft_cursor_set_prefetching(FT_CURSOR cursor); |
| 137 | |
| 138 | bool toku_ft_cursor_prefetching(FT_CURSOR cursor); |
| 139 | |
| 140 | bool toku_ft_cursor_not_set(FT_CURSOR cursor); |
| 141 | |
| 142 | void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor); |
| 143 | |
| 144 | void toku_ft_cursor_remove_restriction(FT_CURSOR cursor); |
| 145 | |
| 146 | void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *); |
| 147 | |
| 148 | int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor); |
| 149 | |
| 150 | void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int); |
| 151 | |
| 152 | int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
| 153 | |
| 154 | int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
| 155 | |
| 156 | int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
| 157 | |
| 158 | int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
| 159 | |
| 160 | int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
| 161 | |
| 162 | int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
| 163 | |
| 164 | int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
| 165 | |
| 166 | int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result)); |
| 167 | |
| 168 | bool toku_ft_cursor_uninitialized(FT_CURSOR cursor) __attribute__ ((warn_unused_result)); |
| 169 | |
| 170 | void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval); |
| 171 | |
| 172 | int toku_ft_cursor_check_restricted_range(FT_CURSOR cursor, const void *key, uint32_t keylen); |
| 173 | |
| 174 | int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd, |
| 175 | FT_GET_CALLBACK_FUNCTION getf, void *getf_v, |
| 176 | uint32_t *keylen, void **key, uint32_t *vallen, void **val); |
| 177 | |
| 178 | // used by get_key_after_bytes |
| 179 | int toku_ft_cursor_compare_one(const ft_search &search, const DBT *x); |
| 180 | int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x); |
| 181 | |
| 182 | // deprecated, should only be used by tests, and eventually removed |
| 183 | int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *ftcursor_p, TOKUTXN txn, bool, bool) __attribute__ ((warn_unused_result)); |
| 184 | void toku_ft_cursor_close(FT_CURSOR cursor); |
| 185 | int toku_ft_cursor_get(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags); |
| 186 | int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn); |
| 187 | |