| 1 | /* | 
|---|
| 2 | Portions Copyright (c) 2016-Present, Facebook, Inc. | 
|---|
| 3 | Portions Copyright (c) 2012, Monty Program Ab | 
|---|
| 4 |  | 
|---|
| 5 | This program is free software; you can redistribute it and/or modify | 
|---|
| 6 | it under the terms of the GNU General Public License as published by | 
|---|
| 7 | the Free Software Foundation; version 2 of the License. | 
|---|
| 8 |  | 
|---|
| 9 | This program is distributed in the hope that it will be useful, | 
|---|
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 12 | GNU General Public License for more details. | 
|---|
| 13 |  | 
|---|
| 14 | You should have received a copy of the GNU General Public License | 
|---|
| 15 | along with this program; if not, write to the Free Software | 
|---|
| 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */ | 
|---|
| 17 | #pragma once | 
|---|
| 18 |  | 
|---|
| 19 | #ifdef USE_PRAGMA_IMPLEMENTATION | 
|---|
| 20 | #pragma implementation // gcc: Class implementation | 
|---|
| 21 | #endif | 
|---|
| 22 |  | 
|---|
| 23 | /* C++ system header files */ | 
|---|
| 24 | #include <string> | 
|---|
| 25 | #include <time.h> | 
|---|
| 26 | #include <ctime> | 
|---|
| 27 |  | 
|---|
| 28 | /* RocksDB includes */ | 
|---|
| 29 | #include "rocksdb/compaction_filter.h" | 
|---|
| 30 |  | 
|---|
| 31 | /* MyRocks includes */ | 
|---|
| 32 | #include "./ha_rocksdb_proto.h" | 
|---|
| 33 | #include "./rdb_datadic.h" | 
|---|
| 34 |  | 
|---|
| 35 | namespace myrocks { | 
|---|
| 36 |  | 
|---|
| 37 | class Rdb_compact_filter : public rocksdb::CompactionFilter { | 
|---|
| 38 | public: | 
|---|
| 39 | Rdb_compact_filter(const Rdb_compact_filter &) = delete; | 
|---|
| 40 | Rdb_compact_filter &operator=(const Rdb_compact_filter &) = delete; | 
|---|
| 41 |  | 
|---|
| 42 | explicit Rdb_compact_filter(uint32_t _cf_id) : m_cf_id(_cf_id) {} | 
|---|
| 43 | ~Rdb_compact_filter() { | 
|---|
| 44 | // Increment stats by num expired at the end of compaction | 
|---|
| 45 | rdb_update_global_stats(ROWS_EXPIRED, m_num_expired); | 
|---|
| 46 | } | 
|---|
| 47 |  | 
|---|
| 48 | // keys are passed in sorted order within the same sst. | 
|---|
| 49 | // V1 Filter is thread safe on our usage (creating from Factory). | 
|---|
| 50 | // Make sure to protect instance variables when switching to thread | 
|---|
| 51 | // unsafe in the future. | 
|---|
| 52 | virtual bool Filter(int level, const rocksdb::Slice &key, | 
|---|
| 53 | const rocksdb::Slice &existing_value, | 
|---|
| 54 | std::string *new_value, | 
|---|
| 55 | bool *value_changed) const override { | 
|---|
| 56 | DBUG_ASSERT(key.size() >= sizeof(uint32)); | 
|---|
| 57 |  | 
|---|
| 58 | GL_INDEX_ID gl_index_id; | 
|---|
| 59 | gl_index_id.cf_id = m_cf_id; | 
|---|
| 60 | gl_index_id.index_id = rdb_netbuf_to_uint32((const uchar *)key.data()); | 
|---|
| 61 | DBUG_ASSERT(gl_index_id.index_id >= 1); | 
|---|
| 62 |  | 
|---|
| 63 | if (gl_index_id != m_prev_index) { | 
|---|
| 64 | m_should_delete = | 
|---|
| 65 | rdb_get_dict_manager()->is_drop_index_ongoing(gl_index_id); | 
|---|
| 66 |  | 
|---|
| 67 | if (!m_should_delete) { | 
|---|
| 68 | get_ttl_duration_and_offset(gl_index_id, &m_ttl_duration, | 
|---|
| 69 | &m_ttl_offset); | 
|---|
| 70 |  | 
|---|
| 71 | if (m_ttl_duration != 0 && m_snapshot_timestamp == 0) { | 
|---|
| 72 | /* | 
|---|
| 73 | For efficiency reasons, we lazily call GetIntProperty to get the | 
|---|
| 74 | oldest snapshot time (occurs once per compaction). | 
|---|
| 75 | */ | 
|---|
| 76 | rocksdb::DB *const rdb = rdb_get_rocksdb_db(); | 
|---|
| 77 | if (!rdb->GetIntProperty(rocksdb::DB::Properties::kOldestSnapshotTime, | 
|---|
| 78 | &m_snapshot_timestamp) || | 
|---|
| 79 | m_snapshot_timestamp == 0) { | 
|---|
| 80 | m_snapshot_timestamp = static_cast<uint64_t>(std::time(nullptr)); | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | #ifndef NDEBUG | 
|---|
| 84 | int snapshot_ts = rdb_dbug_set_ttl_snapshot_ts(); | 
|---|
| 85 | if (snapshot_ts) { | 
|---|
| 86 | m_snapshot_timestamp = | 
|---|
| 87 | static_cast<uint64_t>(std::time(nullptr)) + snapshot_ts; | 
|---|
| 88 | } | 
|---|
| 89 | #endif | 
|---|
| 90 | } | 
|---|
| 91 | } | 
|---|
| 92 |  | 
|---|
| 93 | m_prev_index = gl_index_id; | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 | if (m_should_delete) { | 
|---|
| 97 | m_num_deleted++; | 
|---|
| 98 | return true; | 
|---|
| 99 | } else if (m_ttl_duration > 0 && | 
|---|
| 100 | should_filter_ttl_rec(key, existing_value)) { | 
|---|
| 101 | m_num_expired++; | 
|---|
| 102 | return true; | 
|---|
| 103 | } | 
|---|
| 104 |  | 
|---|
| 105 | return false; | 
|---|
| 106 | } | 
|---|
| 107 |  | 
|---|
| 108 | virtual bool IgnoreSnapshots() const override { return true; } | 
|---|
| 109 |  | 
|---|
| 110 | virtual const char *Name() const override { return "Rdb_compact_filter"; } | 
|---|
| 111 |  | 
|---|
| 112 | void get_ttl_duration_and_offset(const GL_INDEX_ID &gl_index_id, | 
|---|
| 113 | uint64 *ttl_duration, | 
|---|
| 114 | uint32 *ttl_offset) const { | 
|---|
| 115 | DBUG_ASSERT(ttl_duration != nullptr); | 
|---|
| 116 | /* | 
|---|
| 117 | If TTL is disabled set ttl_duration to 0.  This prevents the compaction | 
|---|
| 118 | filter from dropping expired records. | 
|---|
| 119 | */ | 
|---|
| 120 | if (!rdb_is_ttl_enabled()) { | 
|---|
| 121 | *ttl_duration = 0; | 
|---|
| 122 | return; | 
|---|
| 123 | } | 
|---|
| 124 |  | 
|---|
| 125 | /* | 
|---|
| 126 | If key is part of system column family, it's definitely not a TTL key. | 
|---|
| 127 | */ | 
|---|
| 128 | rocksdb::ColumnFamilyHandle *s_cf = rdb_get_dict_manager()->get_system_cf(); | 
|---|
| 129 | if (s_cf == nullptr || gl_index_id.cf_id == s_cf->GetID()) { | 
|---|
| 130 | *ttl_duration = 0; | 
|---|
| 131 | return; | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | struct Rdb_index_info index_info; | 
|---|
| 135 | if (!rdb_get_dict_manager()->get_index_info(gl_index_id, &index_info)) { | 
|---|
| 136 | // NO_LINT_DEBUG | 
|---|
| 137 | sql_print_error( "RocksDB: Could not get index information " | 
|---|
| 138 | "for Index Number (%u,%u)", | 
|---|
| 139 | gl_index_id.cf_id, gl_index_id.index_id); | 
|---|
| 140 | } | 
|---|
| 141 |  | 
|---|
| 142 | #ifndef NDEBUG | 
|---|
| 143 | if (rdb_dbug_set_ttl_ignore_pk() && | 
|---|
| 144 | index_info.m_index_type == Rdb_key_def::INDEX_TYPE_PRIMARY) { | 
|---|
| 145 | *ttl_duration = 0; | 
|---|
| 146 | return; | 
|---|
| 147 | } | 
|---|
| 148 | #endif | 
|---|
| 149 |  | 
|---|
| 150 | *ttl_duration = index_info.m_ttl_duration; | 
|---|
| 151 | if (Rdb_key_def::has_index_flag(index_info.m_index_flags, | 
|---|
| 152 | Rdb_key_def::TTL_FLAG)) { | 
|---|
| 153 | *ttl_offset = Rdb_key_def::calculate_index_flag_offset( | 
|---|
| 154 | index_info.m_index_flags, Rdb_key_def::TTL_FLAG); | 
|---|
| 155 | } | 
|---|
| 156 | } | 
|---|
| 157 |  | 
|---|
| 158 | bool should_filter_ttl_rec(const rocksdb::Slice &key, | 
|---|
| 159 | const rocksdb::Slice &existing_value) const { | 
|---|
| 160 | uint64 ttl_timestamp; | 
|---|
| 161 | Rdb_string_reader reader(&existing_value); | 
|---|
| 162 | if (!reader.read(m_ttl_offset) || reader.read_uint64(&ttl_timestamp)) { | 
|---|
| 163 | std::string buf; | 
|---|
| 164 | buf = rdb_hexdump(existing_value.data(), existing_value.size(), | 
|---|
| 165 | RDB_MAX_HEXDUMP_LEN); | 
|---|
| 166 | // NO_LINT_DEBUG | 
|---|
| 167 | sql_print_error( "Decoding ttl from PK value failed in compaction filter, " | 
|---|
| 168 | "for index (%u,%u), val: %s", | 
|---|
| 169 | m_prev_index.cf_id, m_prev_index.index_id, buf.c_str()); | 
|---|
| 170 | abort(); | 
|---|
| 171 | } | 
|---|
| 172 |  | 
|---|
| 173 | /* | 
|---|
| 174 | Filter out the record only if it is older than the oldest snapshot | 
|---|
| 175 | timestamp.  This prevents any rows from expiring in the middle of | 
|---|
| 176 | long-running transactions. | 
|---|
| 177 | */ | 
|---|
| 178 | return ttl_timestamp + m_ttl_duration <= m_snapshot_timestamp; | 
|---|
| 179 | } | 
|---|
| 180 |  | 
|---|
| 181 | private: | 
|---|
| 182 | // Column family for this compaction filter | 
|---|
| 183 | const uint32_t m_cf_id; | 
|---|
| 184 | // Index id of the previous record | 
|---|
| 185 | mutable GL_INDEX_ID m_prev_index = {0, 0}; | 
|---|
| 186 | // Number of rows deleted for the same index id | 
|---|
| 187 | mutable uint64 m_num_deleted = 0; | 
|---|
| 188 | // Number of rows expired for the TTL index | 
|---|
| 189 | mutable uint64 m_num_expired = 0; | 
|---|
| 190 | // Current index id should be deleted or not (should be deleted if true) | 
|---|
| 191 | mutable bool m_should_delete = false; | 
|---|
| 192 | // TTL duration for the current index if TTL is enabled | 
|---|
| 193 | mutable uint64 m_ttl_duration = 0; | 
|---|
| 194 | // TTL offset for all records in the current index | 
|---|
| 195 | mutable uint32 m_ttl_offset = 0; | 
|---|
| 196 | // Oldest snapshot timestamp at the time a TTL index is discovered | 
|---|
| 197 | mutable uint64_t m_snapshot_timestamp = 0; | 
|---|
| 198 | }; | 
|---|
| 199 |  | 
|---|
| 200 | class Rdb_compact_filter_factory : public rocksdb::CompactionFilterFactory { | 
|---|
| 201 | public: | 
|---|
| 202 | Rdb_compact_filter_factory(const Rdb_compact_filter_factory &) = delete; | 
|---|
| 203 | Rdb_compact_filter_factory & | 
|---|
| 204 | operator=(const Rdb_compact_filter_factory &) = delete; | 
|---|
| 205 | Rdb_compact_filter_factory() {} | 
|---|
| 206 |  | 
|---|
| 207 | ~Rdb_compact_filter_factory() {} | 
|---|
| 208 |  | 
|---|
| 209 | const char *Name() const override { return "Rdb_compact_filter_factory"; } | 
|---|
| 210 |  | 
|---|
| 211 | std::unique_ptr<rocksdb::CompactionFilter> CreateCompactionFilter( | 
|---|
| 212 | const rocksdb::CompactionFilter::Context &context) override { | 
|---|
| 213 | return std::unique_ptr<rocksdb::CompactionFilter>( | 
|---|
| 214 | new Rdb_compact_filter(context.column_family_id)); | 
|---|
| 215 | } | 
|---|
| 216 | }; | 
|---|
| 217 |  | 
|---|
| 218 | } // namespace myrocks | 
|---|
| 219 |  | 
|---|