1/*
2 * Copyright 2014-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#pragma once
17
18#include <algorithm>
19#include <exception>
20#include <functional>
21
22#include <boost/intrusive/list.hpp>
23#include <boost/intrusive/unordered_set.hpp>
24#include <boost/iterator/iterator_adaptor.hpp>
25#include <boost/utility.hpp>
26
27#include <folly/lang/Exception.h>
28
29namespace folly {
30
31/**
32 * A general purpose LRU evicting cache. Designed to support constant time
33 * set/get operations. It maintains a doubly linked list of items that are
34 * threaded through an index (a hash map). The access ordered is maintained
35 * on the list by moving an element to the front of list on a get. New elements
36 * are added to the front of the list. The index size is set to half the
37 * capacity (setting capacity to 0 is a special case. see notes at the end of
38 * this section). So assuming uniform distribution of keys, set/get are both
39 * constant time operations.
40 *
41 * On reaching capacity limit, clearSize_ LRU items are evicted at a time. If
42 * a callback is specified with setPruneHook, it is invoked for each eviction.
43 *
44 * This is NOT a thread-safe implementation.
45 *
46 * Configurability: capacity of the cache, number of items to evict, eviction
47 * callback and the hasher to hash the keys can all be supplied by the caller.
48 *
49 * If at a given state, N1 - N6 are the nodes in MRU to LRU order and hashing
50 * to index keys as {(N1,N5)->H1, (N4,N5,N5)->H2, N3->Hi}, the datastructure
51 * layout is as below. N1 .. N6 is a list threaded through the hash.
52 * Assuming, each the number of nodes hashed to each index key is bounded, the
53 * following operations run in constant time.
54 * i) get computes the index key, walks the list of elements hashed to
55 * the key and moves it to the front of the list, if found.
56 * ii) set inserts a new node into the list and places the same node on to the
57 * list of elements hashing to the corresponding index key.
58 * ii) prune deletes nodes from the end of the list as well from the index.
59 *
60 * +----+ +----+ +----+
61 * | H1 | <-> | N1 | <-> | N5 |
62 * +----+ +----+ +----+
63 * ^ ^ ^
64 * | ___/ \
65 * | / \
66 * |_ /________ \___
67 * / | \
68 * / | \
69 * v v v
70 * +----+ +----+ +----+ +----+
71 * | H2 | <-> | N4 | <-> | N2 | <-> | N6 |
72 * +----+ +----+ +----+ +----+
73 * . ^ ^
74 * . | |
75 * . | |
76 * . | _____|
77 * . | /
78 * v v
79 * +----+ +----+
80 * | Hi | <-> | N3 |
81 * +----+ +----+
82 *
83 * N.B 1 : Changing the capacity with setMaxSize does not change the index size
84 * and it could end up in too many elements indexed to the same slot in index.
85 * The set/get performance will get worse in this case. So it is best to avoid
86 * resizing.
87 *
88 * N.B 2 : Setting capacity to 0, using setMaxSize or initialization, turns off
89 * evictions based on sizeof the cache making it an INFINITE size cache
90 * unless evictions of LRU items are triggered by calling prune() by clients
91 * (using their own eviction criteria).
92 */
93template <
94 class TKey,
95 class TValue,
96 class THash = std::hash<TKey>,
97 class TKeyEqual = std::equal_to<TKey>>
98class EvictingCacheMap {
99 private:
100 // typedefs for brevity
101 struct Node;
102 struct KeyHasher;
103 struct KeyValueEqual;
104 typedef boost::intrusive::link_mode<boost::intrusive::safe_link> link_mode;
105 typedef boost::intrusive::unordered_set<
106 Node,
107 boost::intrusive::hash<KeyHasher>,
108 boost::intrusive::equal<KeyValueEqual>>
109 NodeMap;
110 typedef boost::intrusive::list<Node> NodeList;
111 typedef std::pair<const TKey, TValue> TPair;
112
113 public:
114 typedef std::function<void(TKey, TValue&&)> PruneHookCall;
115
116 // iterator base : returns TPair on dereference
117 template <typename Value, typename TIterator>
118 class iterator_base : public boost::iterator_adaptor<
119 iterator_base<Value, TIterator>,
120 TIterator,
121 Value,
122 boost::bidirectional_traversal_tag> {
123 public:
124 iterator_base() {}
125 explicit iterator_base(TIterator it)
126 : iterator_base::iterator_adaptor_(it) {}
127 Value& dereference() const {
128 return this->base_reference()->pr;
129 }
130 };
131
132 // iterators
133 typedef iterator_base<TPair, typename NodeList::iterator> iterator;
134 typedef iterator_base<const TPair, typename NodeList::const_iterator>
135 const_iterator;
136 typedef iterator_base<TPair, typename NodeList::reverse_iterator>
137 reverse_iterator;
138 typedef iterator_base<const TPair, typename NodeList::const_reverse_iterator>
139 const_reverse_iterator;
140
141 // the default map typedefs
142 using key_type = TKey;
143 using mapped_type = TValue;
144 using hasher = THash;
145
146 /**
147 * Construct a EvictingCacheMap
148 * @param maxSize maximum size of the cache map. Once the map size exceeds
149 * maxSize, the map will begin to evict.
150 * @param clearSize the number of elements to clear at a time when the
151 * eviction size is reached.
152 */
153 explicit EvictingCacheMap(
154 std::size_t maxSize,
155 std::size_t clearSize = 1,
156 const THash& keyHash = THash(),
157 const TKeyEqual& keyEqual = TKeyEqual())
158 : nIndexBuckets_(std::max(maxSize / 2, std::size_t(kMinNumIndexBuckets))),
159 indexBuckets_(new typename NodeMap::bucket_type[nIndexBuckets_]),
160 indexTraits_(indexBuckets_.get(), nIndexBuckets_),
161 keyHash_(keyHash),
162 keyEqual_(keyEqual),
163 index_(indexTraits_, keyHash_, keyEqual_),
164 maxSize_(maxSize),
165 clearSize_(clearSize) {}
166
167 EvictingCacheMap(const EvictingCacheMap&) = delete;
168 EvictingCacheMap& operator=(const EvictingCacheMap&) = delete;
169 EvictingCacheMap(EvictingCacheMap&&) = default;
170 EvictingCacheMap& operator=(EvictingCacheMap&&) = default;
171
172 ~EvictingCacheMap() {
173 setPruneHook(nullptr);
174 // ignore any potential exceptions from pruneHook_
175 pruneWithFailSafeOption(size(), nullptr, true);
176 }
177
178 /**
179 * Adjust the max size of EvictingCacheMap. Note that this does not update
180 * nIndexBuckets_ accordingly. This API can cause performance to get very
181 * bad, e.g., the nIndexBuckets_ is still 100 after maxSize is updated to 1M.
182 *
183 * Calling this function with an arugment of 0 removes the limit on the cache
184 * size and elements are not evicted unless clients explictly call prune.
185 *
186 * If you intend to resize dynamically using this, then picking an index size
187 * that works well and initializing with corresponding maxSize is the only
188 * reasonable option.
189 *
190 * @param maxSize new maximum size of the cache map.
191 * @param pruneHook callback to use on eviction.
192 */
193 void setMaxSize(size_t maxSize, PruneHookCall pruneHook = nullptr) {
194 if (maxSize != 0 && maxSize < size()) {
195 // Prune the excess elements with our new constraints.
196 prune(std::max(size() - maxSize, clearSize_), pruneHook);
197 }
198 maxSize_ = maxSize;
199 }
200
201 size_t getMaxSize() const {
202 return maxSize_;
203 }
204
205 void setClearSize(size_t clearSize) {
206 clearSize_ = clearSize;
207 }
208
209 /**
210 * Check for existence of a specific key in the map. This operation has
211 * no effect on LRU order.
212 * @param key key to search for
213 * @return true if exists, false otherwise
214 */
215 bool exists(const TKey& key) const {
216 return findInIndex(key) != index_.end();
217 }
218
219 /**
220 * Get the value associated with a specific key. This function always
221 * promotes a found value to the head of the LRU.
222 * @param key key associated with the value
223 * @return the value if it exists
224 * @throw std::out_of_range exception of the key does not exist
225 */
226 TValue& get(const TKey& key) {
227 auto it = find(key);
228 if (it == end()) {
229 throw_exception<std::out_of_range>("Key does not exist");
230 }
231 return it->second;
232 }
233
234 /**
235 * Get the iterator associated with a specific key. This function always
236 * promotes a found value to the head of the LRU.
237 * @param key key to associate with value
238 * @return the iterator of the object (a std::pair of const TKey, TValue) or
239 * end() if it does not exist
240 */
241 iterator find(const TKey& key) {
242 auto it = findInIndex(key);
243 if (it == index_.end()) {
244 return end();
245 }
246 lru_.erase(lru_.iterator_to(*it));
247 lru_.push_front(*it);
248 return iterator(lru_.iterator_to(*it));
249 }
250
251 /**
252 * Get the value associated with a specific key. This function never
253 * promotes a found value to the head of the LRU.
254 * @param key key associated with the value
255 * @return the value if it exists
256 * @throw std::out_of_range exception of the key does not exist
257 */
258 const TValue& getWithoutPromotion(const TKey& key) const {
259 auto it = findWithoutPromotion(key);
260 if (it == end()) {
261 throw_exception<std::out_of_range>("Key does not exist");
262 }
263 return it->second;
264 }
265
266 TValue& getWithoutPromotion(const TKey& key) {
267 auto const& cThis = *this;
268 return const_cast<TValue&>(cThis.getWithoutPromotion(key));
269 }
270
271 /**
272 * Get the iterator associated with a specific key. This function never
273 * promotes a found value to the head of the LRU.
274 * @param key key to associate with value
275 * @return the iterator of the object (a std::pair of const TKey, TValue) or
276 * end() if it does not exist
277 */
278 const_iterator findWithoutPromotion(const TKey& key) const {
279 auto it = findInIndex(key);
280 return (it == index_.end()) ? end() : const_iterator(lru_.iterator_to(*it));
281 }
282
283 iterator findWithoutPromotion(const TKey& key) {
284 auto it = findInIndex(key);
285 return (it == index_.end()) ? end() : iterator(lru_.iterator_to(*it));
286 }
287
288 /**
289 * Erase the key-value pair associated with key if it exists.
290 * @param key key associated with the value
291 * @return true if the key existed and was erased, else false
292 */
293 bool erase(const TKey& key) {
294 auto it = findInIndex(key);
295 if (it == index_.end()) {
296 return false;
297 }
298 auto node = &(*it);
299 std::unique_ptr<Node> nptr(node);
300 lru_.erase(lru_.iterator_to(*node));
301 index_.erase(it);
302 return true;
303 }
304
305 /**
306 * Set a key-value pair in the dictionary
307 * @param key key to associate with value
308 * @param value value to associate with the key
309 * @param promote boolean flag indicating whether or not to move something
310 * to the front of an LRU. This only really matters if you're setting
311 * a value that already exists.
312 * @param pruneHook callback to use on eviction (if it occurs).
313 */
314 void set(
315 const TKey& key,
316 TValue value,
317 bool promote = true,
318 PruneHookCall pruneHook = nullptr) {
319 auto it = findInIndex(key);
320 if (it != index_.end()) {
321 it->pr.second = std::move(value);
322 if (promote) {
323 lru_.erase(lru_.iterator_to(*it));
324 lru_.push_front(*it);
325 }
326 } else {
327 auto node = new Node(key, std::move(value));
328 index_.insert(*node);
329 lru_.push_front(*node);
330
331 // no evictions if maxSize_ is 0 i.e. unlimited capacity
332 if (maxSize_ > 0 && size() > maxSize_) {
333 prune(clearSize_, pruneHook);
334 }
335 }
336 }
337
338 /**
339 * Get the number of elements in the dictionary
340 * @return the size of the dictionary
341 */
342 std::size_t size() const {
343 return index_.size();
344 }
345
346 /**
347 * Typical empty function
348 * @return true if empty, false otherwise
349 */
350 bool empty() const {
351 return index_.empty();
352 }
353
354 void clear(PruneHookCall pruneHook = nullptr) {
355 prune(size(), pruneHook);
356 }
357
358 /**
359 * Set the prune hook, which is the function invoked on the key and value
360 * on each eviction. Will throw If the pruneHook throws, unless the
361 * EvictingCacheMap object is being destroyed in which case it will
362 * be ignored.
363 * @param pruneHook new callback to use on eviction.
364 * @param promote boolean flag indicating whether or not to move something
365 * to the front of an LRU.
366 * @return the iterator of the object (a std::pair of const TKey, TValue) or
367 * end() if it does not exist
368 */
369 void setPruneHook(PruneHookCall pruneHook) {
370 pruneHook_ = pruneHook;
371 }
372
373 /**
374 * Prune the minimum of pruneSize and size() from the back of the LRU.
375 * Will throw if pruneHook throws.
376 * @param pruneSize minimum number of elements to prune
377 * @param pruneHook a custom pruneHook function
378 */
379 void prune(std::size_t pruneSize, PruneHookCall pruneHook = nullptr) {
380 // do not swallow exceptions for prunes not triggered from destructor
381 pruneWithFailSafeOption(pruneSize, pruneHook, false);
382 }
383
384 // Iterators and such
385 iterator begin() {
386 return iterator(lru_.begin());
387 }
388 iterator end() {
389 return iterator(lru_.end());
390 }
391 const_iterator begin() const {
392 return const_iterator(lru_.begin());
393 }
394 const_iterator end() const {
395 return const_iterator(lru_.end());
396 }
397
398 const_iterator cbegin() const {
399 return const_iterator(lru_.cbegin());
400 }
401 const_iterator cend() const {
402 return const_iterator(lru_.cend());
403 }
404
405 reverse_iterator rbegin() {
406 return reverse_iterator(lru_.rbegin());
407 }
408 reverse_iterator rend() {
409 return reverse_iterator(lru_.rend());
410 }
411
412 const_reverse_iterator rbegin() const {
413 return const_reverse_iterator(lru_.rbegin());
414 }
415 const_reverse_iterator rend() const {
416 return const_reverse_iterator(lru_.rend());
417 }
418
419 const_reverse_iterator crbegin() const {
420 return const_reverse_iterator(lru_.crbegin());
421 }
422 const_reverse_iterator crend() const {
423 return const_reverse_iterator(lru_.crend());
424 }
425
426 private:
427 struct Node : public boost::intrusive::unordered_set_base_hook<link_mode>,
428 public boost::intrusive::list_base_hook<link_mode> {
429 Node(const TKey& key, TValue&& value)
430 : pr(std::make_pair(key, std::move(value))) {}
431 TPair pr;
432 };
433
434 struct KeyHasher {
435 KeyHasher(const THash& keyHash) : hash(keyHash) {}
436 std::size_t operator()(const Node& node) const {
437 return hash(node.pr.first);
438 }
439 std::size_t operator()(const TKey& key) const {
440 return hash(key);
441 }
442 THash hash;
443 };
444
445 struct KeyValueEqual {
446 KeyValueEqual(const TKeyEqual& keyEqual) : equal(keyEqual) {}
447 bool operator()(const TKey& lhs, const Node& rhs) const {
448 return equal(lhs, rhs.pr.first);
449 }
450 bool operator()(const Node& lhs, const TKey& rhs) const {
451 return equal(lhs.pr.first, rhs);
452 }
453 bool operator()(const Node& lhs, const Node& rhs) const {
454 return equal(lhs.pr.first, rhs.pr.first);
455 }
456 TKeyEqual equal;
457 };
458
459 /**
460 * Get the iterator in in the index associated with a specific key. This is
461 * merely a search in the index and does not promote the object.
462 * @param key key to associate with value
463 * @return the NodeMap::iterator to the Node containing the object
464 * (a std::pair of const TKey, TValue) or index_.end() if it does not exist
465 */
466 typename NodeMap::iterator findInIndex(const TKey& key) {
467 return index_.find(key, KeyHasher(keyHash_), KeyValueEqual(keyEqual_));
468 }
469
470 typename NodeMap::const_iterator findInIndex(const TKey& key) const {
471 return index_.find(key, KeyHasher(keyHash_), KeyValueEqual(keyEqual_));
472 }
473
474 /**
475 * Prune the minimum of pruneSize and size() from the back of the LRU.
476 * @param pruneSize minimum number of elements to prune
477 * @param pruneHook a custom pruneHook function
478 * @param failSafe true if exceptions are to ignored, false by default
479 */
480 void pruneWithFailSafeOption(
481 std::size_t pruneSize,
482 PruneHookCall pruneHook,
483 bool failSafe) {
484 auto& ph = (nullptr == pruneHook) ? pruneHook_ : pruneHook;
485
486 for (std::size_t i = 0; i < pruneSize && !lru_.empty(); i++) {
487 auto* node = &(*lru_.rbegin());
488 std::unique_ptr<Node> nptr(node);
489
490 lru_.erase(lru_.iterator_to(*node));
491 index_.erase(index_.iterator_to(*node));
492 if (ph) {
493 try {
494 ph(node->pr.first, std::move(node->pr.second));
495 } catch (...) {
496 if (!failSafe) {
497 throw;
498 }
499 }
500 }
501 }
502 }
503
504 static const std::size_t kMinNumIndexBuckets = 100;
505 PruneHookCall pruneHook_;
506 std::size_t nIndexBuckets_;
507 std::unique_ptr<typename NodeMap::bucket_type[]> indexBuckets_;
508 typename NodeMap::bucket_traits indexTraits_;
509 THash keyHash_;
510 TKeyEqual keyEqual_;
511 NodeMap index_;
512 NodeList lru_;
513 std::size_t maxSize_;
514 std::size_t clearSize_;
515};
516
517} // namespace folly
518