1 | /* |
2 | * Copyright 2014-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #pragma once |
17 | |
18 | #include <algorithm> |
19 | #include <exception> |
20 | #include <functional> |
21 | |
22 | #include <boost/intrusive/list.hpp> |
23 | #include <boost/intrusive/unordered_set.hpp> |
24 | #include <boost/iterator/iterator_adaptor.hpp> |
25 | #include <boost/utility.hpp> |
26 | |
27 | #include <folly/lang/Exception.h> |
28 | |
29 | namespace folly { |
30 | |
31 | /** |
32 | * A general purpose LRU evicting cache. Designed to support constant time |
33 | * set/get operations. It maintains a doubly linked list of items that are |
34 | * threaded through an index (a hash map). The access ordered is maintained |
35 | * on the list by moving an element to the front of list on a get. New elements |
36 | * are added to the front of the list. The index size is set to half the |
37 | * capacity (setting capacity to 0 is a special case. see notes at the end of |
38 | * this section). So assuming uniform distribution of keys, set/get are both |
39 | * constant time operations. |
40 | * |
41 | * On reaching capacity limit, clearSize_ LRU items are evicted at a time. If |
42 | * a callback is specified with setPruneHook, it is invoked for each eviction. |
43 | * |
44 | * This is NOT a thread-safe implementation. |
45 | * |
46 | * Configurability: capacity of the cache, number of items to evict, eviction |
47 | * callback and the hasher to hash the keys can all be supplied by the caller. |
48 | * |
49 | * If at a given state, N1 - N6 are the nodes in MRU to LRU order and hashing |
50 | * to index keys as {(N1,N5)->H1, (N4,N5,N5)->H2, N3->Hi}, the datastructure |
51 | * layout is as below. N1 .. N6 is a list threaded through the hash. |
52 | * Assuming, each the number of nodes hashed to each index key is bounded, the |
53 | * following operations run in constant time. |
54 | * i) get computes the index key, walks the list of elements hashed to |
55 | * the key and moves it to the front of the list, if found. |
56 | * ii) set inserts a new node into the list and places the same node on to the |
57 | * list of elements hashing to the corresponding index key. |
58 | * ii) prune deletes nodes from the end of the list as well from the index. |
59 | * |
60 | * +----+ +----+ +----+ |
61 | * | H1 | <-> | N1 | <-> | N5 | |
62 | * +----+ +----+ +----+ |
63 | * ^ ^ ^ |
64 | * | ___/ \ |
65 | * | / \ |
66 | * |_ /________ \___ |
67 | * / | \ |
68 | * / | \ |
69 | * v v v |
70 | * +----+ +----+ +----+ +----+ |
71 | * | H2 | <-> | N4 | <-> | N2 | <-> | N6 | |
72 | * +----+ +----+ +----+ +----+ |
73 | * . ^ ^ |
74 | * . | | |
75 | * . | | |
76 | * . | _____| |
77 | * . | / |
78 | * v v |
79 | * +----+ +----+ |
80 | * | Hi | <-> | N3 | |
81 | * +----+ +----+ |
82 | * |
83 | * N.B 1 : Changing the capacity with setMaxSize does not change the index size |
84 | * and it could end up in too many elements indexed to the same slot in index. |
85 | * The set/get performance will get worse in this case. So it is best to avoid |
86 | * resizing. |
87 | * |
88 | * N.B 2 : Setting capacity to 0, using setMaxSize or initialization, turns off |
89 | * evictions based on sizeof the cache making it an INFINITE size cache |
90 | * unless evictions of LRU items are triggered by calling prune() by clients |
91 | * (using their own eviction criteria). |
92 | */ |
93 | template < |
94 | class TKey, |
95 | class TValue, |
96 | class THash = std::hash<TKey>, |
97 | class TKeyEqual = std::equal_to<TKey>> |
98 | class EvictingCacheMap { |
99 | private: |
100 | // typedefs for brevity |
101 | struct Node; |
102 | struct KeyHasher; |
103 | struct KeyValueEqual; |
104 | typedef boost::intrusive::link_mode<boost::intrusive::safe_link> link_mode; |
105 | typedef boost::intrusive::unordered_set< |
106 | Node, |
107 | boost::intrusive::hash<KeyHasher>, |
108 | boost::intrusive::equal<KeyValueEqual>> |
109 | NodeMap; |
110 | typedef boost::intrusive::list<Node> NodeList; |
111 | typedef std::pair<const TKey, TValue> TPair; |
112 | |
113 | public: |
114 | typedef std::function<void(TKey, TValue&&)> PruneHookCall; |
115 | |
116 | // iterator base : returns TPair on dereference |
117 | template <typename Value, typename TIterator> |
118 | class iterator_base : public boost::iterator_adaptor< |
119 | iterator_base<Value, TIterator>, |
120 | TIterator, |
121 | Value, |
122 | boost::bidirectional_traversal_tag> { |
123 | public: |
124 | iterator_base() {} |
125 | explicit iterator_base(TIterator it) |
126 | : iterator_base::iterator_adaptor_(it) {} |
127 | Value& dereference() const { |
128 | return this->base_reference()->pr; |
129 | } |
130 | }; |
131 | |
132 | // iterators |
133 | typedef iterator_base<TPair, typename NodeList::iterator> iterator; |
134 | typedef iterator_base<const TPair, typename NodeList::const_iterator> |
135 | const_iterator; |
136 | typedef iterator_base<TPair, typename NodeList::reverse_iterator> |
137 | reverse_iterator; |
138 | typedef iterator_base<const TPair, typename NodeList::const_reverse_iterator> |
139 | const_reverse_iterator; |
140 | |
141 | // the default map typedefs |
142 | using key_type = TKey; |
143 | using mapped_type = TValue; |
144 | using hasher = THash; |
145 | |
146 | /** |
147 | * Construct a EvictingCacheMap |
148 | * @param maxSize maximum size of the cache map. Once the map size exceeds |
149 | * maxSize, the map will begin to evict. |
150 | * @param clearSize the number of elements to clear at a time when the |
151 | * eviction size is reached. |
152 | */ |
153 | explicit EvictingCacheMap( |
154 | std::size_t maxSize, |
155 | std::size_t clearSize = 1, |
156 | const THash& keyHash = THash(), |
157 | const TKeyEqual& keyEqual = TKeyEqual()) |
158 | : nIndexBuckets_(std::max(maxSize / 2, std::size_t(kMinNumIndexBuckets))), |
159 | indexBuckets_(new typename NodeMap::bucket_type[nIndexBuckets_]), |
160 | indexTraits_(indexBuckets_.get(), nIndexBuckets_), |
161 | keyHash_(keyHash), |
162 | keyEqual_(keyEqual), |
163 | index_(indexTraits_, keyHash_, keyEqual_), |
164 | maxSize_(maxSize), |
165 | clearSize_(clearSize) {} |
166 | |
167 | EvictingCacheMap(const EvictingCacheMap&) = delete; |
168 | EvictingCacheMap& operator=(const EvictingCacheMap&) = delete; |
169 | EvictingCacheMap(EvictingCacheMap&&) = default; |
170 | EvictingCacheMap& operator=(EvictingCacheMap&&) = default; |
171 | |
172 | ~EvictingCacheMap() { |
173 | setPruneHook(nullptr); |
174 | // ignore any potential exceptions from pruneHook_ |
175 | pruneWithFailSafeOption(size(), nullptr, true); |
176 | } |
177 | |
178 | /** |
179 | * Adjust the max size of EvictingCacheMap. Note that this does not update |
180 | * nIndexBuckets_ accordingly. This API can cause performance to get very |
181 | * bad, e.g., the nIndexBuckets_ is still 100 after maxSize is updated to 1M. |
182 | * |
183 | * Calling this function with an arugment of 0 removes the limit on the cache |
184 | * size and elements are not evicted unless clients explictly call prune. |
185 | * |
186 | * If you intend to resize dynamically using this, then picking an index size |
187 | * that works well and initializing with corresponding maxSize is the only |
188 | * reasonable option. |
189 | * |
190 | * @param maxSize new maximum size of the cache map. |
191 | * @param pruneHook callback to use on eviction. |
192 | */ |
193 | void setMaxSize(size_t maxSize, PruneHookCall pruneHook = nullptr) { |
194 | if (maxSize != 0 && maxSize < size()) { |
195 | // Prune the excess elements with our new constraints. |
196 | prune(std::max(size() - maxSize, clearSize_), pruneHook); |
197 | } |
198 | maxSize_ = maxSize; |
199 | } |
200 | |
201 | size_t getMaxSize() const { |
202 | return maxSize_; |
203 | } |
204 | |
205 | void setClearSize(size_t clearSize) { |
206 | clearSize_ = clearSize; |
207 | } |
208 | |
209 | /** |
210 | * Check for existence of a specific key in the map. This operation has |
211 | * no effect on LRU order. |
212 | * @param key key to search for |
213 | * @return true if exists, false otherwise |
214 | */ |
215 | bool exists(const TKey& key) const { |
216 | return findInIndex(key) != index_.end(); |
217 | } |
218 | |
219 | /** |
220 | * Get the value associated with a specific key. This function always |
221 | * promotes a found value to the head of the LRU. |
222 | * @param key key associated with the value |
223 | * @return the value if it exists |
224 | * @throw std::out_of_range exception of the key does not exist |
225 | */ |
226 | TValue& get(const TKey& key) { |
227 | auto it = find(key); |
228 | if (it == end()) { |
229 | throw_exception<std::out_of_range>("Key does not exist" ); |
230 | } |
231 | return it->second; |
232 | } |
233 | |
234 | /** |
235 | * Get the iterator associated with a specific key. This function always |
236 | * promotes a found value to the head of the LRU. |
237 | * @param key key to associate with value |
238 | * @return the iterator of the object (a std::pair of const TKey, TValue) or |
239 | * end() if it does not exist |
240 | */ |
241 | iterator find(const TKey& key) { |
242 | auto it = findInIndex(key); |
243 | if (it == index_.end()) { |
244 | return end(); |
245 | } |
246 | lru_.erase(lru_.iterator_to(*it)); |
247 | lru_.push_front(*it); |
248 | return iterator(lru_.iterator_to(*it)); |
249 | } |
250 | |
251 | /** |
252 | * Get the value associated with a specific key. This function never |
253 | * promotes a found value to the head of the LRU. |
254 | * @param key key associated with the value |
255 | * @return the value if it exists |
256 | * @throw std::out_of_range exception of the key does not exist |
257 | */ |
258 | const TValue& getWithoutPromotion(const TKey& key) const { |
259 | auto it = findWithoutPromotion(key); |
260 | if (it == end()) { |
261 | throw_exception<std::out_of_range>("Key does not exist" ); |
262 | } |
263 | return it->second; |
264 | } |
265 | |
266 | TValue& getWithoutPromotion(const TKey& key) { |
267 | auto const& cThis = *this; |
268 | return const_cast<TValue&>(cThis.getWithoutPromotion(key)); |
269 | } |
270 | |
271 | /** |
272 | * Get the iterator associated with a specific key. This function never |
273 | * promotes a found value to the head of the LRU. |
274 | * @param key key to associate with value |
275 | * @return the iterator of the object (a std::pair of const TKey, TValue) or |
276 | * end() if it does not exist |
277 | */ |
278 | const_iterator findWithoutPromotion(const TKey& key) const { |
279 | auto it = findInIndex(key); |
280 | return (it == index_.end()) ? end() : const_iterator(lru_.iterator_to(*it)); |
281 | } |
282 | |
283 | iterator findWithoutPromotion(const TKey& key) { |
284 | auto it = findInIndex(key); |
285 | return (it == index_.end()) ? end() : iterator(lru_.iterator_to(*it)); |
286 | } |
287 | |
288 | /** |
289 | * Erase the key-value pair associated with key if it exists. |
290 | * @param key key associated with the value |
291 | * @return true if the key existed and was erased, else false |
292 | */ |
293 | bool erase(const TKey& key) { |
294 | auto it = findInIndex(key); |
295 | if (it == index_.end()) { |
296 | return false; |
297 | } |
298 | auto node = &(*it); |
299 | std::unique_ptr<Node> nptr(node); |
300 | lru_.erase(lru_.iterator_to(*node)); |
301 | index_.erase(it); |
302 | return true; |
303 | } |
304 | |
305 | /** |
306 | * Set a key-value pair in the dictionary |
307 | * @param key key to associate with value |
308 | * @param value value to associate with the key |
309 | * @param promote boolean flag indicating whether or not to move something |
310 | * to the front of an LRU. This only really matters if you're setting |
311 | * a value that already exists. |
312 | * @param pruneHook callback to use on eviction (if it occurs). |
313 | */ |
314 | void set( |
315 | const TKey& key, |
316 | TValue value, |
317 | bool promote = true, |
318 | PruneHookCall pruneHook = nullptr) { |
319 | auto it = findInIndex(key); |
320 | if (it != index_.end()) { |
321 | it->pr.second = std::move(value); |
322 | if (promote) { |
323 | lru_.erase(lru_.iterator_to(*it)); |
324 | lru_.push_front(*it); |
325 | } |
326 | } else { |
327 | auto node = new Node(key, std::move(value)); |
328 | index_.insert(*node); |
329 | lru_.push_front(*node); |
330 | |
331 | // no evictions if maxSize_ is 0 i.e. unlimited capacity |
332 | if (maxSize_ > 0 && size() > maxSize_) { |
333 | prune(clearSize_, pruneHook); |
334 | } |
335 | } |
336 | } |
337 | |
338 | /** |
339 | * Get the number of elements in the dictionary |
340 | * @return the size of the dictionary |
341 | */ |
342 | std::size_t size() const { |
343 | return index_.size(); |
344 | } |
345 | |
346 | /** |
347 | * Typical empty function |
348 | * @return true if empty, false otherwise |
349 | */ |
350 | bool empty() const { |
351 | return index_.empty(); |
352 | } |
353 | |
354 | void clear(PruneHookCall pruneHook = nullptr) { |
355 | prune(size(), pruneHook); |
356 | } |
357 | |
358 | /** |
359 | * Set the prune hook, which is the function invoked on the key and value |
360 | * on each eviction. Will throw If the pruneHook throws, unless the |
361 | * EvictingCacheMap object is being destroyed in which case it will |
362 | * be ignored. |
363 | * @param pruneHook new callback to use on eviction. |
364 | * @param promote boolean flag indicating whether or not to move something |
365 | * to the front of an LRU. |
366 | * @return the iterator of the object (a std::pair of const TKey, TValue) or |
367 | * end() if it does not exist |
368 | */ |
369 | void setPruneHook(PruneHookCall pruneHook) { |
370 | pruneHook_ = pruneHook; |
371 | } |
372 | |
373 | /** |
374 | * Prune the minimum of pruneSize and size() from the back of the LRU. |
375 | * Will throw if pruneHook throws. |
376 | * @param pruneSize minimum number of elements to prune |
377 | * @param pruneHook a custom pruneHook function |
378 | */ |
379 | void prune(std::size_t pruneSize, PruneHookCall pruneHook = nullptr) { |
380 | // do not swallow exceptions for prunes not triggered from destructor |
381 | pruneWithFailSafeOption(pruneSize, pruneHook, false); |
382 | } |
383 | |
384 | // Iterators and such |
385 | iterator begin() { |
386 | return iterator(lru_.begin()); |
387 | } |
388 | iterator end() { |
389 | return iterator(lru_.end()); |
390 | } |
391 | const_iterator begin() const { |
392 | return const_iterator(lru_.begin()); |
393 | } |
394 | const_iterator end() const { |
395 | return const_iterator(lru_.end()); |
396 | } |
397 | |
398 | const_iterator cbegin() const { |
399 | return const_iterator(lru_.cbegin()); |
400 | } |
401 | const_iterator cend() const { |
402 | return const_iterator(lru_.cend()); |
403 | } |
404 | |
405 | reverse_iterator rbegin() { |
406 | return reverse_iterator(lru_.rbegin()); |
407 | } |
408 | reverse_iterator rend() { |
409 | return reverse_iterator(lru_.rend()); |
410 | } |
411 | |
412 | const_reverse_iterator rbegin() const { |
413 | return const_reverse_iterator(lru_.rbegin()); |
414 | } |
415 | const_reverse_iterator rend() const { |
416 | return const_reverse_iterator(lru_.rend()); |
417 | } |
418 | |
419 | const_reverse_iterator crbegin() const { |
420 | return const_reverse_iterator(lru_.crbegin()); |
421 | } |
422 | const_reverse_iterator crend() const { |
423 | return const_reverse_iterator(lru_.crend()); |
424 | } |
425 | |
426 | private: |
427 | struct Node : public boost::intrusive::unordered_set_base_hook<link_mode>, |
428 | public boost::intrusive::list_base_hook<link_mode> { |
429 | Node(const TKey& key, TValue&& value) |
430 | : pr(std::make_pair(key, std::move(value))) {} |
431 | TPair pr; |
432 | }; |
433 | |
434 | struct KeyHasher { |
435 | KeyHasher(const THash& keyHash) : hash(keyHash) {} |
436 | std::size_t operator()(const Node& node) const { |
437 | return hash(node.pr.first); |
438 | } |
439 | std::size_t operator()(const TKey& key) const { |
440 | return hash(key); |
441 | } |
442 | THash hash; |
443 | }; |
444 | |
445 | struct KeyValueEqual { |
446 | KeyValueEqual(const TKeyEqual& keyEqual) : equal(keyEqual) {} |
447 | bool operator()(const TKey& lhs, const Node& rhs) const { |
448 | return equal(lhs, rhs.pr.first); |
449 | } |
450 | bool operator()(const Node& lhs, const TKey& rhs) const { |
451 | return equal(lhs.pr.first, rhs); |
452 | } |
453 | bool operator()(const Node& lhs, const Node& rhs) const { |
454 | return equal(lhs.pr.first, rhs.pr.first); |
455 | } |
456 | TKeyEqual equal; |
457 | }; |
458 | |
459 | /** |
460 | * Get the iterator in in the index associated with a specific key. This is |
461 | * merely a search in the index and does not promote the object. |
462 | * @param key key to associate with value |
463 | * @return the NodeMap::iterator to the Node containing the object |
464 | * (a std::pair of const TKey, TValue) or index_.end() if it does not exist |
465 | */ |
466 | typename NodeMap::iterator findInIndex(const TKey& key) { |
467 | return index_.find(key, KeyHasher(keyHash_), KeyValueEqual(keyEqual_)); |
468 | } |
469 | |
470 | typename NodeMap::const_iterator findInIndex(const TKey& key) const { |
471 | return index_.find(key, KeyHasher(keyHash_), KeyValueEqual(keyEqual_)); |
472 | } |
473 | |
474 | /** |
475 | * Prune the minimum of pruneSize and size() from the back of the LRU. |
476 | * @param pruneSize minimum number of elements to prune |
477 | * @param pruneHook a custom pruneHook function |
478 | * @param failSafe true if exceptions are to ignored, false by default |
479 | */ |
480 | void pruneWithFailSafeOption( |
481 | std::size_t pruneSize, |
482 | PruneHookCall pruneHook, |
483 | bool failSafe) { |
484 | auto& ph = (nullptr == pruneHook) ? pruneHook_ : pruneHook; |
485 | |
486 | for (std::size_t i = 0; i < pruneSize && !lru_.empty(); i++) { |
487 | auto* node = &(*lru_.rbegin()); |
488 | std::unique_ptr<Node> nptr(node); |
489 | |
490 | lru_.erase(lru_.iterator_to(*node)); |
491 | index_.erase(index_.iterator_to(*node)); |
492 | if (ph) { |
493 | try { |
494 | ph(node->pr.first, std::move(node->pr.second)); |
495 | } catch (...) { |
496 | if (!failSafe) { |
497 | throw; |
498 | } |
499 | } |
500 | } |
501 | } |
502 | } |
503 | |
504 | static const std::size_t kMinNumIndexBuckets = 100; |
505 | PruneHookCall pruneHook_; |
506 | std::size_t nIndexBuckets_; |
507 | std::unique_ptr<typename NodeMap::bucket_type[]> indexBuckets_; |
508 | typename NodeMap::bucket_traits ; |
509 | THash keyHash_; |
510 | TKeyEqual keyEqual_; |
511 | NodeMap index_; |
512 | NodeList lru_; |
513 | std::size_t maxSize_; |
514 | std::size_t clearSize_; |
515 | }; |
516 | |
517 | } // namespace folly |
518 | |