1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5#ifndef _SMALLHASHTABLE_H_
6#define _SMALLHASHTABLE_H_
7
8// genLog2 is defined in compiler.hpp
9unsigned genLog2(unsigned value);
10
11//------------------------------------------------------------------------
12// HashTableInfo: a concept that provides equality and hashing methods for
13// a particular key type. Used by HashTableBase and its
14// subclasses.
15template <typename TKey>
16struct HashTableInfo
17{
18 // static bool Equals(const TKey& x, const TKey& y);
19 // static unsigned GetHashCode(const TKey& key);
20};
21
22//------------------------------------------------------------------------
23// HashTableInfo<TKey*>: specialized version of HashTableInfo for pointer-
24// typed keys.
25template <typename TKey>
26struct HashTableInfo<TKey*>
27{
28 static bool Equals(const TKey* x, const TKey* y)
29 {
30 return x == y;
31 }
32
33 static unsigned GetHashCode(const TKey* key)
34 {
35 // Shift off bits that are not likely to be significant
36 size_t keyval = reinterpret_cast<size_t>(key) >> ConstLog2<__alignof(TKey)>::value;
37
38 // Truncate and return the result
39 return static_cast<unsigned>(keyval);
40 }
41};
42
43//------------------------------------------------------------------------
44// HashTableInfo<int>: specialized version of HashTableInfo for int-
45// typed keys.
46template <>
47struct HashTableInfo<int>
48{
49 static bool Equals(int x, int y)
50 {
51 return x == y;
52 }
53
54 static unsigned GetHashCode(int key)
55 {
56 // Cast and return the key
57 return static_cast<unsigned>(key);
58 }
59};
60
61//------------------------------------------------------------------------
62// HashTableInfo<unsigned>: specialized version of HashTableInfo for unsigned-
63// typed keys.
64template <>
65struct HashTableInfo<unsigned>
66{
67 static bool Equals(unsigned x, unsigned y)
68 {
69 return x == y;
70 }
71
72 static unsigned GetHashCode(unsigned key)
73 {
74 // Return the key itself
75 return key;
76 }
77};
78
79//------------------------------------------------------------------------
80// HashTableBase: base type for HashTable and SmallHashTable. This class
81// provides the vast majority of the implementation. The
82// subclasses differ in the storage they use at the time of
83// construction: HashTable allocates the initial bucket
84// array on the heap; SmallHashTable contains a small inline
85// array.
86//
87// This implementation is based on the ideas presented in Herlihy, Shavit,
88// and Tzafrir '08 (http://mcg.cs.tau.ac.il/papers/disc2008-hopscotch.pdf),
89// though it does not currently implement the hopscotch algorithm.
90//
91// The approach taken is intended to perform well in both space and speed.
92// This approach is a hybrid of separate chaining and open addressing with
93// linear probing: collisions are resolved using a bucket chain, but that
94// chain is stored in the bucket array itself.
95//
96// Resolving collisions using a bucket chain avoids the primary clustering
97// issue common in linearly-probed open addressed hash tables, while using
98// buckets as chain nodes avoids the allocaiton traffic typical of chained
99// tables. Applying the hopscotch algorithm in the aforementioned paper
100// could further improve performance by optimizing access patterns for
101// better cache usage.
102//
103// Template parameters:
104// TKey - The type of the table's keys.
105// TValue - The type of the table's values.
106// TKeyInfo - A type that conforms to the HashTableInfo<TKey> concept.
107template <typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>, typename TAllocator = CompAllocator>
108class HashTableBase
109{
110 friend class KeyValuePair;
111 friend class Iterator;
112
113 enum : unsigned
114 {
115 InitialNumBuckets = 8
116 };
117
118protected:
119 //------------------------------------------------------------------------
120 // HashTableBase::Bucket: provides storage for the key-value pairs that
121 // make up the contents of the table.
122 //
123 // The "home" bucket for a particular key is the bucket indexed by the
124 // key's hash code modulo the size of the bucket array (the "home index").
125 //
126 // The home bucket is always considered to be part of the chain that it
127 // roots, even if it is also part of the chain rooted at a different
128 // bucket. `m_firstOffset` indicates the offset of the first non-home
129 // bucket in the home bucket's chain. If the `m_firstOffset` of a bucket
130 // is 0, the chain rooted at that bucket is empty.
131 //
132 // The index of the next bucket in a chain is calculated by adding the
133 // value in `m_nextOffset` to the index of the current bucket. If
134 // `m_nextOffset` is 0, the current bucket is the end of its chain. Each
135 // bucket in a chain must be occupied (i.e. `m_isFull` will be true).
136 struct Bucket
137 {
138 bool m_isFull; // True if the bucket is occupied; false otherwise.
139
140 unsigned m_firstOffset; // The offset to the first node in the chain for this bucket index.
141 unsigned m_nextOffset; // The offset to the next node in the chain for this bucket index.
142
143 unsigned m_hash; // The hash code for the element stored in this bucket.
144 TKey m_key; // The key for the element stored in this bucket.
145 TValue m_value; // The value for the element stored in this bucket.
146 };
147
148private:
149 TAllocator m_alloc; // The memory allocator.
150 Bucket* m_buckets; // The bucket array.
151 unsigned m_numBuckets; // The number of buckets in the bucket array.
152 unsigned m_numFullBuckets; // The number of occupied buckets.
153
154 //------------------------------------------------------------------------
155 // HashTableBase::Insert: inserts a key-value pair into a bucket array.
156 //
157 // Arguments:
158 // buckets - The bucket array in which to insert the key-value pair.
159 // numBuckets - The number of buckets in the bucket array.
160 // hash - The hash code of the key to insert.
161 // key - The key to insert.
162 // value - The value to insert.
163 //
164 // Returns:
165 // True if the key-value pair was successfully inserted; false
166 // otherwise.
167 static bool Insert(Bucket* buckets, unsigned numBuckets, unsigned hash, const TKey& key, const TValue& value)
168 {
169 const unsigned mask = numBuckets - 1;
170 unsigned homeIndex = hash & mask;
171
172 Bucket* home = &buckets[homeIndex];
173 if (!home->m_isFull)
174 {
175 // The home bucket is empty; use it.
176 //
177 // Note that `m_firstOffset` does not need to be updated: whether or not it is non-zero,
178 // it is already correct, since we're inserting at the head of the list. `m_nextOffset`
179 // must be 0, however, since this node should not be part of a list.
180 assert(home->m_nextOffset == 0);
181
182 home->m_isFull = true;
183 home->m_hash = hash;
184 home->m_key = key;
185 home->m_value = value;
186 return true;
187 }
188
189 // If the home bucket is full, probe to find the next empty bucket.
190 unsigned precedingIndexInChain = homeIndex;
191 unsigned nextIndexInChain = (homeIndex + home->m_firstOffset) & mask;
192 for (unsigned j = 1; j < numBuckets; j++)
193 {
194 unsigned bucketIndex = (homeIndex + j) & mask;
195 Bucket* bucket = &buckets[bucketIndex];
196 if (bucketIndex == nextIndexInChain)
197 {
198 assert(bucket->m_isFull);
199 precedingIndexInChain = bucketIndex;
200 nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
201 }
202 else if (!bucket->m_isFull)
203 {
204 bucket->m_isFull = true;
205 if (precedingIndexInChain == nextIndexInChain)
206 {
207 bucket->m_nextOffset = 0;
208 }
209 else
210 {
211 assert(((nextIndexInChain - bucketIndex) & mask) > 0);
212 bucket->m_nextOffset = (nextIndexInChain - bucketIndex) & mask;
213 }
214
215 unsigned offset = (bucketIndex - precedingIndexInChain) & mask;
216 assert(offset != 0);
217
218 if (precedingIndexInChain == homeIndex)
219 {
220 buckets[precedingIndexInChain].m_firstOffset = offset;
221 }
222 else
223 {
224 buckets[precedingIndexInChain].m_nextOffset = offset;
225 }
226
227 bucket->m_hash = hash;
228 bucket->m_key = key;
229 bucket->m_value = value;
230 return true;
231 }
232 }
233
234 // No more free buckets.
235 return false;
236 }
237
238 //------------------------------------------------------------------------
239 // HashTableBase::TryGetBucket: attempts to get the bucket that holds a
240 // particular key.
241 //
242 // Arguments:
243 // hash - The hash code of the key to find.
244 // key - The key to find.
245 // precedingIndex - An output parameter that will hold the index of the
246 // preceding bucket in the chain for the key. May be
247 // equal to `bucketIndex` if the key is stored in its
248 // home bucket.
249 // bucketIndex - An output parameter that will hold the index of the
250 // bucket that stores the key.
251 //
252 // Returns:
253 // True if the key was successfully found; false otherwise.
254 bool TryGetBucket(unsigned hash, const TKey& key, unsigned* precedingIndex, unsigned* bucketIndex) const
255 {
256 if (m_numBuckets == 0)
257 {
258 return false;
259 }
260
261 const unsigned mask = m_numBuckets - 1;
262 unsigned index = hash & mask;
263
264 Bucket* bucket = &m_buckets[index];
265 if (bucket->m_isFull && bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
266 {
267 *precedingIndex = index;
268 *bucketIndex = index;
269 return true;
270 }
271
272 for (unsigned offset = bucket->m_firstOffset; offset != 0; offset = bucket->m_nextOffset)
273 {
274 unsigned precedingIndexInChain = index;
275
276 index = (index + offset) & mask;
277 bucket = &m_buckets[index];
278
279 assert(bucket->m_isFull);
280 if (bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
281 {
282 *precedingIndex = precedingIndexInChain;
283 *bucketIndex = index;
284 return true;
285 }
286 }
287
288 return false;
289 }
290
291 //------------------------------------------------------------------------
292 // HashTableBase::Resize: allocates a new bucket array twice the size of
293 // the current array and copies the key-value pairs
294 // from the current bucket array into the new array.
295 void Resize()
296 {
297 Bucket* currentBuckets = m_buckets;
298
299 unsigned newNumBuckets = m_numBuckets == 0 ? InitialNumBuckets : m_numBuckets * 2;
300 Bucket* newBuckets = m_alloc.template allocate<Bucket>(newNumBuckets);
301 memset(newBuckets, 0, sizeof(Bucket) * newNumBuckets);
302
303 for (unsigned currentIndex = 0; currentIndex < m_numBuckets; currentIndex++)
304 {
305 Bucket* currentBucket = &currentBuckets[currentIndex];
306 if (!currentBucket->m_isFull)
307 {
308 continue;
309 }
310
311 bool inserted =
312 Insert(newBuckets, newNumBuckets, currentBucket->m_hash, currentBucket->m_key, currentBucket->m_value);
313 (assert(inserted), (void)inserted);
314 }
315
316 m_numBuckets = newNumBuckets;
317 m_buckets = newBuckets;
318 }
319
320protected:
321 HashTableBase(TAllocator alloc, Bucket* buckets, unsigned numBuckets)
322 : m_alloc(alloc), m_buckets(buckets), m_numBuckets(numBuckets), m_numFullBuckets(0)
323 {
324 if (numBuckets > 0)
325 {
326 assert((numBuckets & (numBuckets - 1)) == 0); // Size must be a power of 2
327 assert(m_buckets != nullptr);
328
329 memset(m_buckets, 0, sizeof(Bucket) * numBuckets);
330 }
331 }
332
333public:
334#ifdef DEBUG
335 class Iterator;
336
337 class KeyValuePair final
338 {
339 friend class HashTableBase<TKey, TValue, TKeyInfo>::Iterator;
340
341 Bucket* m_bucket;
342
343 KeyValuePair(Bucket* bucket) : m_bucket(bucket)
344 {
345 assert(m_bucket != nullptr);
346 }
347
348 public:
349 KeyValuePair() : m_bucket(nullptr)
350 {
351 }
352
353 inline TKey& Key()
354 {
355 return m_bucket->m_key;
356 }
357
358 inline TValue& Value()
359 {
360 return m_bucket->m_value;
361 }
362 };
363
364 // NOTE: HashTableBase only provides iterators in debug builds because the order in which
365 // the iterator type produces values is undefined (e.g. it is not related to the order in
366 // which key-value pairs were inserted).
367 class Iterator final
368 {
369 friend class HashTableBase<TKey, TValue, TKeyInfo>;
370
371 Bucket* m_buckets;
372 unsigned m_numBuckets;
373 unsigned m_index;
374
375 Iterator(Bucket* buckets, unsigned numBuckets, unsigned index)
376 : m_buckets(buckets), m_numBuckets(numBuckets), m_index(index)
377 {
378 assert((buckets != nullptr) || (numBuckets == 0));
379 assert(index <= numBuckets);
380
381 // Advance to the first occupied bucket
382 while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull)
383 {
384 m_index++;
385 }
386 }
387
388 public:
389 Iterator() : m_buckets(nullptr), m_numBuckets(0), m_index(0)
390 {
391 }
392
393 KeyValuePair operator*() const
394 {
395 if (m_index >= m_numBuckets)
396 {
397 return KeyValuePair();
398 }
399
400 Bucket* bucket = &m_buckets[m_index];
401 assert(bucket->m_isFull);
402 return KeyValuePair(bucket);
403 }
404
405 KeyValuePair operator->() const
406 {
407 return this->operator*();
408 }
409
410 bool operator==(const Iterator& other) const
411 {
412 return (m_buckets == other.m_buckets) && (m_index == other.m_index);
413 }
414
415 bool operator!=(const Iterator& other) const
416 {
417 return (m_buckets != other.m_buckets) || (m_index != other.m_index);
418 }
419
420 Iterator& operator++()
421 {
422 do
423 {
424 m_index++;
425 } while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull);
426
427 return *this;
428 }
429 };
430
431 Iterator begin() const
432 {
433 return Iterator(m_buckets, m_numBuckets, 0);
434 }
435
436 Iterator end() const
437 {
438 return Iterator(m_buckets, m_numBuckets, m_numBuckets);
439 }
440#endif // DEBUG
441
442 unsigned Count() const
443 {
444 return m_numFullBuckets;
445 }
446
447 void Clear()
448 {
449 if (m_numBuckets > 0)
450 {
451 memset(m_buckets, 0, sizeof(Bucket) * m_numBuckets);
452 m_numFullBuckets = 0;
453 }
454 }
455
456 //------------------------------------------------------------------------
457 // HashTableBase::AddOrUpdate: adds a key-value pair to the hash table if
458 // the key does not already exist in the
459 // table, or updates the value if the key
460 // already exists.
461 //
462 // Arguments:
463 // key - The key for which to add or update a value.
464 // value - The value.
465 //
466 // Returns:
467 // True if the value was added; false if it was updated.
468 bool AddOrUpdate(const TKey& key, const TValue& value)
469 {
470 unsigned hash = TKeyInfo::GetHashCode(key);
471
472 unsigned unused, index;
473 if (TryGetBucket(hash, key, &unused, &index))
474 {
475 m_buckets[index].m_value = value;
476 return false;
477 }
478
479 // If the load is greater than 0.8, resize the table before inserting.
480 if ((m_numFullBuckets * 5) >= (m_numBuckets * 4))
481 {
482 Resize();
483 }
484
485 bool inserted = Insert(m_buckets, m_numBuckets, hash, key, value);
486 (assert(inserted), (void)inserted);
487
488 m_numFullBuckets++;
489
490 return true;
491 }
492
493 //------------------------------------------------------------------------
494 // HashTableBase::TryRemove: removes a key from the hash table and returns
495 // its value if the key exists in the table.
496 //
497 // Arguments:
498 // key - The key to remove from the table.
499 // value - An output parameter that will hold the value for the removed
500 // key.
501 //
502 // Returns:
503 // True if the key was removed from the table; false otherwise.
504 bool TryRemove(const TKey& key, TValue* value)
505 {
506 unsigned hash = TKeyInfo::GetHashCode(key);
507
508 unsigned precedingIndexInChain, bucketIndex;
509 if (!TryGetBucket(hash, key, &precedingIndexInChain, &bucketIndex))
510 {
511 return false;
512 }
513
514 Bucket* bucket = &m_buckets[bucketIndex];
515
516 if (precedingIndexInChain != bucketIndex)
517 {
518 const unsigned mask = m_numBuckets - 1;
519 unsigned homeIndex = hash & mask;
520
521 unsigned nextOffset;
522 if (bucket->m_nextOffset == 0)
523 {
524 nextOffset = 0;
525 }
526 else
527 {
528 unsigned nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
529 nextOffset = (nextIndexInChain - precedingIndexInChain) & mask;
530 }
531
532 if (precedingIndexInChain == homeIndex)
533 {
534 m_buckets[precedingIndexInChain].m_firstOffset = nextOffset;
535 }
536 else
537 {
538 m_buckets[precedingIndexInChain].m_nextOffset = nextOffset;
539 }
540 }
541
542 bucket->m_isFull = false;
543 bucket->m_nextOffset = 0;
544
545 m_numFullBuckets--;
546
547 *value = bucket->m_value;
548 return true;
549 }
550
551 //------------------------------------------------------------------------
552 // HashTableBase::TryGetValue: retrieves the value for a key if the key
553 // exists in the table.
554 //
555 // Arguments:
556 // key - The key to find from the table.
557 // value - An output parameter that will hold the value for the key.
558 //
559 // Returns:
560 // True if the key was found in the table; false otherwise.
561 bool TryGetValue(const TKey& key, TValue* value) const
562 {
563 unsigned unused, index;
564 if (!TryGetBucket(TKeyInfo::GetHashCode(key), key, &unused, &index))
565 {
566 return false;
567 }
568
569 *value = m_buckets[index].m_value;
570 return true;
571 }
572
573 //------------------------------------------------------------------------
574 // HashTableBase::Contains: returns true if a key exists in the table and
575 // false otherwise.
576 //
577 // Arguments:
578 // key - The key to find from the table.
579 //
580 // Returns:
581 // True if the key was found in the table; false otherwise.
582 bool Contains(const TKey& key) const
583 {
584 unsigned unused, index;
585 return TryGetBucket(TKeyInfo::GetHashCode(key), key, &unused, &index);
586 }
587};
588
589//------------------------------------------------------------------------
590// HashTable: a simple subclass of `HashTableBase` that always uses heap
591// storage for its bucket array.
592template <typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>, typename TAllocator = CompAllocator>
593class HashTable final : public HashTableBase<TKey, TValue, TKeyInfo, TAllocator>
594{
595 typedef HashTableBase<TKey, TValue, TKeyInfo, TAllocator> TBase;
596
597 static unsigned RoundUp(unsigned initialSize)
598 {
599 return 1 << genLog2(initialSize);
600 }
601
602public:
603 HashTable(TAllocator alloc) : TBase(alloc, nullptr, 0)
604 {
605 }
606
607 HashTable(TAllocator alloc, unsigned initialSize)
608 : TBase(alloc, alloc.template allocate<TBase::Bucket>(RoundUp(initialSize)), RoundUp(initialSize))
609 {
610 }
611};
612
613//------------------------------------------------------------------------
614// SmallHashTable: an alternative to `HashTable` that stores the initial
615// bucket array inline. Most useful for situations where
616// the number of key-value pairs that will be stored in
617// the map at any given time falls below a certain
618// threshold. Switches to heap storage once the initial
619// inline storage is exhausted.
620template <typename TKey,
621 typename TValue,
622 unsigned NumInlineBuckets = 8,
623 typename TKeyInfo = HashTableInfo<TKey>,
624 typename TAllocator = CompAllocator>
625class SmallHashTable final : public HashTableBase<TKey, TValue, TKeyInfo, TAllocator>
626{
627 typedef HashTableBase<TKey, TValue, TKeyInfo, TAllocator> TBase;
628
629 enum : unsigned
630 {
631 RoundedNumInlineBuckets = 1 << ConstLog2<NumInlineBuckets>::value
632 };
633
634 typename TBase::Bucket m_inlineBuckets[RoundedNumInlineBuckets];
635
636public:
637 SmallHashTable(TAllocator alloc) : TBase(alloc, m_inlineBuckets, RoundedNumInlineBuckets)
638 {
639 }
640};
641
642#endif // _SMALLHASHTABLE_H_
643