1 | #pragma once |
2 | |
3 | #include <Common/Arena.h> |
4 | |
5 | /** |
6 | * In some aggregation scenarios, when adding a key to the hash table, we |
7 | * start with a temporary key object, and if it turns out to be a new key, |
8 | * we must make it persistent (e.g. copy to an Arena) and use the resulting |
9 | * persistent object as hash table key. This happens only for StringRef keys, |
10 | * because other key types are stored by value, but StringRef is a pointer-like |
11 | * type: the actual data are stored elsewhere. Even for StringRef, we don't |
12 | * make a persistent copy of the key in each of the following cases: |
13 | * 1) the aggregation method doesn't use temporary keys, so they're persistent |
14 | * from the start; |
  *   2) the key is already present in the hash table;
16 | * 3) that particular key is stored by value, e.g. a short StringRef key in |
17 | * StringHashMap. |
18 | * |
19 | * In the past, the caller was responsible for making the key persistent after |
  * it was inserted. emplace() returned whether the key is new or not, so the
21 | * caller only stored new keys (this is case (2) from the above list). However, |
22 | * now we are adding a compound hash table for StringRef keys, so case (3) |
23 | * appears. The decision about persistence now depends on some properties of |
24 | * the key, and the logic of this decision is tied to the particular hash table |
25 | * implementation. This means that the hash table user now doesn't have enough |
26 | * data and logic to make this decision by itself. |
27 | * |
28 | * To support these new requirements, we now manage key persistence by passing |
29 | * a special key holder to emplace(), which has the functions to make the key |
30 | * persistent or to discard it. emplace() then calls these functions at the |
31 | * appropriate moments. |
32 | * |
33 | * This approach has the following benefits: |
34 | * - no extra runtime branches in the caller to make the key persistent. |
35 | * - no additional data is stored in the hash table itself, which is important |
36 | * when it's used in aggregate function states. |
37 | * - no overhead when the key memory management isn't needed: we just pass the |
38 | * bare key without any wrapper to emplace(), and the default callbacks do |
39 | * nothing. |
40 | * |
41 | * This file defines the default key persistence functions, as well as two |
42 | * different key holders and corresponding functions for storing StringRef |
43 | * keys to Arena. |
44 | */ |
45 | |
46 | /** |
47 | * Returns the key. Can return the temporary key initially. |
48 | * After the call to keyHolderPersistKey(), must return the persistent key. |
49 | */ |
50 | template <typename Key> |
51 | inline Key & ALWAYS_INLINE keyHolderGetKey(Key && key) { return key; } |
52 | |
53 | /** |
54 | * Make the key persistent. keyHolderGetKey() must return the persistent key |
55 | * after this call. |
56 | */ |
57 | template <typename Key> |
58 | inline void ALWAYS_INLINE keyHolderPersistKey(Key &&) {} |
59 | |
60 | /** |
61 | * Discard the key. Calling keyHolderGetKey() is ill-defined after this. |
62 | */ |
63 | template <typename Key> |
64 | inline void ALWAYS_INLINE keyHolderDiscardKey(Key &&) {} |
65 | |
66 | namespace DB |
67 | { |
68 | |
69 | /** |
70 | * ArenaKeyHolder is a key holder for hash tables that serializes a StringRef |
71 | * key to an Arena. |
72 | */ |
73 | struct ArenaKeyHolder |
74 | { |
75 | StringRef key; |
76 | Arena & pool; |
77 | |
78 | }; |
79 | |
80 | } |
81 | |
82 | inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::ArenaKeyHolder & holder) |
83 | { |
84 | return holder.key; |
85 | } |
86 | |
87 | inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder & holder) |
88 | { |
89 | // Hash table shouldn't ask us to persist a zero key |
90 | assert(holder.key.size > 0); |
91 | holder.key.data = holder.pool.insert(holder.key.data, holder.key.size); |
92 | } |
93 | |
94 | inline void ALWAYS_INLINE keyHolderDiscardKey(DB::ArenaKeyHolder &) |
95 | { |
96 | } |
97 | |
98 | namespace DB |
99 | { |
100 | |
101 | /** |
102 | * SerializedKeyHolder is a key holder for a StringRef key that is already |
103 | * serialized to an Arena. The key must be the last allocation in this Arena, |
104 | * and is discarded by rolling back the allocation. |
105 | */ |
106 | struct SerializedKeyHolder |
107 | { |
108 | StringRef key; |
109 | Arena & pool; |
110 | }; |
111 | |
112 | } |
113 | |
114 | inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::SerializedKeyHolder & holder) |
115 | { |
116 | return holder.key; |
117 | } |
118 | |
119 | inline void ALWAYS_INLINE keyHolderPersistKey(DB::SerializedKeyHolder &) |
120 | { |
121 | } |
122 | |
123 | inline void ALWAYS_INLINE keyHolderDiscardKey(DB::SerializedKeyHolder & holder) |
124 | { |
125 | [[maybe_unused]] void * new_head = holder.pool.rollback(holder.key.size); |
126 | assert(new_head == holder.key.data); |
127 | holder.key.data = nullptr; |
128 | holder.key.size = 0; |
129 | } |
130 | |
131 | |