1#pragma once
2
3#include <Common/Arena.h>
4
5/**
6 * In some aggregation scenarios, when adding a key to the hash table, we
7 * start with a temporary key object, and if it turns out to be a new key,
8 * we must make it persistent (e.g. copy to an Arena) and use the resulting
9 * persistent object as hash table key. This happens only for StringRef keys,
10 * because other key types are stored by value, but StringRef is a pointer-like
11 * type: the actual data are stored elsewhere. Even for StringRef, we don't
12 * make a persistent copy of the key in each of the following cases:
13 * 1) the aggregation method doesn't use temporary keys, so they're persistent
14 * from the start;
 * 2) the key is already present in the hash table;
16 * 3) that particular key is stored by value, e.g. a short StringRef key in
17 * StringHashMap.
18 *
19 * In the past, the caller was responsible for making the key persistent after
 * it was inserted. emplace() returned whether the key is new or not, so the
21 * caller only stored new keys (this is case (2) from the above list). However,
22 * now we are adding a compound hash table for StringRef keys, so case (3)
23 * appears. The decision about persistence now depends on some properties of
24 * the key, and the logic of this decision is tied to the particular hash table
25 * implementation. This means that the hash table user now doesn't have enough
26 * data and logic to make this decision by itself.
27 *
28 * To support these new requirements, we now manage key persistence by passing
29 * a special key holder to emplace(), which has the functions to make the key
30 * persistent or to discard it. emplace() then calls these functions at the
31 * appropriate moments.
32 *
33 * This approach has the following benefits:
34 * - no extra runtime branches in the caller to make the key persistent.
35 * - no additional data is stored in the hash table itself, which is important
36 * when it's used in aggregate function states.
37 * - no overhead when the key memory management isn't needed: we just pass the
38 * bare key without any wrapper to emplace(), and the default callbacks do
39 * nothing.
40 *
41 * This file defines the default key persistence functions, as well as two
42 * different key holders and corresponding functions for storing StringRef
43 * keys to Arena.
44 */
45
/**
 * Default accessor, used when a bare key is passed to emplace() without any
 * holder wrapped around it: the argument itself is handed back by reference.
 *
 * General contract for key holders: the key returned here may initially be a
 * temporary one, but once keyHolderPersistKey() has been called it must be the
 * persistent key.
 */
template <typename Key>
inline Key & ALWAYS_INLINE keyHolderGetKey(Key && key)
{
    return key;
}
52
/**
 * Default persistence hook for a bare key: there is nothing to store
 * elsewhere, so this overload does nothing.
 *
 * General contract for key holders: after this call, keyHolderGetKey() must
 * return the persistent key.
 */
template <typename Key>
inline void ALWAYS_INLINE keyHolderPersistKey(Key && /* key */)
{
}
59
/**
 * Default discard hook for a bare key: there is nothing to release, so this
 * overload does nothing.
 *
 * General contract for key holders: calling keyHolderGetKey() after the key
 * has been discarded is ill-defined.
 */
template <typename Key>
inline void ALWAYS_INLINE keyHolderDiscardKey(Key && /* key */)
{
}
65
66namespace DB
67{
68
69/**
70 * ArenaKeyHolder is a key holder for hash tables that serializes a StringRef
71 * key to an Arena.
72 */
73struct ArenaKeyHolder
74{
75 StringRef key;
76 Arena & pool;
77
78};
79
80}
81
82inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::ArenaKeyHolder & holder)
83{
84 return holder.key;
85}
86
87inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder & holder)
88{
89 // Hash table shouldn't ask us to persist a zero key
90 assert(holder.key.size > 0);
91 holder.key.data = holder.pool.insert(holder.key.data, holder.key.size);
92}
93
94inline void ALWAYS_INLINE keyHolderDiscardKey(DB::ArenaKeyHolder &)
95{
96}
97
98namespace DB
99{
100
101/**
102 * SerializedKeyHolder is a key holder for a StringRef key that is already
103 * serialized to an Arena. The key must be the last allocation in this Arena,
104 * and is discarded by rolling back the allocation.
105 */
106struct SerializedKeyHolder
107{
108 StringRef key;
109 Arena & pool;
110};
111
112}
113
114inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::SerializedKeyHolder & holder)
115{
116 return holder.key;
117}
118
119inline void ALWAYS_INLINE keyHolderPersistKey(DB::SerializedKeyHolder &)
120{
121}
122
123inline void ALWAYS_INLINE keyHolderDiscardKey(DB::SerializedKeyHolder & holder)
124{
125 [[maybe_unused]] void * new_head = holder.pool.rollback(holder.key.size);
126 assert(new_head == holder.key.data);
127 holder.key.data = nullptr;
128 holder.key.size = 0;
129}
130
131