1#include <iostream>
2#include <iomanip>
3#include <vector>
4
5#include <unordered_map>
6
7#include <sparsehash/dense_hash_map>
8#include <sparsehash/sparse_hash_map>
9
10#include <Common/Stopwatch.h>
11
12//#define DBMS_HASH_MAP_COUNT_COLLISIONS
13#define DBMS_HASH_MAP_DEBUG_RESIZES
14
15#include <Core/Types.h>
16#include <IO/ReadBufferFromFile.h>
17#include <IO/ReadHelpers.h>
18#include <Compression/CompressedReadBuffer.h>
19#include <common/StringRef.h>
20#include <Common/HashTable/HashMap.h>
21#include <Interpreters/AggregationCommon.h>
22
23
24struct SmallStringRef
25{
26 UInt32 size = 0;
27
28 union
29 {
30 const char * data_big;
31 char data_small[12];
32 };
33
34 bool isSmall() const { return size <= 12; }
35
36 const char * data() const
37 {
38 return isSmall() ? data_small : data_big;
39 }
40
41 SmallStringRef(const char * data_, size_t size_)
42 {
43 size = size_;
44
45 if (isSmall())
46 memcpy(data_small, data_, size_);
47 else
48 data_big = data_;
49 }
50
51 SmallStringRef(const unsigned char * data_, size_t size_) : SmallStringRef(reinterpret_cast<const char *>(data_), size_) {}
52 explicit SmallStringRef(const std::string & s) : SmallStringRef(s.data(), s.size()) {}
53 SmallStringRef() {}
54
55 std::string toString() const { return std::string(data(), size); }
56};
57
58
59inline bool operator==(SmallStringRef lhs, SmallStringRef rhs)
60{
61 if (lhs.size != rhs.size)
62 return false;
63
64 if (lhs.size == 0)
65 return true;
66
67#ifdef __SSE2__
68 return memequalSSE2Wide(lhs.data(), rhs.data(), lhs.size);
69#else
70 return false;
71#endif
72}
73
74
75namespace ZeroTraits
76{
77 template <>
78 inline bool check<SmallStringRef>(SmallStringRef x) { return x.size == 0; }
79
80 template <>
81 inline void set<SmallStringRef>(SmallStringRef & x) { x.size = 0; }
82}
83
84template <>
85struct DefaultHash<SmallStringRef>
86{
87 size_t operator() (SmallStringRef x) const
88 {
89 return DefaultHash<StringRef>()(StringRef(x.data(), x.size));
90 }
91};
92
93
94using Value = UInt64;
95
96
97int main(int argc, char ** argv)
98{
99 if (argc < 3)
100 {
101 std::cerr << "Usage: program n m\n";
102 return 1;
103 }
104
105 size_t n = atoi(argv[1]);
106 size_t m = atoi(argv[2]);
107
108 DB::Arena pool;
109 std::vector<StringRef> data(n);
110
111 std::cerr << "sizeof(Key) = " << sizeof(SmallStringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl;
112
113 {
114 Stopwatch watch;
115 DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
116 DB::CompressedReadBuffer in2(in1);
117
118 std::string tmp;
119 for (size_t i = 0; i < n && !in2.eof(); ++i)
120 {
121 DB::readStringBinary(tmp, in2);
122 data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size());
123 }
124
125 watch.stop();
126 std::cerr << std::fixed << std::setprecision(2)
127 << "Vector. Size: " << n
128 << ", elapsed: " << watch.elapsedSeconds()
129 << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
130 << std::endl;
131 }
132
133 if (!m || m == 1)
134 {
135 Stopwatch watch;
136
137 using Map = HashMapWithSavedHash<StringRef, Value>;
138
139 Map map;
140 Map::LookupResult it;
141 bool inserted;
142
143 for (size_t i = 0; i < n; ++i)
144 {
145 map.emplace(data[i], it, inserted);
146 if (inserted)
147 it->getMapped() = 0;
148 ++it->getMapped();
149 }
150
151 watch.stop();
152 std::cerr << std::fixed << std::setprecision(2)
153 << "HashMap (StringRef). Size: " << map.size()
154 << ", elapsed: " << watch.elapsedSeconds()
155 << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
156#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
157 << ", collisions: " << map.getCollisions()
158#endif
159 << std::endl;
160 }
161
162 if (!m || m == 2)
163 {
164 Stopwatch watch;
165
166 using Map = HashMapWithSavedHash<SmallStringRef, Value>;
167
168 Map map;
169 Map::LookupResult it;
170 bool inserted;
171
172 for (size_t i = 0; i < n; ++i)
173 {
174 map.emplace(SmallStringRef(data[i].data, data[i].size), it, inserted);
175 if (inserted)
176 it->getMapped() = 0;
177 ++it->getMapped();
178 }
179
180 watch.stop();
181 std::cerr << std::fixed << std::setprecision(2)
182 << "HashMap (SmallStringRef). Size: " << map.size()
183 << ", elapsed: " << watch.elapsedSeconds()
184 << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
185#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
186 << ", collisions: " << map.getCollisions()
187#endif
188 << std::endl;
189 }
190
191 return 0;
192}
193