#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include <string>
#include <unordered_map>
#include <vector>

#include <unistd.h>

#include <sparsehash/dense_hash_map>

#include <Common/Stopwatch.h>

#include <common/StringRef.h>
#include <Common/Arena.h>

#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>

//#define DBMS_HASH_MAP_COUNT_COLLISIONS
#include <Common/HashTable/HashMap.h>
17
18int main(int argc, char ** argv)
19{
20 if (argc < 2)
21 {
22 std::cerr << "Usage: program n\n";
23 return 1;
24 }
25
26 std::cerr << std::fixed << std::setprecision(3);
27 std::ofstream devnull("/dev/null");
28
29 DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
30 size_t n = atoi(argv[1]);
31 size_t elems_show = 1;
32
33 using Vec = std::vector<std::string>;
34 using Set = std::unordered_map<std::string, int>;
35 using RefsSet = std::unordered_map<StringRef, int, StringRefHash>;
36 using DenseSet = ::google::dense_hash_map<std::string, int>;
37 using RefsDenseSet = ::google::dense_hash_map<StringRef, int, StringRefHash>;
38 using RefsHashMap = HashMap<StringRef, int, StringRefHash>;
39 Vec vec;
40
41 vec.reserve(n);
42
43 {
44 Stopwatch watch;
45
46 std::string s;
47 for (size_t i = 0; i < n && !in.eof(); ++i)
48 {
49 DB::readEscapedString(s, in);
50 DB::assertChar('\n', in);
51 vec.push_back(s);
52 }
53
54 std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, "
55 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
56 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
57 << std::endl;
58 }
59
60 {
61 DB::Arena pool;
62 Stopwatch watch;
63 const char * res = nullptr;
64
65 for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
66 {
67 const char * tmp = pool.insert(it->data(), it->size());
68 if (it == vec.begin())
69 res = tmp;
70 }
71
72 std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, "
73 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
74 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
75 << std::endl;
76
77 devnull.write(res, 100);
78 devnull << std::endl;
79 }
80
81 {
82 Set set;
83 Stopwatch watch;
84
85 for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
86 set[*it] = 0;
87
88 std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
89 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
90 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
91 << std::endl;
92
93 size_t i = 0;
94 for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
95 {
96 devnull << it->first;
97 devnull << std::endl;
98 }
99 }
100
101 {
102 RefsSet set;
103 Stopwatch watch;
104
105 for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
106 set[StringRef(*it)] = 0;
107
108 std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
109 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
110 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
111 << std::endl;
112
113 size_t i = 0;
114 for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
115 {
116 devnull.write(it->first.data, it->first.size);
117 devnull << std::endl;
118 }
119 }
120
121 {
122 DB::Arena pool;
123 RefsSet set;
124 Stopwatch watch;
125
126 for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
127 set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;
128
129 std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
130 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
131 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
132 << std::endl;
133
134 size_t i = 0;
135 for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
136 {
137 devnull.write(it->first.data, it->first.size);
138 devnull << std::endl;
139 }
140 }
141
142 {
143 DenseSet set;
144 set.set_empty_key(DenseSet::key_type());
145 Stopwatch watch;
146
147 for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
148 set[*it] = 0;
149
150 std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
151 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
152 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
153 << std::endl;
154
155 size_t i = 0;
156 for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
157 {
158 devnull << it->first;
159 devnull << std::endl;
160 }
161 }
162
163 {
164 RefsDenseSet set;
165 set.set_empty_key(RefsDenseSet::key_type());
166 Stopwatch watch;
167
168 for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
169 set[StringRef(it->data(), it->size())] = 0;
170
171 std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
172 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
173 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
174 << std::endl;
175
176 size_t i = 0;
177 for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
178 {
179 devnull.write(it->first.data, it->first.size);
180 devnull << std::endl;
181 }
182 }
183
184 {
185 DB::Arena pool;
186 RefsDenseSet set;
187 set.set_empty_key(RefsDenseSet::key_type());
188 Stopwatch watch;
189
190 for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
191 set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;
192
193 std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
194 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
195 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
196 << std::endl;
197
198 size_t i = 0;
199 for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
200 {
201 devnull.write(it->first.data, it->first.size);
202 devnull << std::endl;
203 }
204 }
205
206 {
207 RefsHashMap set;
208 Stopwatch watch;
209
210 for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
211 {
212 RefsHashMap::LookupResult inserted_it;
213 bool inserted;
214 set.emplace(StringRef(*it), inserted_it, inserted);
215 }
216
217 std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
218 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
219 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
220 << std::endl;
221
222 size_t i = 0;
223 for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
224 {
225 devnull.write(it->getKey().data, it->getKey().size);
226 devnull << std::endl;
227 }
228
229 //std::cerr << set.size() << ", " << set.getCollisions() << std::endl;
230 }
231
232 {
233 DB::Arena pool;
234 RefsHashMap set;
235 Stopwatch watch;
236
237 for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
238 {
239 RefsHashMap::LookupResult inserted_it;
240 bool inserted;
241 set.emplace(StringRef(pool.insert(it->data(), it->size()), it->size()), inserted_it, inserted);
242 }
243
244 std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, "
245 << vec.size() / watch.elapsedSeconds() << " rows/sec., "
246 << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
247 << std::endl;
248
249 size_t i = 0;
250 for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
251 {
252 devnull.write(it->getKey().data, it->getKey().size);
253 devnull << std::endl;
254 }
255 }
256
257 return 0;
258}
259