1 | #include <iostream> |
2 | #include <fstream> |
3 | #include <iomanip> |
4 | #include <unordered_map> |
5 | #include <sparsehash/dense_hash_map> |
6 | |
7 | #include <Common/Stopwatch.h> |
8 | |
9 | #include <common/StringRef.h> |
10 | #include <Common/Arena.h> |
11 | |
12 | #include <IO/ReadBufferFromFileDescriptor.h> |
13 | #include <IO/ReadHelpers.h> |
14 | |
15 | //#define DBMS_HASH_MAP_COUNT_COLLISIONS |
16 | #include <Common/HashTable/HashMap.h> |
17 | |
18 | int main(int argc, char ** argv) |
19 | { |
20 | if (argc < 2) |
21 | { |
22 | std::cerr << "Usage: program n\n" ; |
23 | return 1; |
24 | } |
25 | |
26 | std::cerr << std::fixed << std::setprecision(3); |
27 | std::ofstream devnull("/dev/null" ); |
28 | |
29 | DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); |
30 | size_t n = atoi(argv[1]); |
31 | size_t elems_show = 1; |
32 | |
33 | using Vec = std::vector<std::string>; |
34 | using Set = std::unordered_map<std::string, int>; |
35 | using RefsSet = std::unordered_map<StringRef, int, StringRefHash>; |
36 | using DenseSet = ::google::dense_hash_map<std::string, int>; |
37 | using RefsDenseSet = ::google::dense_hash_map<StringRef, int, StringRefHash>; |
38 | using RefsHashMap = HashMap<StringRef, int, StringRefHash>; |
39 | Vec vec; |
40 | |
41 | vec.reserve(n); |
42 | |
43 | { |
44 | Stopwatch watch; |
45 | |
46 | std::string s; |
47 | for (size_t i = 0; i < n && !in.eof(); ++i) |
48 | { |
49 | DB::readEscapedString(s, in); |
50 | DB::assertChar('\n', in); |
51 | vec.push_back(s); |
52 | } |
53 | |
54 | std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, " |
55 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
56 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
57 | << std::endl; |
58 | } |
59 | |
60 | { |
61 | DB::Arena pool; |
62 | Stopwatch watch; |
63 | const char * res = nullptr; |
64 | |
65 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
66 | { |
67 | const char * tmp = pool.insert(it->data(), it->size()); |
68 | if (it == vec.begin()) |
69 | res = tmp; |
70 | } |
71 | |
72 | std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, " |
73 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
74 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
75 | << std::endl; |
76 | |
77 | devnull.write(res, 100); |
78 | devnull << std::endl; |
79 | } |
80 | |
81 | { |
82 | Set set; |
83 | Stopwatch watch; |
84 | |
85 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
86 | set[*it] = 0; |
87 | |
88 | std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, " |
89 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
90 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
91 | << std::endl; |
92 | |
93 | size_t i = 0; |
94 | for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
95 | { |
96 | devnull << it->first; |
97 | devnull << std::endl; |
98 | } |
99 | } |
100 | |
101 | { |
102 | RefsSet set; |
103 | Stopwatch watch; |
104 | |
105 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
106 | set[StringRef(*it)] = 0; |
107 | |
108 | std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, " |
109 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
110 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
111 | << std::endl; |
112 | |
113 | size_t i = 0; |
114 | for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
115 | { |
116 | devnull.write(it->first.data, it->first.size); |
117 | devnull << std::endl; |
118 | } |
119 | } |
120 | |
121 | { |
122 | DB::Arena pool; |
123 | RefsSet set; |
124 | Stopwatch watch; |
125 | |
126 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
127 | set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0; |
128 | |
129 | std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, " |
130 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
131 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
132 | << std::endl; |
133 | |
134 | size_t i = 0; |
135 | for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
136 | { |
137 | devnull.write(it->first.data, it->first.size); |
138 | devnull << std::endl; |
139 | } |
140 | } |
141 | |
142 | { |
143 | DenseSet set; |
144 | set.set_empty_key(DenseSet::key_type()); |
145 | Stopwatch watch; |
146 | |
147 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
148 | set[*it] = 0; |
149 | |
150 | std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, " |
151 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
152 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
153 | << std::endl; |
154 | |
155 | size_t i = 0; |
156 | for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
157 | { |
158 | devnull << it->first; |
159 | devnull << std::endl; |
160 | } |
161 | } |
162 | |
163 | { |
164 | RefsDenseSet set; |
165 | set.set_empty_key(RefsDenseSet::key_type()); |
166 | Stopwatch watch; |
167 | |
168 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
169 | set[StringRef(it->data(), it->size())] = 0; |
170 | |
171 | std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, " |
172 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
173 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
174 | << std::endl; |
175 | |
176 | size_t i = 0; |
177 | for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
178 | { |
179 | devnull.write(it->first.data, it->first.size); |
180 | devnull << std::endl; |
181 | } |
182 | } |
183 | |
184 | { |
185 | DB::Arena pool; |
186 | RefsDenseSet set; |
187 | set.set_empty_key(RefsDenseSet::key_type()); |
188 | Stopwatch watch; |
189 | |
190 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
191 | set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0; |
192 | |
193 | std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, " |
194 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
195 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
196 | << std::endl; |
197 | |
198 | size_t i = 0; |
199 | for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
200 | { |
201 | devnull.write(it->first.data, it->first.size); |
202 | devnull << std::endl; |
203 | } |
204 | } |
205 | |
206 | { |
207 | RefsHashMap set; |
208 | Stopwatch watch; |
209 | |
210 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
211 | { |
212 | RefsHashMap::LookupResult inserted_it; |
213 | bool inserted; |
214 | set.emplace(StringRef(*it), inserted_it, inserted); |
215 | } |
216 | |
217 | std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, " |
218 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
219 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
220 | << std::endl; |
221 | |
222 | size_t i = 0; |
223 | for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
224 | { |
225 | devnull.write(it->getKey().data, it->getKey().size); |
226 | devnull << std::endl; |
227 | } |
228 | |
229 | //std::cerr << set.size() << ", " << set.getCollisions() << std::endl; |
230 | } |
231 | |
232 | { |
233 | DB::Arena pool; |
234 | RefsHashMap set; |
235 | Stopwatch watch; |
236 | |
237 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
238 | { |
239 | RefsHashMap::LookupResult inserted_it; |
240 | bool inserted; |
241 | set.emplace(StringRef(pool.insert(it->data(), it->size()), it->size()), inserted_it, inserted); |
242 | } |
243 | |
244 | std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, " |
245 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
246 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
247 | << std::endl; |
248 | |
249 | size_t i = 0; |
250 | for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
251 | { |
252 | devnull.write(it->getKey().data, it->getKey().size); |
253 | devnull << std::endl; |
254 | } |
255 | } |
256 | |
257 | return 0; |
258 | } |
259 | |