| 1 | #include <iostream> |
| 2 | #include <fstream> |
| 3 | #include <iomanip> |
| 4 | #include <unordered_map> |
| 5 | #include <sparsehash/dense_hash_map> |
| 6 | |
| 7 | #include <Common/Stopwatch.h> |
| 8 | |
| 9 | #include <common/StringRef.h> |
| 10 | #include <Common/Arena.h> |
| 11 | |
| 12 | #include <IO/ReadBufferFromFileDescriptor.h> |
| 13 | #include <IO/ReadHelpers.h> |
| 14 | |
| 15 | //#define DBMS_HASH_MAP_COUNT_COLLISIONS |
| 16 | #include <Common/HashTable/HashMap.h> |
| 17 | |
| 18 | int main(int argc, char ** argv) |
| 19 | { |
| 20 | if (argc < 2) |
| 21 | { |
| 22 | std::cerr << "Usage: program n\n" ; |
| 23 | return 1; |
| 24 | } |
| 25 | |
| 26 | std::cerr << std::fixed << std::setprecision(3); |
| 27 | std::ofstream devnull("/dev/null" ); |
| 28 | |
| 29 | DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); |
| 30 | size_t n = atoi(argv[1]); |
| 31 | size_t elems_show = 1; |
| 32 | |
| 33 | using Vec = std::vector<std::string>; |
| 34 | using Set = std::unordered_map<std::string, int>; |
| 35 | using RefsSet = std::unordered_map<StringRef, int, StringRefHash>; |
| 36 | using DenseSet = ::google::dense_hash_map<std::string, int>; |
| 37 | using RefsDenseSet = ::google::dense_hash_map<StringRef, int, StringRefHash>; |
| 38 | using RefsHashMap = HashMap<StringRef, int, StringRefHash>; |
| 39 | Vec vec; |
| 40 | |
| 41 | vec.reserve(n); |
| 42 | |
| 43 | { |
| 44 | Stopwatch watch; |
| 45 | |
| 46 | std::string s; |
| 47 | for (size_t i = 0; i < n && !in.eof(); ++i) |
| 48 | { |
| 49 | DB::readEscapedString(s, in); |
| 50 | DB::assertChar('\n', in); |
| 51 | vec.push_back(s); |
| 52 | } |
| 53 | |
| 54 | std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, " |
| 55 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 56 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 57 | << std::endl; |
| 58 | } |
| 59 | |
| 60 | { |
| 61 | DB::Arena pool; |
| 62 | Stopwatch watch; |
| 63 | const char * res = nullptr; |
| 64 | |
| 65 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
| 66 | { |
| 67 | const char * tmp = pool.insert(it->data(), it->size()); |
| 68 | if (it == vec.begin()) |
| 69 | res = tmp; |
| 70 | } |
| 71 | |
| 72 | std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, " |
| 73 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 74 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 75 | << std::endl; |
| 76 | |
| 77 | devnull.write(res, 100); |
| 78 | devnull << std::endl; |
| 79 | } |
| 80 | |
| 81 | { |
| 82 | Set set; |
| 83 | Stopwatch watch; |
| 84 | |
| 85 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
| 86 | set[*it] = 0; |
| 87 | |
| 88 | std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, " |
| 89 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 90 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 91 | << std::endl; |
| 92 | |
| 93 | size_t i = 0; |
| 94 | for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
| 95 | { |
| 96 | devnull << it->first; |
| 97 | devnull << std::endl; |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | { |
| 102 | RefsSet set; |
| 103 | Stopwatch watch; |
| 104 | |
| 105 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
| 106 | set[StringRef(*it)] = 0; |
| 107 | |
| 108 | std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, " |
| 109 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 110 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 111 | << std::endl; |
| 112 | |
| 113 | size_t i = 0; |
| 114 | for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
| 115 | { |
| 116 | devnull.write(it->first.data, it->first.size); |
| 117 | devnull << std::endl; |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | { |
| 122 | DB::Arena pool; |
| 123 | RefsSet set; |
| 124 | Stopwatch watch; |
| 125 | |
| 126 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
| 127 | set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0; |
| 128 | |
| 129 | std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, " |
| 130 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 131 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 132 | << std::endl; |
| 133 | |
| 134 | size_t i = 0; |
| 135 | for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
| 136 | { |
| 137 | devnull.write(it->first.data, it->first.size); |
| 138 | devnull << std::endl; |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | { |
| 143 | DenseSet set; |
| 144 | set.set_empty_key(DenseSet::key_type()); |
| 145 | Stopwatch watch; |
| 146 | |
| 147 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
| 148 | set[*it] = 0; |
| 149 | |
| 150 | std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, " |
| 151 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 152 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 153 | << std::endl; |
| 154 | |
| 155 | size_t i = 0; |
| 156 | for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
| 157 | { |
| 158 | devnull << it->first; |
| 159 | devnull << std::endl; |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | { |
| 164 | RefsDenseSet set; |
| 165 | set.set_empty_key(RefsDenseSet::key_type()); |
| 166 | Stopwatch watch; |
| 167 | |
| 168 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
| 169 | set[StringRef(it->data(), it->size())] = 0; |
| 170 | |
| 171 | std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, " |
| 172 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 173 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 174 | << std::endl; |
| 175 | |
| 176 | size_t i = 0; |
| 177 | for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
| 178 | { |
| 179 | devnull.write(it->first.data, it->first.size); |
| 180 | devnull << std::endl; |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | { |
| 185 | DB::Arena pool; |
| 186 | RefsDenseSet set; |
| 187 | set.set_empty_key(RefsDenseSet::key_type()); |
| 188 | Stopwatch watch; |
| 189 | |
| 190 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
| 191 | set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0; |
| 192 | |
| 193 | std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, " |
| 194 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 195 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 196 | << std::endl; |
| 197 | |
| 198 | size_t i = 0; |
| 199 | for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
| 200 | { |
| 201 | devnull.write(it->first.data, it->first.size); |
| 202 | devnull << std::endl; |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | { |
| 207 | RefsHashMap set; |
| 208 | Stopwatch watch; |
| 209 | |
| 210 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
| 211 | { |
| 212 | RefsHashMap::LookupResult inserted_it; |
| 213 | bool inserted; |
| 214 | set.emplace(StringRef(*it), inserted_it, inserted); |
| 215 | } |
| 216 | |
| 217 | std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, " |
| 218 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 219 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 220 | << std::endl; |
| 221 | |
| 222 | size_t i = 0; |
| 223 | for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
| 224 | { |
| 225 | devnull.write(it->getKey().data, it->getKey().size); |
| 226 | devnull << std::endl; |
| 227 | } |
| 228 | |
| 229 | //std::cerr << set.size() << ", " << set.getCollisions() << std::endl; |
| 230 | } |
| 231 | |
| 232 | { |
| 233 | DB::Arena pool; |
| 234 | RefsHashMap set; |
| 235 | Stopwatch watch; |
| 236 | |
| 237 | for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) |
| 238 | { |
| 239 | RefsHashMap::LookupResult inserted_it; |
| 240 | bool inserted; |
| 241 | set.emplace(StringRef(pool.insert(it->data(), it->size()), it->size()), inserted_it, inserted); |
| 242 | } |
| 243 | |
| 244 | std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, " |
| 245 | << vec.size() / watch.elapsedSeconds() << " rows/sec., " |
| 246 | << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." |
| 247 | << std::endl; |
| 248 | |
| 249 | size_t i = 0; |
| 250 | for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) |
| 251 | { |
| 252 | devnull.write(it->getKey().data, it->getKey().size); |
| 253 | devnull << std::endl; |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | return 0; |
| 258 | } |
| 259 | |