1#include <iomanip>
2#include <iostream>
3#include <vector>
4
5#include <Common/Stopwatch.h>
6
7#define DBMS_HASH_MAP_COUNT_COLLISIONS
8#define DBMS_HASH_MAP_DEBUG_RESIZES
9
10#include <Compression/CompressedReadBuffer.h>
11#include <Core/Types.h>
12#include <IO/ReadBufferFromFile.h>
13#include <IO/ReadHelpers.h>
14#include <Interpreters/AggregationCommon.h>
15#include <Common/HashTable/FixedHashMap.h>
16#include <Common/HashTable/HashMap.h>
17
18/** Do this:
19for file in ResolutionWidth ResolutionDepth; do
20 for size in 30000 100000 300000 1000000 5000000; do
21 echo
22 BEST_METHOD=0
23 BEST_RESULT=0
24 for method in {1..2}; do
25 echo -ne $file $size $method '';
26 TOTAL_ELEMS=0
27 for i in {0..1000}; do
28 TOTAL_ELEMS=$(( $TOTAL_ELEMS + $size ))
29 if [[ $TOTAL_ELEMS -gt 25000000 ]]; then break; fi
30 ./hash_map_lookup $size $method < ${file}.bin 2>&1 |
31 grep HashMap | grep -oE '[0-9\.]+ elem';
32 done | awk -W interactive '{ if ($1 > x) { x = $1 }; printf(".") } END { print x }' | tee /tmp/hash_map_lookup_res;
33 CUR_RESULT=$(cat /tmp/hash_map_lookup_res | tr -d '.')
34 if [[ $CUR_RESULT -gt $BEST_RESULT ]]; then
35 BEST_METHOD=$method
36 BEST_RESULT=$CUR_RESULT
37 fi;
38 done;
39 echo Best: $BEST_METHOD - $BEST_RESULT
40 done;
41done
42*/
43
44
45template <typename Map>
46void NO_INLINE bench(const std::vector<UInt16> & data, const char * name)
47{
48 Map map;
49
50 Stopwatch watch;
51 for (size_t i = 0, size = data.size(); i < size; ++i)
52 {
53 typename Map::LookupResult it;
54 bool inserted;
55
56 map.emplace(data[i], it, inserted);
57 if (inserted)
58 it->getMapped() = 1;
59 else
60 ++it->getMapped();
61 }
62
63 for (size_t i = 0, size = data.size(); i < size; ++i)
64 {
65 auto it = map.find(data[i]);
66 auto curr = ++it;
67 if (curr)
68 curr->getMapped();
69 }
70 watch.stop();
71 std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Size: " << map.size()
72 << ", elapsed: " << watch.elapsedSeconds() << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.)"
73#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
74 << ", collisions: " << map.getCollisions()
75#endif
76 << std::endl;
77}
78
79template <typename Map>
80void insert(Map & map, StringRef & k)
81{
82 bool inserted;
83 typename Map::LookupResult it;
84 map.emplace(k, it, inserted, nullptr);
85 if (inserted)
86 it->getMapped() = 1;
87 else
88 ++it->getMapped();
89 std::cout << map.find(k)->getMapped() << std::endl;
90}
91
92int main(int argc, char ** argv)
93{
94 if (argc < 3)
95 {
96 std::cerr << "Usage: program n m\n";
97 return 1;
98 }
99
100 size_t n = atoi(argv[1]);
101 size_t m = atoi(argv[2]);
102
103 std::vector<UInt16> data(n);
104
105 {
106 Stopwatch watch;
107 DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
108 DB::CompressedReadBuffer in2(in1);
109 for (size_t i = 0; i < n && !in2.eof(); ++i)
110 {
111 DB::readBinary(data[i], in2);
112 }
113
114 watch.stop();
115 std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " ("
116 << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl;
117 }
118
119 using OldLookup = HashMap<UInt16, UInt8, TrivialHash, HashTableFixedGrower<16>>;
120 using NewLookup = FixedHashMap<UInt16, UInt8>;
121
122 if (!m || m == 1)
123 bench<OldLookup>(data, "Old Lookup");
124 if (!m || m == 2)
125 bench<NewLookup>(data, "New Lookup");
126 return 0;
127}
128