1 | #pragma once |
2 | |
3 | #include <IO/WriteBuffer.h> |
4 | #include <IO/BufferWithOwnMemory.h> |
5 | #include <IO/ReadHelpers.h> |
6 | #include <city.h> |
7 | |
/// Default size, in bytes, of one piece of data that is hashed at a time.
/// Used as the default `block_size_` argument of the hashing buffers declared below.
#define DBMS_DEFAULT_HASHING_BLOCK_SIZE 2048ULL
9 | |
10 | |
11 | namespace DB |
12 | { |
13 | |
14 | template <typename Buffer> |
15 | class IHashingBuffer : public BufferWithOwnMemory<Buffer> |
16 | { |
17 | public: |
18 | using uint128 = CityHash_v1_0_2::uint128; |
19 | |
20 | IHashingBuffer<Buffer>(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) |
21 | : BufferWithOwnMemory<Buffer>(block_size_), block_pos(0), block_size(block_size_), state(0, 0) |
22 | { |
23 | } |
24 | |
25 | uint128 getHash() |
26 | { |
27 | if (block_pos) |
28 | return CityHash_v1_0_2::CityHash128WithSeed(&BufferWithOwnMemory<Buffer>::memory[0], block_pos, state); |
29 | else |
30 | return state; |
31 | } |
32 | |
33 | void append(DB::BufferBase::Position data) |
34 | { |
35 | state = CityHash_v1_0_2::CityHash128WithSeed(data, block_size, state); |
36 | } |
37 | |
38 | /// computation of the hash depends on the partitioning of blocks |
39 | /// so you need to compute a hash of n complete pieces and one incomplete |
40 | void calculateHash(DB::BufferBase::Position data, size_t len); |
41 | |
42 | protected: |
43 | size_t block_pos; |
44 | size_t block_size; |
45 | uint128 state; |
46 | }; |
47 | |
48 | /** Computes the hash from the data to write and passes it to the specified WriteBuffer. |
49 | * The buffer of the nested WriteBuffer is used as the main buffer. |
50 | */ |
51 | class HashingWriteBuffer : public IHashingBuffer<WriteBuffer> |
52 | { |
53 | private: |
54 | WriteBuffer & out; |
55 | |
56 | void nextImpl() override |
57 | { |
58 | size_t len = offset(); |
59 | |
60 | Position data = working_buffer.begin(); |
61 | calculateHash(data, len); |
62 | |
63 | out.position() = pos; |
64 | out.next(); |
65 | working_buffer = out.buffer(); |
66 | } |
67 | |
68 | public: |
69 | HashingWriteBuffer( |
70 | WriteBuffer & out_, |
71 | size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) |
72 | : IHashingBuffer<DB::WriteBuffer>(block_size_), out(out_) |
73 | { |
74 | out.next(); /// If something has already been written to `out` before us, we will not let the remains of this data affect the hash. |
75 | working_buffer = out.buffer(); |
76 | pos = working_buffer.begin(); |
77 | state = uint128(0, 0); |
78 | } |
79 | |
80 | uint128 getHash() |
81 | { |
82 | next(); |
83 | return IHashingBuffer<WriteBuffer>::getHash(); |
84 | } |
85 | }; |
86 | |
87 | } |
88 | |