| 1 | #pragma once |
| 2 | |
| 3 | #include <IO/WriteBuffer.h> |
| 4 | #include <IO/BufferWithOwnMemory.h> |
| 5 | #include <IO/ReadHelpers.h> |
| 6 | #include <city.h> |
| 7 | |
| 8 | #define DBMS_DEFAULT_HASHING_BLOCK_SIZE 2048ULL |
| 9 | |
| 10 | |
| 11 | namespace DB |
| 12 | { |
| 13 | |
| 14 | template <typename Buffer> |
| 15 | class IHashingBuffer : public BufferWithOwnMemory<Buffer> |
| 16 | { |
| 17 | public: |
| 18 | using uint128 = CityHash_v1_0_2::uint128; |
| 19 | |
| 20 | IHashingBuffer<Buffer>(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) |
| 21 | : BufferWithOwnMemory<Buffer>(block_size_), block_pos(0), block_size(block_size_), state(0, 0) |
| 22 | { |
| 23 | } |
| 24 | |
| 25 | uint128 getHash() |
| 26 | { |
| 27 | if (block_pos) |
| 28 | return CityHash_v1_0_2::CityHash128WithSeed(&BufferWithOwnMemory<Buffer>::memory[0], block_pos, state); |
| 29 | else |
| 30 | return state; |
| 31 | } |
| 32 | |
| 33 | void append(DB::BufferBase::Position data) |
| 34 | { |
| 35 | state = CityHash_v1_0_2::CityHash128WithSeed(data, block_size, state); |
| 36 | } |
| 37 | |
| 38 | /// computation of the hash depends on the partitioning of blocks |
| 39 | /// so you need to compute a hash of n complete pieces and one incomplete |
| 40 | void calculateHash(DB::BufferBase::Position data, size_t len); |
| 41 | |
| 42 | protected: |
| 43 | size_t block_pos; |
| 44 | size_t block_size; |
| 45 | uint128 state; |
| 46 | }; |
| 47 | |
| 48 | /** Computes the hash from the data to write and passes it to the specified WriteBuffer. |
| 49 | * The buffer of the nested WriteBuffer is used as the main buffer. |
| 50 | */ |
| 51 | class HashingWriteBuffer : public IHashingBuffer<WriteBuffer> |
| 52 | { |
| 53 | private: |
| 54 | WriteBuffer & out; |
| 55 | |
| 56 | void nextImpl() override |
| 57 | { |
| 58 | size_t len = offset(); |
| 59 | |
| 60 | Position data = working_buffer.begin(); |
| 61 | calculateHash(data, len); |
| 62 | |
| 63 | out.position() = pos; |
| 64 | out.next(); |
| 65 | working_buffer = out.buffer(); |
| 66 | } |
| 67 | |
| 68 | public: |
| 69 | HashingWriteBuffer( |
| 70 | WriteBuffer & out_, |
| 71 | size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) |
| 72 | : IHashingBuffer<DB::WriteBuffer>(block_size_), out(out_) |
| 73 | { |
| 74 | out.next(); /// If something has already been written to `out` before us, we will not let the remains of this data affect the hash. |
| 75 | working_buffer = out.buffer(); |
| 76 | pos = working_buffer.begin(); |
| 77 | state = uint128(0, 0); |
| 78 | } |
| 79 | |
| 80 | uint128 getHash() |
| 81 | { |
| 82 | next(); |
| 83 | return IHashingBuffer<WriteBuffer>::getHash(); |
| 84 | } |
| 85 | }; |
| 86 | |
| 87 | } |
| 88 | |