HashingWriteBuffer.h source code [ClickHouse/dbms/src/IO/HashingWriteBuffer.h]

1	#pragma once
2
3	#include <IO/WriteBuffer.h>
4	#include <IO/BufferWithOwnMemory.h>
5	#include <IO/ReadHelpers.h>
6	#include <city.h>
7
8	#define DBMS_DEFAULT_HASHING_BLOCK_SIZE 2048ULL
9
10
11	namespace DB
12	{
13
14	template <typename Buffer>
15	class IHashingBuffer : public BufferWithOwnMemory<Buffer>
16	{
17	public:
18	using uint128 = CityHash_v1_0_2::uint128;
19
20	IHashingBuffer<Buffer>(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE)
21	: BufferWithOwnMemory<Buffer>(block_size_), block_pos(`0`), block_size(block_size_), state (`0`, `0`)
22	{
23	}
24
25	uint128 getHash()
26	{
27	if (block_pos)
28	return CityHash_v1_0_2::CityHash128WithSeed(&BufferWithOwnMemory<Buffer>::memory[`0`], block_pos, state);
29	else
30	return state;
31	}
32
33	void append(DB::BufferBase::Position data)
34	{
35	state = CityHash_v1_0_2::CityHash128WithSeed(data, block_size, state);
36	}
37
38	/// computation of the hash depends on the partitioning of blocks
39	/// so you need to compute a hash of n complete pieces and one incomplete
40	void calculateHash(DB::BufferBase::Position data, size_t len);
41
42	protected:
43	size_t block_pos;
44	size_t block_size;
45	uint128 state;
46	};
47
48	/* Computes the hash from the data to write and passes it to the specified WriteBuffer.*
49	* The buffer of the nested WriteBuffer is used as the main buffer.
50	*/
51	class HashingWriteBuffer : public IHashingBuffer<WriteBuffer>
52	{
53	private:
54	WriteBuffer & out;
55
56	void nextImpl() override
57	{
58	size_t len = offset();
59
60	Position data = working_buffer.begin();
61	calculateHash(data, len);
62
63	out.position() = pos;
64	out.next();
65	working_buffer = out.buffer();
66	}
67
68	public:
69	HashingWriteBuffer(
70	WriteBuffer & out_,
71	size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE)
72	: IHashingBuffer<DB::WriteBuffer>(block_size_), out(out_)
73	{
74	out.next(); /// If something has already been written to `out` before us, we will not let the remains of this data affect the hash.
75	working_buffer = out.buffer();
76	pos = working_buffer.begin();
77	state = uint128 (`0`, `0`);
78	}
79
80	uint128 getHash()
81	{
82	next();
83	return IHashingBuffer<WriteBuffer>::getHash();
84	}
85	};
86
87	}
88

Browse the source code of ClickHouse/dbms/src/IO/HashingWriteBuffer.h