1#pragma once
2
3#include <IO/WriteBuffer.h>
4#include <IO/BufferWithOwnMemory.h>
5
6
7namespace DB
8{
9
10/** Writes the data to another buffer, replacing the invalid UTF-8 sequences with the specified sequence.
11 * If the valid UTF-8 is already written, it works faster.
12 * Note: before using the resulting string, destroy this object.
13 */
14class WriteBufferValidUTF8 : public BufferWithOwnMemory<WriteBuffer>
15{
16private:
17 WriteBuffer & output_buffer;
18 bool group_replacements;
19 /// The last recorded character was `replacement`.
20 bool just_put_replacement = false;
21 std::string replacement;
22
23 void putReplacement();
24 void putValid(char * data, size_t len);
25
26 void nextImpl() override;
27 void finish();
28
29public:
30 static const size_t DEFAULT_SIZE;
31
32 WriteBufferValidUTF8(
33 WriteBuffer & output_buffer_,
34 bool group_replacements_ = true,
35 const char * replacement_ = "\xEF\xBF\xBD",
36 size_t size = DEFAULT_SIZE);
37
38 ~WriteBufferValidUTF8() override;
39};
40
41}
42