1 | #pragma once |
2 | |
#include <cstddef>
#include <string>

#include <IO/WriteBuffer.h>
#include <IO/BufferWithOwnMemory.h>
5 | |
6 | |
7 | namespace DB |
8 | { |
9 | |
10 | /** Writes the data to another buffer, replacing the invalid UTF-8 sequences with the specified sequence. |
11 | * If the valid UTF-8 is already written, it works faster. |
12 | * Note: before using the resulting string, destroy this object. |
13 | */ |
14 | class WriteBufferValidUTF8 : public BufferWithOwnMemory<WriteBuffer> |
15 | { |
16 | private: |
17 | WriteBuffer & output_buffer; |
18 | bool group_replacements; |
19 | /// The last recorded character was `replacement`. |
20 | bool just_put_replacement = false; |
21 | std::string replacement; |
22 | |
23 | void putReplacement(); |
24 | void putValid(char * data, size_t len); |
25 | |
26 | void nextImpl() override; |
27 | void finish(); |
28 | |
29 | public: |
30 | static const size_t DEFAULT_SIZE; |
31 | |
32 | WriteBufferValidUTF8( |
33 | WriteBuffer & output_buffer_, |
34 | bool group_replacements_ = true, |
35 | const char * replacement_ = "\xEF\xBF\xBD" , |
36 | size_t size = DEFAULT_SIZE); |
37 | |
38 | ~WriteBufferValidUTF8() override; |
39 | }; |
40 | |
41 | } |
42 | |