| 1 | #pragma once |
| 2 | |
| 3 | #include <IO/WriteBuffer.h> |
| 4 | #include <IO/BufferWithOwnMemory.h> |
| 5 | |
| 6 | |
| 7 | namespace DB |
| 8 | { |
| 9 | |
| 10 | /** Writes the data to another buffer, replacing the invalid UTF-8 sequences with the specified sequence. |
| 11 | * If the valid UTF-8 is already written, it works faster. |
| 12 | * Note: before using the resulting string, destroy this object. |
| 13 | */ |
| 14 | class WriteBufferValidUTF8 : public BufferWithOwnMemory<WriteBuffer> |
| 15 | { |
| 16 | private: |
| 17 | WriteBuffer & output_buffer; |
| 18 | bool group_replacements; |
| 19 | /// The last recorded character was `replacement`. |
| 20 | bool just_put_replacement = false; |
| 21 | std::string replacement; |
| 22 | |
| 23 | void putReplacement(); |
| 24 | void putValid(char * data, size_t len); |
| 25 | |
| 26 | void nextImpl() override; |
| 27 | void finish(); |
| 28 | |
| 29 | public: |
| 30 | static const size_t DEFAULT_SIZE; |
| 31 | |
| 32 | WriteBufferValidUTF8( |
| 33 | WriteBuffer & output_buffer_, |
| 34 | bool group_replacements_ = true, |
| 35 | const char * replacement_ = "\xEF\xBF\xBD" , |
| 36 | size_t size = DEFAULT_SIZE); |
| 37 | |
| 38 | ~WriteBufferValidUTF8() override; |
| 39 | }; |
| 40 | |
| 41 | } |
| 42 | |