1#pragma once
2
3#include <cstring>
4#include <algorithm>
5#include <memory>
6
7#include <Common/Exception.h>
8#include <IO/BufferBase.h>
9
10
11namespace DB
12{
13
/// Error codes used by ReadBuffer when throwing exceptions.
/// They are only declared here (`extern`); the definitions are provided elsewhere.
namespace ErrorCodes
{
    /// Thrown by ReadBuffer::throwReadAfterEOF().
    extern const int ATTEMPT_TO_READ_AFTER_EOF;

    /// Thrown by ReadBuffer::readStrict(char *, size_t) when fewer bytes than requested were read.
    extern const int CANNOT_READ_ALL_DATA;
}
19
20/** A simple abstract class for buffered data reading (char sequences) from somewhere.
21 * Unlike std::istream, it provides access to the internal buffer,
22 * and also allows you to manually manage the position inside the buffer.
23 *
24 * Note! `char *`, not `const char *` is used
25 * (so that you can take out the common code into BufferBase, and also so that you can fill the buffer in with new data).
26 * This causes inconveniences - for example, when using ReadBuffer to read from a chunk of memory const char *,
27 * you have to use const_cast.
28 *
29 * successors must implement the nextImpl() method.
30 */
31class ReadBuffer : public BufferBase
32{
33public:
34 /** Creates a buffer and sets a piece of available data to read to zero size,
35 * so that the next() function is called to load the new data portion into the buffer at the first try.
36 */
37 ReadBuffer(Position ptr, size_t size) : BufferBase(ptr, size, 0) { working_buffer.resize(0); }
38
39 /** Used when the buffer is already full of data that can be read.
40 * (in this case, pass 0 as an offset)
41 */
42 ReadBuffer(Position ptr, size_t size, size_t offset) : BufferBase(ptr, size, offset) {}
43
44 // FIXME: behavior differs greately from `BufferBase::set()` and it's very confusing.
45 void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); working_buffer.resize(0); }
46
47 /** read next data and fill a buffer with it; set position to the beginning;
48 * return `false` in case of end, `true` otherwise; throw an exception, if something is wrong
49 */
50 bool next()
51 {
52 bytes += offset();
53 bool res = nextImpl();
54 if (!res)
55 working_buffer.resize(0);
56
57 pos = working_buffer.begin() + working_buffer_offset;
58 working_buffer_offset = 0;
59 return res;
60 }
61
62
63 inline void nextIfAtEnd()
64 {
65 if (!hasPendingData())
66 next();
67 }
68
69 virtual ~ReadBuffer() {}
70
71
72 /** Unlike std::istream, it returns true if all data was read
73 * (and not in case there was an attempt to read after the end).
74 * If at the moment the position is at the end of the buffer, it calls the next() method.
75 * That is, it has a side effect - if the buffer is over, then it updates it and set the position to the beginning.
76 *
77 * Try to read after the end should throw an exception.
78 */
79 bool ALWAYS_INLINE eof()
80 {
81 return !hasPendingData() && !next();
82 }
83
84 void ignore()
85 {
86 if (!eof())
87 ++pos;
88 else
89 throwReadAfterEOF();
90 }
91
92 void ignore(size_t n)
93 {
94 while (n != 0 && !eof())
95 {
96 size_t bytes_to_ignore = std::min(static_cast<size_t>(working_buffer.end() - pos), n);
97 pos += bytes_to_ignore;
98 n -= bytes_to_ignore;
99 }
100
101 if (n)
102 throwReadAfterEOF();
103 }
104
105 /// You could call this method `ignore`, and `ignore` call `ignoreStrict`.
106 size_t tryIgnore(size_t n)
107 {
108 size_t bytes_ignored = 0;
109
110 while (bytes_ignored < n && !eof())
111 {
112 size_t bytes_to_ignore = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_ignored);
113 pos += bytes_to_ignore;
114 bytes_ignored += bytes_to_ignore;
115 }
116
117 return bytes_ignored;
118 }
119
120 /** Reads a single byte. */
121 bool ALWAYS_INLINE read(char & c)
122 {
123 if (eof())
124 return false;
125 c = *pos++;
126 return true;
127 }
128
129 void ALWAYS_INLINE readStrict(char & c)
130 {
131 if (read(c))
132 return;
133 throwReadAfterEOF();
134 }
135
136 /** Reads as many as there are, no more than n bytes. */
137 size_t read(char * to, size_t n)
138 {
139 size_t bytes_copied = 0;
140
141 while (bytes_copied < n && !eof())
142 {
143 size_t bytes_to_copy = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_copied);
144 ::memcpy(to + bytes_copied, pos, bytes_to_copy);
145 pos += bytes_to_copy;
146 bytes_copied += bytes_to_copy;
147 }
148
149 return bytes_copied;
150 }
151
152 /** Reads n bytes, if there are less - throws an exception. */
153 void readStrict(char * to, size_t n)
154 {
155 auto read_bytes = read(to, n);
156 if (n != read_bytes)
157 throw Exception("Cannot read all data. Bytes read: " + std::to_string(read_bytes) + ". Bytes expected: " + std::to_string(n) + ".", ErrorCodes::CANNOT_READ_ALL_DATA);
158 }
159
160 /** A method that can be more efficiently implemented in successors, in the case of reading large enough blocks.
161 * The implementation can read data directly into `to`, without superfluous copying, if in `to` there is enough space for work.
162 * For example, a CompressedReadBuffer can decompress the data directly into `to`, if the entire decompressed block fits there.
163 * By default - the same as read.
164 * Don't use for small reads.
165 */
166 virtual size_t readBig(char * to, size_t n)
167 {
168 return read(to, n);
169 }
170
171protected:
172 /// The number of bytes to ignore from the initial position of `working_buffer` buffer.
173 size_t working_buffer_offset = 0;
174
175private:
176 /** Read the next data and fill a buffer with it.
177 * Return `false` in case of the end, `true` otherwise.
178 * Throw an exception if something is wrong.
179 */
180 virtual bool nextImpl() { return false; }
181
182 [[noreturn]] void throwReadAfterEOF()
183 {
184 throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
185 }
186};
187
188
189using ReadBufferPtr = std::shared_ptr<ReadBuffer>;
190
191
192}
193