serialize.h source code [ClickHouse/contrib/capnproto/c++/src/capnp/serialize.h]

1	// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
2	// Licensed under the MIT License:
3	//
4	// Permission is hereby granted, free of charge, to any person obtaining a copy
5	// of this software and associated documentation files (the "Software"), to deal
6	// in the Software without restriction, including without limitation the rights
7	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8	// copies of the Software, and to permit persons to whom the Software is
9	// furnished to do so, subject to the following conditions:
10	//
11	// The above copyright notice and this permission notice shall be included in
12	// all copies or substantial portions of the Software.
13	//
14	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20	// THE SOFTWARE.
21
22	// This file implements a simple serialization format for Cap'n Proto messages. The format
23	// is as follows:
24	//
25	// * 32-bit little-endian segment count (4 bytes).
26	// * 32-bit little-endian size of each segment (4*(segment count) bytes).
27	// * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes). (I.e., if there are an even
28	//   number of segments, there are 4 bytes of zeros here, otherwise there is no padding.)
29	// * Data from each segment, in order (8*sum(segment sizes) bytes)
30	//
31	// This format has some important properties:
32	// - It is self-delimiting, so multiple messages may be written to a stream without any external
33	// delimiter.
34	// - The total size and position of each segment can be determined by reading only the first part
35	// of the message, allowing lazy and random-access reading of the segment data.
36	// - A message is always at least 8 bytes.
37	// - A single-segment message can be read entirely in two system calls with no buffering.
38	// - A multi-segment message can be read entirely in three system calls with no buffering.
39	// - The format is appropriate for mmap()ing since all data is aligned.
40
41	#pragma once
42
43	#if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS)
44	#pragma GCC system_header
45	#endif
46
47	#include "message.h"
48	#include <kj/io.h>
49
50	namespace capnp {
51
52	class UnalignedFlatArrayMessageReader: public MessageReader {
53	// Like FlatArrayMessageReader, but skips checking that the array is properly-aligned.
54	//
55	// WARNING: This only works on architectures that support unaligned reads, like x86/x64 and
56	// modern ARM. Unaligned access may incur a performance penalty on these platforms. On many
57	// other platforms, the program will simply crash on unaligned reads. Also note that unaligned
58	// data access may be considered undefined behavior by compilers; use at your own risk. If at
59	// all possible, try to ensure your data ends up in aligned buffers rather than rely on this
60	// class.
61
62	public:
63	UnalignedFlatArrayMessageReader(
64	kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions ());
65	kj::ArrayPtr<const word> getSegment(uint id) override;
66	const word* getEnd() const { return end; }
67
68	private:
69	// Optimize for single-segment case.
70	kj::ArrayPtr<const word> segment0;
71	kj::Array<kj::ArrayPtr<const word>> moreSegments;
72	const word* end;
73	};
74
75	class FlatArrayMessageReader: public UnalignedFlatArrayMessageReader {
76	// Parses a message from a flat array. Note that it makes sense to use this together with mmap()
77	// for extremely fast parsing.
78
79	public:
80	FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions ());
81	// The array must remain valid until the MessageReader is destroyed.
82
83	const word* getEnd() const { return UnalignedFlatArrayMessageReader::getEnd(); }
84	// Get a pointer just past the end of the message as determined by reading the message header.
85	// This could actually be before the end of the input array. This pointer is useful e.g. if
86	// you know that the input array has extra stuff appended after the message and you want to
87	// get at it.
88
89	private:
90	static kj::ArrayPtr<const word> checkAlignment(kj::ArrayPtr<const word> array);
91	};
92
93	kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
94	kj::ArrayPtr<const word> array, MessageBuilder& target,
95	ReaderOptions options = ReaderOptions ());
96	// Convenience function which reads a message using `FlatArrayMessageReader` then copies the
97	// content into the target `MessageBuilder`, verifying that the message structure is valid
98	// (although not necessarily that it matches the desired schema).
99	//
100	// Returns an ArrayPtr containing any words left over in the array after consuming the whole
101	// message. This is useful when reading multiple messages that have been concatenated. See also
102	// FlatArrayMessageReader::getEnd().
103	//
104	// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
105	// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
106	// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
107
108	kj::Array<word> messageToFlatArray(MessageBuilder& builder);
109	// Constructs a flat array containing the entire content of the given message.
110	//
111	// To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
112	// `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
113	// deleted. For example:
114	//
115	// kj::Array<capnp::word> words = messageToFlatArray(myMessage);
116	// kj::ArrayPtr<kj::byte> bytes = words.asBytes();
117	// write(fd, bytes.begin(), bytes.size());
118
119	kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
120	// Version of messageToFlatArray that takes a raw segment array.
121
122	size_t computeSerializedSizeInWords(MessageBuilder& builder);
123	// Returns the size, in words, that will be needed to serialize the message, including the header.
124
125	size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
126	// Version of computeSerializedSizeInWords that takes a raw segment array.
127
128	size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
129	// Given a prefix of a serialized message, try to determine the expected total size of the message,
130	// in words. The returned size is based on the information known so far; it may be an underestimate
131	// if the prefix doesn't contain the full segment table.
132	//
133	// If the returned value is greater than `messagePrefix.size()`, then the message is not yet
134	// complete and the app cannot parse it yet. If the returned value is less than or equal to
135	// `messagePrefix.size()`, then the returned value is the exact total size of the message; any
136	// remaining bytes are part of the next message.
137	//
138	// This function is useful when reading messages from a stream in an asynchronous way, but when
139	// using the full KJ async infrastructure would be too difficult. Each time bytes are received,
140	// use this function to determine if an entire message is ready to be parsed.
141
142	// =======================================================================================
143
144	class InputStreamMessageReader: public MessageReader {
145	// A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
146	// for a subclass specific to file descriptors.
147
148	public:
149	InputStreamMessageReader(kj::InputStream& inputStream,
150	ReaderOptions options = ReaderOptions (),
151	kj::ArrayPtr<word> scratchSpace = nullptr);
152	~InputStreamMessageReader() noexcept(false);
153
154	// implements MessageReader ----------------------------------------
155	kj::ArrayPtr<const word> getSegment(uint id) override;
156
157	private:
158	kj::InputStream& inputStream;
159	byte* readPos;
160
161	// Optimize for single-segment case.
162	kj::ArrayPtr<const word> segment0;
163	kj::Array<kj::ArrayPtr<const word>> moreSegments;
164
165	kj::Array<word> ownedSpace;
166	// Only if scratchSpace wasn't big enough.
167
168	kj::UnwindDetector unwindDetector;
169	};
170
171	void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
172	ReaderOptions options = ReaderOptions (),
173	kj::ArrayPtr<word> scratchSpace = nullptr);
174	// Convenience function which reads a message using `InputStreamMessageReader` then copies the
175	// content into the target `MessageBuilder`, verifying that the message structure is valid
176	// (although not necessarily that it matches the desired schema).
177	//
178	// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
179	// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
180	// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
181
182	void writeMessage(kj::OutputStream& output, MessageBuilder& builder);
183	// Write the message to the given output stream.
184
185	void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
186	// Write the segment array to the given output stream.
187
188	// =======================================================================================
189	// Specializations for reading from / writing to file descriptors.
190
191	class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
192	// A MessageReader that reads from a steam-based file descriptor.
193
194	public:
195	StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions (),
196	kj::ArrayPtr<word> scratchSpace = nullptr)
197	: FdInputStream (fd), InputStreamMessageReader (*this, options, scratchSpace) {}
198	// Read message from a file descriptor, without taking ownership of the descriptor.
199
200	StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions (),
201	kj::ArrayPtr<word> scratchSpace = nullptr)
202	: FdInputStream (kj::mv(fd)), InputStreamMessageReader (*this, options, scratchSpace) {}
203	// Read a message from a file descriptor, taking ownership of the descriptor.
204
205	~StreamFdMessageReader() noexcept(false);
206	};
207
208	void readMessageCopyFromFd(int fd, MessageBuilder& target,
209	ReaderOptions options = ReaderOptions (),
210	kj::ArrayPtr<word> scratchSpace = nullptr);
211	// Convenience function which reads a message using `StreamFdMessageReader` then copies the
212	// content into the target `MessageBuilder`, verifying that the message structure is valid
213	// (although not necessarily that it matches the desired schema).
214	//
215	// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
216	// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
217	// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
218
219	void writeMessageToFd(int fd, MessageBuilder& builder);
220	// Write the message to the given file descriptor.
221	//
222	// This function throws an exception on any I/O error. If your code is not exception-safe, be sure
223	// you catch this exception at the call site. If throwing an exception is not acceptable, you
224	// can implement your own OutputStream with arbitrary error handling and then use writeMessage().
225
226	void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
227	// Write the segment array to the given file descriptor.
228	//
229	// This function throws an exception on any I/O error. If your code is not exception-safe, be sure
230	// you catch this exception at the call site. If throwing an exception is not acceptable, you
231	// can implement your own OutputStream with arbitrary error handling and then use writeMessage().
232
233	// =======================================================================================
234	// inline stuff
235
236	inline FlatArrayMessageReader::FlatArrayMessageReader(
237	kj::ArrayPtr<const word> array, ReaderOptions options)
238	#ifdef KJ_DEBUG
239	: UnalignedFlatArrayMessageReader(checkAlignment(array), options) {}
240	#else
241	: UnalignedFlatArrayMessageReader (array, options) {}
242	#endif
243
244	inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) {
245	return messageToFlatArray(builder.getSegmentsForOutput());
246	}
247
248	inline size_t computeSerializedSizeInWords(MessageBuilder& builder) {
249	return computeSerializedSizeInWords(builder.getSegmentsForOutput());
250	}
251
252	inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) {
253	writeMessage(output, builder.getSegmentsForOutput());
254	}
255
256	inline void writeMessageToFd(int fd, MessageBuilder& builder) {
257	writeMessageToFd(fd, builder.getSegmentsForOutput());
258	}
259
260	} // namespace capnp
261

Browse the source code of ClickHouse/contrib/capnproto/c++/src/capnp/serialize.h