1 | // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors |
2 | // Licensed under the MIT License: |
3 | // |
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | // of this software and associated documentation files (the "Software"), to deal |
6 | // in the Software without restriction, including without limitation the rights |
7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | // copies of the Software, and to permit persons to whom the Software is |
9 | // furnished to do so, subject to the following conditions: |
10 | // |
11 | // The above copyright notice and this permission notice shall be included in |
12 | // all copies or substantial portions of the Software. |
13 | // |
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
20 | // THE SOFTWARE. |
21 | |
22 | // This file implements a simple serialization format for Cap'n Proto messages. The format |
23 | // is as follows: |
24 | // |
// * 32-bit little-endian segment count, minus one (4 bytes). (A message always has at least one
//   segment, so a stored value of zero means one segment.)
26 | // * 32-bit little-endian size of each segment (4*(segment count) bytes). |
27 | // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes). (I.e., if there are an even |
28 | // number of segments, there are 4 bytes of zeros here, otherwise there is no padding.) |
29 | // * Data from each segment, in order (8*sum(segment sizes) bytes) |
30 | // |
31 | // This format has some important properties: |
32 | // - It is self-delimiting, so multiple messages may be written to a stream without any external |
33 | // delimiter. |
34 | // - The total size and position of each segment can be determined by reading only the first part |
35 | // of the message, allowing lazy and random-access reading of the segment data. |
36 | // - A message is always at least 8 bytes. |
37 | // - A single-segment message can be read entirely in two system calls with no buffering. |
38 | // - A multi-segment message can be read entirely in three system calls with no buffering. |
39 | // - The format is appropriate for mmap()ing since all data is aligned. |
40 | |
41 | #pragma once |
42 | |
43 | #if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS) |
44 | #pragma GCC system_header |
45 | #endif |
46 | |
47 | #include "message.h" |
48 | #include <kj/io.h> |
49 | |
50 | namespace capnp { |
51 | |
class UnalignedFlatArrayMessageReader: public MessageReader {
  // Like FlatArrayMessageReader, but skips checking that the array is properly-aligned.
  //
  // WARNING: This only works on architectures that support unaligned reads, like x86/x64 and
  // modern ARM. Unaligned access may incur a performance penalty on these platforms. On many
  // other platforms, the program will simply crash on unaligned reads. Also note that unaligned
  // data access may be considered undefined behavior by compilers; use at your own risk. If at
  // all possible, try to ensure your data ends up in aligned buffers rather than rely on this
  // class.

public:
  UnalignedFlatArrayMessageReader(
      kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
  // Constructs a reader over the serialized message held in `array`, using `options` as the
  // reader options. The constructor is defined out-of-line (not in this header).
  //
  // NOTE(review): FlatArrayMessageReader, which forwards its array to this constructor, requires
  // the array to remain valid until the reader is destroyed; presumably the same applies here.

  kj::ArrayPtr<const word> getSegment(uint id) override;
  // implements MessageReader: look up a segment by its ID.

  const word* getEnd() const { return end; }
  // Returns the stored `end` pointer. FlatArrayMessageReader::getEnd() forwards to this accessor
  // and documents the result as a pointer just past the end of the message, as determined by
  // reading the message header.

private:
  // Optimize for single-segment case.
  // (Presumably `segment0` is the first segment and `moreSegments` holds any additional ones.)
  kj::ArrayPtr<const word> segment0;
  kj::Array<kj::ArrayPtr<const word>> moreSegments;

  const word* end;
  // Value handed out by getEnd().
};
74 | |
class FlatArrayMessageReader: public UnalignedFlatArrayMessageReader {
  // Parses a message from a flat array. Note that it makes sense to use this together with mmap()
  // for extremely fast parsing.
  //
  // This class adds no state to UnalignedFlatArrayMessageReader. The only difference visible in
  // this header is that, when KJ_DEBUG is defined, the constructor passes the array through
  // checkAlignment() before handing it to the base class.

public:
  FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
  // The array must remain valid until the MessageReader is destroyed.

  const word* getEnd() const { return UnalignedFlatArrayMessageReader::getEnd(); }
  // Get a pointer just past the end of the message as determined by reading the message header.
  // This could actually be before the end of the input array. This pointer is useful e.g. if
  // you know that the input array has extra stuff appended after the message and you want to
  // get at it.

private:
  static kj::ArrayPtr<const word> checkAlignment(kj::ArrayPtr<const word> array);
  // Helper used by the constructor in KJ_DEBUG builds only. Its return value is what gets passed
  // on to the base-class constructor. Defined out-of-line; presumably it verifies that `array` is
  // word-aligned and returns it unchanged -- TODO confirm against the implementation.
};
92 | |
kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
    kj::ArrayPtr<const word> array, MessageBuilder& target,
    ReaderOptions options = ReaderOptions());
// Convenience function which reads a message using `FlatArrayMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// Parameters:
// - `array`: the serialized message, possibly followed by further data (e.g. more messages).
// - `target`: the builder that receives the copied content.
// - `options`: reader options for the read (presumably passed to the `FlatArrayMessageReader`).
//
// Returns an ArrayPtr containing any words left over in the array after consuming the whole
// message. This is useful when reading multiple messages that have been concatenated. See also
// FlatArrayMessageReader::getEnd().
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
107 | |
kj::Array<word> messageToFlatArray(MessageBuilder& builder);
// Constructs a flat array containing the entire content of the given message.
//
// This overload is defined inline in this header: it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload.
//
// To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
// `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
// deleted. For example:
//
//     kj::Array<capnp::word> words = messageToFlatArray(myMessage);
//     kj::ArrayPtr<kj::byte> bytes = words.asBytes();
//     write(fd, bytes.begin(), bytes.size());

kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Version of messageToFlatArray that takes a raw segment array. The returned Array owns the
// flattened copy, so the same `.asBytes()` lifetime caveat applies.
121 | |
size_t computeSerializedSizeInWords(MessageBuilder& builder);
// Returns the size, in words, that will be needed to serialize the message, including the header.
//
// This overload is defined inline in this header: it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload.

size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Version of computeSerializedSizeInWords that takes a raw segment array.
127 | |
size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
// Given a prefix of a serialized message, try to determine the expected total size of the message,
// in words. The returned size is based on the information known so far; it may be an underestimate
// if the prefix doesn't contain the full segment table.
//
// Interpreting the result (all sizes are in words, as is `messagePrefix.size()`):
// - If the returned value is greater than `messagePrefix.size()`, then the message is not yet
//   complete and the app cannot parse it yet.
// - If the returned value is less than or equal to `messagePrefix.size()`, then the returned value
//   is the exact total size of the message; any words beyond that point are part of the next
//   message.
//
// This function is useful when reading messages from a stream in an asynchronous way, but when
// using the full KJ async infrastructure would be too difficult. Each time bytes are received,
// use this function to determine if an entire message is ready to be parsed.
141 | |
142 | // ======================================================================================= |
143 | |
class InputStreamMessageReader: public MessageReader {
  // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
  // for a subclass specific to file descriptors.

public:
  InputStreamMessageReader(kj::InputStream& inputStream,
                           ReaderOptions options = ReaderOptions(),
                           kj::ArrayPtr<word> scratchSpace = nullptr);
  // Reads a message from `inputStream`. The stream is held by reference (see the `inputStream`
  // member), so it must outlive this reader.
  //
  // `scratchSpace` is an optional caller-provided buffer. If it isn't big enough, the reader
  // keeps its own allocation instead (see `ownedSpace`).

  ~InputStreamMessageReader() noexcept(false);
  // Declared noexcept(false), i.e. the destructor is permitted to throw.
  // NOTE(review): presumably it finishes consuming the message from the stream and uses
  // `unwindDetector` to avoid throwing during stack unwinding -- confirm in the implementation.

  // implements MessageReader ----------------------------------------
  kj::ArrayPtr<const word> getSegment(uint id) override;

private:
  kj::InputStream& inputStream;
  // Non-owning reference to the stream passed to the constructor.

  byte* readPos;
  // NOTE(review): presumably tracks how far into the message buffer data has been read so far
  // -- confirm in the implementation.

  // Optimize for single-segment case.
  kj::ArrayPtr<const word> segment0;
  kj::Array<kj::ArrayPtr<const word>> moreSegments;

  kj::Array<word> ownedSpace;
  // Only if scratchSpace wasn't big enough.

  kj::UnwindDetector unwindDetector;
};
170 | |
void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
                     ReaderOptions options = ReaderOptions(),
                     kj::ArrayPtr<word> scratchSpace = nullptr);
// Convenience function which reads a message using `InputStreamMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// `options` and `scratchSpace` have the same types and defaults as the corresponding
// `InputStreamMessageReader` constructor parameters (presumably they are passed straight through).
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
181 | |
void writeMessage(kj::OutputStream& output, MessageBuilder& builder);
// Write the message to the given output stream.
//
// This overload is defined inline in this header: it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload.

void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Write the segment array to the given output stream.
187 | |
188 | // ======================================================================================= |
189 | // Specializations for reading from / writing to file descriptors. |
190 | |
class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
  // A MessageReader that reads from a stream-based file descriptor.
  //
  // The private kj::FdInputStream base is listed first on purpose: base classes are constructed
  // in declaration order, and the constructors below pass `*this` (as the FdInputStream) to
  // InputStreamMessageReader, so the stream must already be constructed at that point.

public:
  StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(),
                        kj::ArrayPtr<word> scratchSpace = nullptr)
      : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {}
  // Read a message from a file descriptor, without taking ownership of the descriptor.

  StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(),
                        kj::ArrayPtr<word> scratchSpace = nullptr)
      : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {}
  // Read a message from a file descriptor, taking ownership of the descriptor.

  ~StreamFdMessageReader() noexcept(false);
  // Declared noexcept(false), matching the base class InputStreamMessageReader's destructor.
  // Defined out-of-line.
};
207 | |
void readMessageCopyFromFd(int fd, MessageBuilder& target,
                           ReaderOptions options = ReaderOptions(),
                           kj::ArrayPtr<word> scratchSpace = nullptr);
// Convenience function which reads a message using `StreamFdMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// `fd` is passed as a plain int. NOTE(review): presumably this uses the non-owning
// `StreamFdMessageReader(int, ...)` constructor, leaving the descriptor open for the caller --
// confirm in the implementation.
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
218 | |
void writeMessageToFd(int fd, MessageBuilder& builder);
// Write the message to the given file descriptor.
//
// This overload is defined inline in this header: it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload.
//
// This function throws an exception on any I/O error. If your code is not exception-safe, be sure
// you catch this exception at the call site. If throwing an exception is not acceptable, you
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().

void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Write the segment array to the given file descriptor.
//
// This function throws an exception on any I/O error. If your code is not exception-safe, be sure
// you catch this exception at the call site. If throwing an exception is not acceptable, you
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().
232 | |
233 | // ======================================================================================= |
234 | // inline stuff |
235 | |
236 | inline FlatArrayMessageReader::FlatArrayMessageReader( |
237 | kj::ArrayPtr<const word> array, ReaderOptions options) |
238 | #ifdef KJ_DEBUG |
239 | : UnalignedFlatArrayMessageReader(checkAlignment(array), options) {} |
240 | #else |
241 | : UnalignedFlatArrayMessageReader(array, options) {} |
242 | #endif |
243 | |
244 | inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) { |
245 | return messageToFlatArray(builder.getSegmentsForOutput()); |
246 | } |
247 | |
248 | inline size_t computeSerializedSizeInWords(MessageBuilder& builder) { |
249 | return computeSerializedSizeInWords(builder.getSegmentsForOutput()); |
250 | } |
251 | |
252 | inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) { |
253 | writeMessage(output, builder.getSegmentsForOutput()); |
254 | } |
255 | |
256 | inline void writeMessageToFd(int fd, MessageBuilder& builder) { |
257 | writeMessageToFd(fd, builder.getSegmentsForOutput()); |
258 | } |
259 | |
260 | } // namespace capnp |
261 | |