| 1 | // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors |
| 2 | // Licensed under the MIT License: |
| 3 | // |
| 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
| 5 | // of this software and associated documentation files (the "Software"), to deal |
| 6 | // in the Software without restriction, including without limitation the rights |
| 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 8 | // copies of the Software, and to permit persons to whom the Software is |
| 9 | // furnished to do so, subject to the following conditions: |
| 10 | // |
| 11 | // The above copyright notice and this permission notice shall be included in |
| 12 | // all copies or substantial portions of the Software. |
| 13 | // |
| 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 20 | // THE SOFTWARE. |
| 21 | |
| 22 | // This file implements a simple serialization format for Cap'n Proto messages. The format |
| 23 | // is as follows: |
| 24 | // |
| 25 | // * 32-bit little-endian segment count (4 bytes). |
| 26 | // * 32-bit little-endian size of each segment (4*(segment count) bytes). |
| 27 | // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes). (I.e., if there are an even |
| 28 | // number of segments, there are 4 bytes of zeros here, otherwise there is no padding.) |
| 29 | // * Data from each segment, in order (8*sum(segment sizes) bytes) |
| 30 | // |
| 31 | // This format has some important properties: |
| 32 | // - It is self-delimiting, so multiple messages may be written to a stream without any external |
| 33 | // delimiter. |
| 34 | // - The total size and position of each segment can be determined by reading only the first part |
| 35 | // of the message, allowing lazy and random-access reading of the segment data. |
| 36 | // - A message is always at least 8 bytes. |
| 37 | // - A single-segment message can be read entirely in two system calls with no buffering. |
| 38 | // - A multi-segment message can be read entirely in three system calls with no buffering. |
| 39 | // - The format is appropriate for mmap()ing since all data is aligned. |
| 40 | |
| 41 | #pragma once |
| 42 | |
| 43 | #if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS) |
| 44 | #pragma GCC system_header |
| 45 | #endif |
| 46 | |
| 47 | #include "message.h" |
| 48 | #include <kj/io.h> |
| 49 | |
| 50 | namespace capnp { |
| 51 | |
class UnalignedFlatArrayMessageReader: public MessageReader {
  // Like FlatArrayMessageReader, but skips checking that the array is properly-aligned.
  //
  // WARNING: This only works on architectures that support unaligned reads, like x86/x64 and
  //   modern ARM. Unaligned access may incur a performance penalty on these platforms. On many
  //   other platforms, the program will simply crash on unaligned reads. Also note that unaligned
  //   data access may be considered undefined behavior by compilers; use at your own risk. If at
  //   all possible, try to ensure your data ends up in aligned buffers rather than rely on this
  //   class.

public:
  UnalignedFlatArrayMessageReader(
      kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
  // Constructs a reader over `array`. FlatArrayMessageReader forwards its constructor arguments
  // here, so presumably the same lifetime rule applies: the array must remain valid until the
  // reader is destroyed. (The implementation is not in this header -- TODO confirm.)

  kj::ArrayPtr<const word> getSegment(uint id) override;
  // Overrides MessageReader::getSegment() to supply the segment with the given id.

  const word* getEnd() const { return end; }
  // Returns the stored `end` pointer. FlatArrayMessageReader::getEnd() forwards to this accessor
  // and describes the value as a pointer just past the end of the message, as determined by
  // reading the message header.

private:
  // Optimize for single-segment case.
  kj::ArrayPtr<const word> segment0;
  // NOTE(review): presumably the first segment, kept inline so a single-segment message needs no
  // extra array -- confirm against the implementation.
  kj::Array<kj::ArrayPtr<const word>> moreSegments;
  // NOTE(review): presumably the segments after the first, if any -- confirm against the
  // implementation.
  const word* end;
  // Value handed out by getEnd().
};
| 74 | |
class FlatArrayMessageReader: public UnalignedFlatArrayMessageReader {
  // Parses a message from a flat array. Note that it makes sense to use this together with mmap()
  // for extremely fast parsing.
  //
  // All of the reading machinery is inherited from UnalignedFlatArrayMessageReader. Per the inline
  // constructor in this header, the only difference is that the input array is passed through
  // checkAlignment() first, and only when KJ_DEBUG is defined.

public:
  FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
  // The array must remain valid until the MessageReader is destroyed.

  const word* getEnd() const { return UnalignedFlatArrayMessageReader::getEnd(); }
  // Get a pointer just past the end of the message as determined by reading the message header.
  // This could actually be before the end of the input array. This pointer is useful e.g. if
  // you know that the input array has extra stuff appended after the message and you want to
  // get at it.

private:
  static kj::ArrayPtr<const word> checkAlignment(kj::ArrayPtr<const word> array);
  // Takes the constructor's input array and returns the array that is forwarded to the base
  // class. Within this header it is only called from the constructor, in KJ_DEBUG builds.
  // NOTE(review): presumably it verifies alignment and returns `array` unchanged -- the
  // implementation is not visible here, confirm.
};
| 92 | |
kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
    kj::ArrayPtr<const word> array, MessageBuilder& target,
    ReaderOptions options = ReaderOptions());
// Convenience function which reads a message using `FlatArrayMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// Parameters:
// - `array`: words holding the serialized message, optionally followed by further data.
// - `target`: builder that receives the copied content.
// - `options`: reader options; defaults to a default-constructed `ReaderOptions`. Presumably
//   these are handed to the `FlatArrayMessageReader` (the implementation is not in this header).
//
// Returns an ArrayPtr containing any words left over in the array after consuming the whole
// message. This is useful when reading multiple messages that have been concatenated. See also
// FlatArrayMessageReader::getEnd().
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
| 107 | |
kj::Array<word> messageToFlatArray(MessageBuilder& builder);
// Constructs a flat array containing the entire content of the given message.
//
// This overload is defined inline at the bottom of this header; it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload below.
//
// To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
// `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
// deleted. For example:
//
//     kj::Array<capnp::word> words = messageToFlatArray(myMessage);
//     kj::ArrayPtr<kj::byte> bytes = words.asBytes();
//     write(fd, bytes.begin(), bytes.size());

kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Version of messageToFlatArray that takes a raw segment array: an array of word arrays, one
// entry per segment.
| 121 | |
size_t computeSerializedSizeInWords(MessageBuilder& builder);
// Returns the size, in words, that will be needed to serialize the message, including the header.
//
// This overload is defined inline at the bottom of this header; it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload below.

size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Version of computeSerializedSizeInWords that takes a raw segment array: an array of word
// arrays, one entry per segment.
| 127 | |
size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
// Given a prefix of a serialized message, try to determine the expected total size of the message,
// in words. The returned size is based on the information known so far; it may be an underestimate
// if the prefix doesn't contain the full segment table.
//
// Both the returned value and `messagePrefix.size()` are measured in words, so they can be
// compared directly:
//
// - If the returned value is greater than `messagePrefix.size()`, then the message is not yet
//   complete and the app cannot parse it yet.
// - If the returned value is less than or equal to `messagePrefix.size()`, then the returned value
//   is the exact total size of the message; any remaining words are part of the next message.
//
// This function is useful when reading messages from a stream in an asynchronous way, but when
// using the full KJ async infrastructure would be too difficult. Each time more data is received,
// use this function to determine if an entire message is ready to be parsed.
| 141 | |
| 142 | // ======================================================================================= |
| 143 | |
class InputStreamMessageReader: public MessageReader {
  // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
  // for a subclass specific to file descriptors.

public:
  InputStreamMessageReader(kj::InputStream& inputStream,
                           ReaderOptions options = ReaderOptions(),
                           kj::ArrayPtr<word> scratchSpace = nullptr);
  // `inputStream` is held by reference (see the member below), so it must outlive this reader.
  // `scratchSpace` is an optional caller-provided buffer; per the comment on `ownedSpace`, the
  // reader only holds its own allocation when the scratch space wasn't big enough.

  ~InputStreamMessageReader() noexcept(false);
  // Declared noexcept(false): this destructor is permitted to throw.

  // implements MessageReader ----------------------------------------
  kj::ArrayPtr<const word> getSegment(uint id) override;

private:
  kj::InputStream& inputStream;
  // The stream passed to the constructor; not owned.
  byte* readPos;
  // NOTE(review): looks like a cursor tracking how far into the message buffer data has been
  // read so far -- confirm against the implementation.

  // Optimize for single-segment case.
  kj::ArrayPtr<const word> segment0;
  kj::Array<kj::ArrayPtr<const word>> moreSegments;

  kj::Array<word> ownedSpace;
  // Only if scratchSpace wasn't big enough.

  kj::UnwindDetector unwindDetector;
  // NOTE(review): presumably consulted by the throwing destructor to detect whether it is
  // running during exception unwinding -- confirm against the implementation.
};
| 170 | |
void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
                     ReaderOptions options = ReaderOptions(),
                     kj::ArrayPtr<word> scratchSpace = nullptr);
// Convenience function which reads a message using `InputStreamMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// Parameters:
// - `input`: stream the serialized message is read from.
// - `target`: builder that receives the copied content.
// - `options`, `scratchSpace`: same parameter types and defaults as the
//   `InputStreamMessageReader` constructor; presumably forwarded to it (the implementation is not
//   in this header).
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
| 181 | |
void writeMessage(kj::OutputStream& output, MessageBuilder& builder);
// Write the message to the given output stream.
//
// This overload is defined inline at the bottom of this header; it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload below.

void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Write the segment array to the given output stream. `segments` is an array of word arrays, one
// entry per segment.
| 187 | |
| 188 | // ======================================================================================= |
| 189 | // Specializations for reading from / writing to file descriptors. |
| 190 | |
class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
  // A MessageReader that reads from a stream-based file descriptor.
  //
  // The private kj::FdInputStream base is listed first on purpose: base classes are constructed in
  // declaration order, so the stream already exists when `*this` is handed to the
  // InputStreamMessageReader constructor as its input stream.

public:
  StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(),
                        kj::ArrayPtr<word> scratchSpace = nullptr)
      : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {}
  // Read a message from a file descriptor, without taking ownership of the descriptor.

  StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(),
                        kj::ArrayPtr<word> scratchSpace = nullptr)
      : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {}
  // Read a message from a file descriptor, taking ownership of the descriptor.

  ~StreamFdMessageReader() noexcept(false);
  // Declared noexcept(false): this destructor is permitted to throw.
};
| 207 | |
void readMessageCopyFromFd(int fd, MessageBuilder& target,
                           ReaderOptions options = ReaderOptions(),
                           kj::ArrayPtr<word> scratchSpace = nullptr);
// Convenience function which reads a message using `StreamFdMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// Parameters:
// - `fd`: file descriptor the serialized message is read from. It is passed as a plain `int`;
//   presumably the descriptor is not closed by this function -- TODO confirm in the
//   implementation.
// - `target`: builder that receives the copied content.
// - `options`, `scratchSpace`: same parameter types and defaults as the `StreamFdMessageReader`
//   constructors; presumably forwarded to them.
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
| 218 | |
void writeMessageToFd(int fd, MessageBuilder& builder);
// Write the message to the given file descriptor.
//
// This overload is defined inline at the bottom of this header; it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload below.
//
// This function throws an exception on any I/O error. If your code is not exception-safe, be sure
// you catch this exception at the call site. If throwing an exception is not acceptable, you
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().

void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Write the segment array to the given file descriptor. `segments` is an array of word arrays,
// one entry per segment.
//
// This function throws an exception on any I/O error. If your code is not exception-safe, be sure
// you catch this exception at the call site. If throwing an exception is not acceptable, you
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().
| 232 | |
| 233 | // ======================================================================================= |
| 234 | // inline stuff |
| 235 | |
| 236 | inline FlatArrayMessageReader::FlatArrayMessageReader( |
| 237 | kj::ArrayPtr<const word> array, ReaderOptions options) |
| 238 | #ifdef KJ_DEBUG |
| 239 | : UnalignedFlatArrayMessageReader(checkAlignment(array), options) {} |
| 240 | #else |
| 241 | : UnalignedFlatArrayMessageReader(array, options) {} |
| 242 | #endif |
| 243 | |
| 244 | inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) { |
| 245 | return messageToFlatArray(builder.getSegmentsForOutput()); |
| 246 | } |
| 247 | |
| 248 | inline size_t computeSerializedSizeInWords(MessageBuilder& builder) { |
| 249 | return computeSerializedSizeInWords(builder.getSegmentsForOutput()); |
| 250 | } |
| 251 | |
| 252 | inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) { |
| 253 | writeMessage(output, builder.getSegmentsForOutput()); |
| 254 | } |
| 255 | |
| 256 | inline void writeMessageToFd(int fd, MessageBuilder& builder) { |
| 257 | writeMessageToFd(fd, builder.getSegmentsForOutput()); |
| 258 | } |
| 259 | |
| 260 | } // namespace capnp |
| 261 | |