| 1 | // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors | 
|---|
| 2 | // Licensed under the MIT License: | 
|---|
| 3 | // | 
|---|
| 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy | 
|---|
| 5 | // of this software and associated documentation files (the "Software"), to deal | 
|---|
| 6 | // in the Software without restriction, including without limitation the rights | 
|---|
| 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|---|
| 8 | // copies of the Software, and to permit persons to whom the Software is | 
|---|
| 9 | // furnished to do so, subject to the following conditions: | 
|---|
| 10 | // | 
|---|
| 11 | // The above copyright notice and this permission notice shall be included in | 
|---|
| 12 | // all copies or substantial portions of the Software. | 
|---|
| 13 | // | 
|---|
| 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|---|
| 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|---|
| 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
|---|
| 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|---|
| 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|---|
| 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 
|---|
| 20 | // THE SOFTWARE. | 
|---|
| 21 |  | 
|---|
| 22 | // This file implements a simple serialization format for Cap'n Proto messages.  The format | 
|---|
| 23 | // is as follows: | 
|---|
| 24 | // | 
|---|
| 25 | // * 32-bit little-endian segment count (4 bytes). | 
|---|
| 26 | // * 32-bit little-endian size of each segment (4*(segment count) bytes). | 
|---|
| 27 | // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes).  (I.e., if there are an even | 
|---|
| 28 | //     number of segments, there are 4 bytes of zeros here, otherwise there is no padding.) | 
|---|
| 29 | // * Data from each segment, in order (8*sum(segment sizes) bytes) | 
|---|
| 30 | // | 
|---|
| 31 | // This format has some important properties: | 
|---|
| 32 | // - It is self-delimiting, so multiple messages may be written to a stream without any external | 
|---|
| 33 | //   delimiter. | 
|---|
| 34 | // - The total size and position of each segment can be determined by reading only the first part | 
|---|
| 35 | //   of the message, allowing lazy and random-access reading of the segment data. | 
|---|
| 36 | // - A message is always at least 8 bytes. | 
|---|
| 37 | // - A single-segment message can be read entirely in two system calls with no buffering. | 
|---|
| 38 | // - A multi-segment message can be read entirely in three system calls with no buffering. | 
|---|
| 39 | // - The format is appropriate for mmap()ing since all data is aligned. | 
|---|
| 40 |  | 
|---|
| 41 | #pragma once | 
|---|
| 42 |  | 
|---|
| 43 | #if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS) | 
|---|
| 44 | #pragma GCC system_header | 
|---|
| 45 | #endif | 
|---|
| 46 |  | 
|---|
| 47 | #include "message.h" | 
|---|
| 48 | #include <kj/io.h> | 
|---|
| 49 |  | 
|---|
| 50 | namespace capnp { | 
|---|
| 51 |  | 
|---|
| 52 | class UnalignedFlatArrayMessageReader: public MessageReader { | 
|---|
| 53 | // Like FlatArrayMessageReader, but skips checking that the array is properly-aligned. | 
|---|
| 54 | // | 
|---|
| 55 | // WARNING: This only works on architectures that support unaligned reads, like x86/x64 and | 
|---|
| 56 | //   modern ARM. Unaligned access may incur a performance penalty on these platforms. On many | 
|---|
| 57 | //   other platforms, the program will simply crash on unaligned reads. Also note that unaligned | 
|---|
| 58 | //   data access may be considered undefined behavior by compilers; use at your own risk. If at | 
|---|
| 59 | //   all possible, try to ensure your data ends up in aligned buffers rather than rely on this | 
|---|
| 60 | //   class. | 
|---|
| 61 |  | 
|---|
| 62 | public: | 
|---|
| 63 | UnalignedFlatArrayMessageReader( | 
|---|
| 64 | kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions()); | 
|---|
| 65 | kj::ArrayPtr<const word> getSegment(uint id) override; | 
|---|
| 66 | const word* getEnd() const { return end; } | 
|---|
| 67 |  | 
|---|
| 68 | private: | 
|---|
| 69 | // Optimize for single-segment case. | 
|---|
| 70 | kj::ArrayPtr<const word> segment0; | 
|---|
| 71 | kj::Array<kj::ArrayPtr<const word>> moreSegments; | 
|---|
| 72 | const word* end; | 
|---|
| 73 | }; | 
|---|
| 74 |  | 
|---|
| 75 | class FlatArrayMessageReader: public UnalignedFlatArrayMessageReader { | 
|---|
| 76 | // Parses a message from a flat array.  Note that it makes sense to use this together with mmap() | 
|---|
| 77 | // for extremely fast parsing. | 
|---|
| 78 |  | 
|---|
| 79 | public: | 
|---|
| 80 | FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions()); | 
|---|
| 81 | // The array must remain valid until the MessageReader is destroyed. | 
|---|
| 82 |  | 
|---|
| 83 | const word* getEnd() const { return UnalignedFlatArrayMessageReader::getEnd(); } | 
|---|
| 84 | // Get a pointer just past the end of the message as determined by reading the message header. | 
|---|
| 85 | // This could actually be before the end of the input array.  This pointer is useful e.g. if | 
|---|
| 86 | // you know that the input array has extra stuff appended after the message and you want to | 
|---|
| 87 | // get at it. | 
|---|
| 88 |  | 
|---|
| 89 | private: | 
|---|
| 90 | static kj::ArrayPtr<const word> checkAlignment(kj::ArrayPtr<const word> array); | 
|---|
| 91 | }; | 
|---|
| 92 |  | 
|---|
| 93 | kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy( | 
|---|
| 94 | kj::ArrayPtr<const word> array, MessageBuilder& target, | 
|---|
| 95 | ReaderOptions options = ReaderOptions()); | 
|---|
| 96 | // Convenience function which reads a message using `FlatArrayMessageReader` then copies the | 
|---|
| 97 | // content into the target `MessageBuilder`, verifying that the message structure is valid | 
|---|
| 98 | // (although not necessarily that it matches the desired schema). | 
|---|
| 99 | // | 
|---|
| 100 | // Returns an ArrayPtr containing any words left over in the array after consuming the whole | 
|---|
| 101 | // message. This is useful when reading multiple messages that have been concatenated. See also | 
|---|
| 102 | // FlatArrayMessageReader::getEnd(). | 
|---|
| 103 | // | 
|---|
| 104 | // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one | 
|---|
| 105 | // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not | 
|---|
| 106 | // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) | 
|---|
| 107 |  | 
|---|
| 108 | kj::Array<word> messageToFlatArray(MessageBuilder& builder); | 
|---|
| 109 | // Constructs a flat array containing the entire content of the given message. | 
|---|
| 110 | // | 
|---|
| 111 | // To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that | 
|---|
| 112 | // `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being | 
|---|
| 113 | // deleted. For example: | 
|---|
| 114 | // | 
|---|
| 115 | //     kj::Array<capnp::word> words = messageToFlatArray(myMessage); | 
|---|
| 116 | //     kj::ArrayPtr<kj::byte> bytes = words.asBytes(); | 
|---|
| 117 | //     write(fd, bytes.begin(), bytes.size()); | 
|---|
| 118 |  | 
|---|
| 119 | kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); | 
|---|
| 120 | // Version of messageToFlatArray that takes a raw segment array. | 
|---|
| 121 |  | 
|---|
| 122 | size_t computeSerializedSizeInWords(MessageBuilder& builder); | 
|---|
| 123 | // Returns the size, in words, that will be needed to serialize the message, including the header. | 
|---|
| 124 |  | 
|---|
| 125 | size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); | 
|---|
| 126 | // Version of computeSerializedSizeInWords that takes a raw segment array. | 
|---|
| 127 |  | 
|---|
| 128 | size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix); | 
|---|
| 129 | // Given a prefix of a serialized message, try to determine the expected total size of the message, | 
|---|
| 130 | // in words. The returned size is based on the information known so far; it may be an underestimate | 
|---|
| 131 | // if the prefix doesn't contain the full segment table. | 
|---|
| 132 | // | 
|---|
| 133 | // If the returned value is greater than `messagePrefix.size()`, then the message is not yet | 
|---|
| 134 | // complete and the app cannot parse it yet. If the returned value is less than or equal to | 
|---|
| 135 | // `messagePrefix.size()`, then the returned value is the exact total size of the message; any | 
|---|
| 136 | // remaining bytes are part of the next message. | 
|---|
| 137 | // | 
|---|
| 138 | // This function is useful when reading messages from a stream in an asynchronous way, but when | 
|---|
| 139 | // using the full KJ async infrastructure would be too difficult. Each time bytes are received, | 
|---|
| 140 | // use this function to determine if an entire message is ready to be parsed. | 
|---|
| 141 |  | 
|---|
| 142 | // ======================================================================================= | 
|---|
| 143 |  | 
|---|
| 144 | class InputStreamMessageReader: public MessageReader { | 
|---|
| 145 | // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader | 
|---|
| 146 | // for a subclass specific to file descriptors. | 
|---|
| 147 |  | 
|---|
| 148 | public: | 
|---|
| 149 | InputStreamMessageReader(kj::InputStream& inputStream, | 
|---|
| 150 | ReaderOptions options = ReaderOptions(), | 
|---|
| 151 | kj::ArrayPtr<word> scratchSpace = nullptr); | 
|---|
| 152 | ~InputStreamMessageReader() noexcept(false); | 
|---|
| 153 |  | 
|---|
| 154 | // implements MessageReader ---------------------------------------- | 
|---|
| 155 | kj::ArrayPtr<const word> getSegment(uint id) override; | 
|---|
| 156 |  | 
|---|
| 157 | private: | 
|---|
| 158 | kj::InputStream& inputStream; | 
|---|
| 159 | byte* readPos; | 
|---|
| 160 |  | 
|---|
| 161 | // Optimize for single-segment case. | 
|---|
| 162 | kj::ArrayPtr<const word> segment0; | 
|---|
| 163 | kj::Array<kj::ArrayPtr<const word>> moreSegments; | 
|---|
| 164 |  | 
|---|
| 165 | kj::Array<word> ownedSpace; | 
|---|
| 166 | // Only if scratchSpace wasn't big enough. | 
|---|
| 167 |  | 
|---|
| 168 | kj::UnwindDetector unwindDetector; | 
|---|
| 169 | }; | 
|---|
| 170 |  | 
|---|
| 171 | void readMessageCopy(kj::InputStream& input, MessageBuilder& target, | 
|---|
| 172 | ReaderOptions options = ReaderOptions(), | 
|---|
| 173 | kj::ArrayPtr<word> scratchSpace = nullptr); | 
|---|
| 174 | // Convenience function which reads a message using `InputStreamMessageReader` then copies the | 
|---|
| 175 | // content into the target `MessageBuilder`, verifying that the message structure is valid | 
|---|
| 176 | // (although not necessarily that it matches the desired schema). | 
|---|
| 177 | // | 
|---|
| 178 | // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one | 
|---|
| 179 | // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not | 
|---|
| 180 | // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) | 
|---|
| 181 |  | 
|---|
| 182 | void writeMessage(kj::OutputStream& output, MessageBuilder& builder); | 
|---|
| 183 | // Write the message to the given output stream. | 
|---|
| 184 |  | 
|---|
| 185 | void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); | 
|---|
| 186 | // Write the segment array to the given output stream. | 
|---|
| 187 |  | 
|---|
| 188 | // ======================================================================================= | 
|---|
| 189 | // Specializations for reading from / writing to file descriptors. | 
|---|
| 190 |  | 
|---|
| 191 | class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader { | 
|---|
| 192 | // A MessageReader that reads from a steam-based file descriptor. | 
|---|
| 193 |  | 
|---|
| 194 | public: | 
|---|
| 195 | StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(), | 
|---|
| 196 | kj::ArrayPtr<word> scratchSpace = nullptr) | 
|---|
| 197 | : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {} | 
|---|
| 198 | // Read message from a file descriptor, without taking ownership of the descriptor. | 
|---|
| 199 |  | 
|---|
| 200 | StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(), | 
|---|
| 201 | kj::ArrayPtr<word> scratchSpace = nullptr) | 
|---|
| 202 | : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {} | 
|---|
| 203 | // Read a message from a file descriptor, taking ownership of the descriptor. | 
|---|
| 204 |  | 
|---|
| 205 | ~StreamFdMessageReader() noexcept(false); | 
|---|
| 206 | }; | 
|---|
| 207 |  | 
|---|
| 208 | void readMessageCopyFromFd(int fd, MessageBuilder& target, | 
|---|
| 209 | ReaderOptions options = ReaderOptions(), | 
|---|
| 210 | kj::ArrayPtr<word> scratchSpace = nullptr); | 
|---|
| 211 | // Convenience function which reads a message using `StreamFdMessageReader` then copies the | 
|---|
| 212 | // content into the target `MessageBuilder`, verifying that the message structure is valid | 
|---|
| 213 | // (although not necessarily that it matches the desired schema). | 
|---|
| 214 | // | 
|---|
| 215 | // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one | 
|---|
| 216 | // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not | 
|---|
| 217 | // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.) | 
|---|
| 218 |  | 
|---|
| 219 | void writeMessageToFd(int fd, MessageBuilder& builder); | 
|---|
| 220 | // Write the message to the given file descriptor. | 
|---|
| 221 | // | 
|---|
| 222 | // This function throws an exception on any I/O error.  If your code is not exception-safe, be sure | 
|---|
| 223 | // you catch this exception at the call site.  If throwing an exception is not acceptable, you | 
|---|
| 224 | // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). | 
|---|
| 225 |  | 
|---|
| 226 | void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments); | 
|---|
| 227 | // Write the segment array to the given file descriptor. | 
|---|
| 228 | // | 
|---|
| 229 | // This function throws an exception on any I/O error.  If your code is not exception-safe, be sure | 
|---|
| 230 | // you catch this exception at the call site.  If throwing an exception is not acceptable, you | 
|---|
| 231 | // can implement your own OutputStream with arbitrary error handling and then use writeMessage(). | 
|---|
| 232 |  | 
|---|
| 233 | // ======================================================================================= | 
|---|
| 234 | // inline stuff | 
|---|
| 235 |  | 
|---|
| 236 | inline FlatArrayMessageReader::FlatArrayMessageReader( | 
|---|
| 237 | kj::ArrayPtr<const word> array, ReaderOptions options) | 
|---|
| 238 | #ifdef KJ_DEBUG | 
|---|
| 239 | : UnalignedFlatArrayMessageReader(checkAlignment(array), options) {} | 
|---|
| 240 | #else | 
|---|
| 241 | : UnalignedFlatArrayMessageReader(array, options) {} | 
|---|
| 242 | #endif | 
|---|
| 243 |  | 
|---|
| 244 | inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) { | 
|---|
| 245 | return messageToFlatArray(builder.getSegmentsForOutput()); | 
|---|
| 246 | } | 
|---|
| 247 |  | 
|---|
| 248 | inline size_t computeSerializedSizeInWords(MessageBuilder& builder) { | 
|---|
| 249 | return computeSerializedSizeInWords(builder.getSegmentsForOutput()); | 
|---|
| 250 | } | 
|---|
| 251 |  | 
|---|
| 252 | inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) { | 
|---|
| 253 | writeMessage(output, builder.getSegmentsForOutput()); | 
|---|
| 254 | } | 
|---|
| 255 |  | 
|---|
| 256 | inline void writeMessageToFd(int fd, MessageBuilder& builder) { | 
|---|
| 257 | writeMessageToFd(fd, builder.getSegmentsForOutput()); | 
|---|
| 258 | } | 
|---|
| 259 |  | 
|---|
| 260 | }  // namespace capnp | 
|---|
| 261 |  | 
|---|