1// Copyright (c) 2013-2016 Sandstorm Development Group, Inc. and contributors
2// Licensed under the MIT License:
3//
4// Permission is hereby granted, free of charge, to any person obtaining a copy
5// of this software and associated documentation files (the "Software"), to deal
6// in the Software without restriction, including without limitation the rights
7// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8// copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in
12// all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20// THE SOFTWARE.
21
22#include <kj/common.h>
23#include <kj/memory.h>
24#include <kj/mutex.h>
25#include <kj/debug.h>
26#include <kj/vector.h>
27#include "common.h"
28#include "layout.h"
29#include "any.h"
30
31#pragma once
32
33#if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS)
34#pragma GCC system_header
35#endif
36
37namespace capnp {
38
39namespace _ { // private
40 class ReaderArena;
41 class BuilderArena;
42}
43
44class StructSchema;
45class Orphanage;
46template <typename T>
47class Orphan;
48
49// =======================================================================================
50
51struct ReaderOptions {
52 // Options controlling how data is read.
53
54 uint64_t traversalLimitInWords = 8 * 1024 * 1024;
55 // Limits how many total words of data are allowed to be traversed. Traversal is counted when
56 // a new struct or list builder is obtained, e.g. from a get() accessor. This means that calling
57 // the getter for the same sub-struct multiple times will cause it to be double-counted. Once
58 // the traversal limit is reached, an error will be reported.
59 //
60 // This limit exists for security reasons. It is possible for an attacker to construct a message
61 // in which multiple pointers point at the same location. This is technically invalid, but hard
62 // to detect. Using such a message, an attacker could cause a message which is small on the wire
63 // to appear much larger when actually traversed, possibly exhausting server resources leading to
64 // denial-of-service.
65 //
66 // It makes sense to set a traversal limit that is much larger than the underlying message.
67 // Together with sensible coding practices (e.g. trying to avoid calling sub-object getters
68 // multiple times, which is expensive anyway), this should provide adequate protection without
69 // inconvenience.
70 //
71 // The default limit is 64 MiB. This may or may not be a sensible number for any given use case,
72 // but probably at least prevents easy exploitation while also avoiding causing problems in most
73 // typical cases.
74
75 int nestingLimit = 64;
76 // Limits how deeply-nested a message structure can be, e.g. structs containing other structs or
77 // lists of structs.
78 //
79 // Like the traversal limit, this limit exists for security reasons. Since it is common to use
80 // recursive code to traverse recursive data structures, an attacker could easily cause a stack
81 // overflow by sending a very-deeply-nested (or even cyclic) message, without the message even
82 // being very large. The default limit of 64 is probably low enough to prevent any chance of
83 // stack overflow, yet high enough that it is never a problem in practice.
84};
85
86class MessageReader {
87 // Abstract interface for an object used to read a Cap'n Proto message. Subclasses of
88 // MessageReader are responsible for reading the raw, flat message content. Callers should
89 // usually call `messageReader.getRoot<MyStructType>()` to get a `MyStructType::Reader`
90 // representing the root of the message, then use that to traverse the message content.
91 //
92 // Some common subclasses of `MessageReader` include `SegmentArrayMessageReader`, whose
93 // constructor accepts pointers to the raw data, and `StreamFdMessageReader` (from
94 // `serialize.h`), which reads the message from a file descriptor. One might implement other
95 // subclasses to handle things like reading from shared memory segments, mmap()ed files, etc.
96
97public:
98 MessageReader(ReaderOptions options);
99 // It is suggested that subclasses take ReaderOptions as a constructor parameter, but give it a
100 // default value of "ReaderOptions()". The base class constructor doesn't have a default value
101 // in order to remind subclasses that they really need to give the user a way to provide this.
102
103 virtual ~MessageReader() noexcept(false);
104
105 virtual kj::ArrayPtr<const word> getSegment(uint id) = 0;
106 // Gets the segment with the given ID, or returns null if no such segment exists. This method
107 // will be called at most once for each segment ID.
108 //
109 // The returned array must be aligned properly for the host architecture. This means that on
110 // x86/x64, alignment is optional, though recommended for performance, whereas on many other
111 // architectures, alignment is required.
112
113 inline const ReaderOptions& getOptions();
114 // Get the options passed to the constructor.
115
116 template <typename RootType>
117 typename RootType::Reader getRoot();
118 // Get the root struct of the message, interpreting it as the given struct type.
119
120 template <typename RootType, typename SchemaType>
121 typename RootType::Reader getRoot(SchemaType schema);
122 // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
123 // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
124 // use this.
125
126 bool isCanonical();
127 // Returns whether the message encoded in the reader is in canonical form.
128
129private:
130 ReaderOptions options;
131
132 // Space in which we can construct a ReaderArena. We don't use ReaderArena directly here
133 // because we don't want clients to have to #include arena.h, which itself includes a bunch of
134 // other headers. We don't use a pointer to a ReaderArena because that would require an
135 // extra malloc on every message which could be expensive when processing small messages.
136 void* arenaSpace[18 + sizeof(kj::MutexGuarded<void*>) / sizeof(void*)];
137 bool allocatedArena;
138
139 _::ReaderArena* arena() { return reinterpret_cast<_::ReaderArena*>(arenaSpace); }
140 AnyPointer::Reader getRootInternal();
141};
142
143class MessageBuilder {
144 // Abstract interface for an object used to allocate and build a message. Subclasses of
145 // MessageBuilder are responsible for allocating the space in which the message will be written.
146 // The most common subclass is `MallocMessageBuilder`, but other subclasses may be used to do
147 // tricky things like allocate messages in shared memory or mmap()ed files.
148 //
149 // Creating a new message ususually means allocating a new MessageBuilder (ideally on the stack)
150 // and then calling `messageBuilder.initRoot<MyStructType>()` to get a `MyStructType::Builder`.
151 // That, in turn, can be used to fill in the message content. When done, you can call
152 // `messageBuilder.getSegmentsForOutput()` to get a list of flat data arrays containing the
153 // message.
154
155public:
156 MessageBuilder();
157 virtual ~MessageBuilder() noexcept(false);
158 KJ_DISALLOW_COPY(MessageBuilder);
159
160 struct SegmentInit {
161 kj::ArrayPtr<word> space;
162
163 size_t wordsUsed;
164 // Number of words in `space` which are used; the rest are free space in which additional
165 // objects may be allocated.
166 };
167
168 explicit MessageBuilder(kj::ArrayPtr<SegmentInit> segments);
169 // Create a MessageBuilder backed by existing memory. This is an advanced interface that most
170 // people should not use. THIS METHOD IS INSECURE; see below.
171 //
172 // This allows a MessageBuilder to be constructed to modify an in-memory message without first
173 // making a copy of the content. This is especially useful in conjunction with mmap().
174 //
175 // The contents of each segment must outlive the MessageBuilder, but the SegmentInit array itself
176 // only need outlive the constructor.
177 //
178 // SECURITY: Do not use this in conjunction with untrusted data. This constructor assumes that
179 // the input message is valid. This constructor is designed to be used with data you control,
180 // e.g. an mmap'd file which is owned and accessed by only one program. When reading data you
181 // do not trust, you *must* load it into a Reader and then copy into a Builder as a means of
182 // validating the content.
183 //
184 // WARNING: It is NOT safe to initialize a MessageBuilder in this way from memory that is
185 // currently in use by another MessageBuilder or MessageReader. Other readers/builders will
186 // not observe changes to the segment sizes nor newly-allocated segments caused by allocating
187 // new objects in this message.
188
189 virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) = 0;
190 // Allocates an array of at least the given number of zero'd words, throwing an exception or
191 // crashing if this is not possible. It is expected that this method will usually return more
192 // space than requested, and the caller should use that extra space as much as possible before
193 // allocating more. The returned space remains valid at least until the MessageBuilder is
194 // destroyed.
195 //
196 // allocateSegment() is responsible for zeroing the memory before returning. This is required
197 // because otherwise the Cap'n Proto implementation would have to zero the memory anyway, and
198 // many allocators are able to provide already-zero'd memory more efficiently.
199 //
200 // The returned array must be aligned properly for the host architecture. This means that on
201 // x86/x64, alignment is optional, though recommended for performance, whereas on many other
202 // architectures, alignment is required.
203
204 template <typename RootType>
205 typename RootType::Builder initRoot();
206 // Initialize the root struct of the message as the given struct type.
207
208 template <typename Reader>
209 void setRoot(Reader&& value);
210 // Set the root struct to a deep copy of the given struct.
211
212 template <typename RootType>
213 typename RootType::Builder getRoot();
214 // Get the root struct of the message, interpreting it as the given struct type.
215
216 template <typename RootType, typename SchemaType>
217 typename RootType::Builder getRoot(SchemaType schema);
218 // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
219 // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
220 // use this.
221
222 template <typename RootType, typename SchemaType>
223 typename RootType::Builder initRoot(SchemaType schema);
224 // Dynamically init the root struct of the message using the given schema (a StructSchema).
225 // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
226 // use this.
227
228 template <typename T>
229 void adoptRoot(Orphan<T>&& orphan);
230 // Like setRoot() but adopts the orphan without copying.
231
232 kj::ArrayPtr<const kj::ArrayPtr<const word>> getSegmentsForOutput();
233 // Get the raw data that makes up the message.
234
235 Orphanage getOrphanage();
236
237 bool isCanonical();
238 // Check whether the message builder is in canonical form
239
240private:
241 void* arenaSpace[22];
242 // Space in which we can construct a BuilderArena. We don't use BuilderArena directly here
243 // because we don't want clients to have to #include arena.h, which itself includes a bunch of
244 // big STL headers. We don't use a pointer to a BuilderArena because that would require an
245 // extra malloc on every message which could be expensive when processing small messages.
246
247 bool allocatedArena = false;
248 // We have to initialize the arena lazily because when we do so we want to allocate the root
249 // pointer immediately, and this will allocate a segment, which requires a virtual function
250 // call on the MessageBuilder. We can't do such a call in the constructor since the subclass
251 // isn't constructed yet. This is kind of annoying because it means that getOrphanage() is
252 // not thread-safe, but that shouldn't be a huge deal...
253
254 _::BuilderArena* arena() { return reinterpret_cast<_::BuilderArena*>(arenaSpace); }
255 _::SegmentBuilder* getRootSegment();
256 AnyPointer::Builder getRootInternal();
257};
258
259template <typename RootType>
260typename RootType::Reader readMessageUnchecked(const word* data);
261// IF THE INPUT IS INVALID, THIS MAY CRASH, CORRUPT MEMORY, CREATE A SECURITY HOLE IN YOUR APP,
262// MURDER YOUR FIRST-BORN CHILD, AND/OR BRING ABOUT ETERNAL DAMNATION ON ALL OF HUMANITY. DO NOT
263// USE UNLESS YOU UNDERSTAND THE CONSEQUENCES.
264//
265// Given a pointer to a known-valid message located in a single contiguous memory segment,
266// returns a reader for that message. No bounds-checking will be done while traversing this
267// message. Use this only if you have already verified that all pointers are valid and in-bounds,
268// and there are no far pointers in the message.
269//
270// To create a message that can be passed to this function, build a message using a MallocAllocator
271// whose preferred segment size is larger than the message size. This guarantees that the message
272// will be allocated as a single segment, meaning getSegmentsForOutput() returns a single word
273// array. That word array is your message; you may pass a pointer to its first word into
274// readMessageUnchecked() to read the message.
275//
276// This can be particularly handy for embedding messages in generated code: you can
277// embed the raw bytes (using AlignedData) then make a Reader for it using this. This is the way
278// default values are embedded in code generated by the Cap'n Proto compiler. E.g., if you have
279// a message MyMessage, you can read its default value like so:
280// MyMessage::Reader reader = Message<MyMessage>::readMessageUnchecked(MyMessage::DEFAULT.words);
281//
282// To sanitize a message from an untrusted source such that it can be safely passed to
283// readMessageUnchecked(), use copyToUnchecked().
284
285template <typename Reader>
286void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer);
287// Copy the content of the given reader into the given buffer, such that it can safely be passed to
288// readMessageUnchecked(). The buffer's size must be exactly reader.totalSizeInWords() + 1,
289// otherwise an exception will be thrown. The buffer must be zero'd before calling.
290
291template <typename RootType>
292typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data);
293// Interprets the given data as a single, data-only struct. Only primitive fields (booleans,
294// numbers, and enums) will be readable; all pointers will be null. This is useful if you want
295// to use Cap'n Proto as a language/platform-neutral way to pack some bits.
296//
297// The input is a word array rather than a byte array to enforce alignment. If you have a byte
298// array which you know is word-aligned (or if your platform supports unaligned reads and you don't
299// mind the performance penalty), then you can use `reinterpret_cast` to convert a byte array into
300// a word array:
301//
302// kj::arrayPtr(reinterpret_cast<const word*>(bytes.begin()),
303// reinterpret_cast<const word*>(bytes.end()))
304
305template <typename BuilderType>
306typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder);
307// Given a struct builder, get the underlying data section as a word array, suitable for passing
308// to `readDataStruct()`.
309//
310// Note that you may call `.toBytes()` on the returned value to convert to `ArrayPtr<const byte>`.
311
312template <typename Type>
313static typename Type::Reader defaultValue();
314// Get a default instance of the given struct or list type.
315//
316// TODO(cleanup): Find a better home for this function?
317
318// =======================================================================================
319
320class SegmentArrayMessageReader: public MessageReader {
321 // A simple MessageReader that reads from an array of word arrays representing all segments.
322 // In particular you can read directly from the output of MessageBuilder::getSegmentsForOutput()
323 // (although it would probably make more sense to call builder.getRoot().asReader() in that case).
324
325public:
326 SegmentArrayMessageReader(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments,
327 ReaderOptions options = ReaderOptions());
328 // Creates a message pointing at the given segment array, without taking ownership of the
329 // segments. All arrays passed in must remain valid until the MessageReader is destroyed.
330
331 KJ_DISALLOW_COPY(SegmentArrayMessageReader);
332 ~SegmentArrayMessageReader() noexcept(false);
333
334 virtual kj::ArrayPtr<const word> getSegment(uint id) override;
335
336private:
337 kj::ArrayPtr<const kj::ArrayPtr<const word>> segments;
338};
339
340enum class AllocationStrategy: uint8_t {
341 FIXED_SIZE,
342 // The builder will prefer to allocate the same amount of space for each segment with no
343 // heuristic growth. It will still allocate larger segments when the preferred size is too small
344 // for some single object. This mode is generally not recommended, but can be particularly useful
345 // for testing in order to force a message to allocate a predictable number of segments. Note
346 // that you can force every single object in the message to be located in a separate segment by
347 // using this mode with firstSegmentWords = 0.
348
349 GROW_HEURISTICALLY
350 // The builder will heuristically decide how much space to allocate for each segment. Each
351 // allocated segment will be progressively larger than the previous segments on the assumption
352 // that message sizes are exponentially distributed. The total number of segments that will be
353 // allocated for a message of size n is O(log n).
354};
355
356constexpr uint SUGGESTED_FIRST_SEGMENT_WORDS = 1024;
357constexpr AllocationStrategy SUGGESTED_ALLOCATION_STRATEGY = AllocationStrategy::GROW_HEURISTICALLY;
358
359class MallocMessageBuilder: public MessageBuilder {
360 // A simple MessageBuilder that uses malloc() (actually, calloc()) to allocate segments. This
361 // implementation should be reasonable for any case that doesn't require writing the message to
362 // a specific location in memory.
363
364public:
365 explicit MallocMessageBuilder(uint firstSegmentWords = SUGGESTED_FIRST_SEGMENT_WORDS,
366 AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
367 // Creates a BuilderContext which allocates at least the given number of words for the first
368 // segment, and then uses the given strategy to decide how much to allocate for subsequent
369 // segments. When choosing a value for firstSegmentWords, consider that:
370 // 1) Reading and writing messages gets slower when multiple segments are involved, so it's good
371 // if most messages fit in a single segment.
372 // 2) Unused bytes will not be written to the wire, so generally it is not a big deal to allocate
373 // more space than you need. It only becomes problematic if you are allocating many messages
374 // in parallel and thus use lots of memory, or if you allocate so much extra space that just
375 // zeroing it out becomes a bottleneck.
376 // The defaults have been chosen to be reasonable for most people, so don't change them unless you
377 // have reason to believe you need to.
378
379 explicit MallocMessageBuilder(kj::ArrayPtr<word> firstSegment,
380 AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
381 // This version always returns the given array for the first segment, and then proceeds with the
382 // allocation strategy. This is useful for optimization when building lots of small messages in
383 // a tight loop: you can reuse the space for the first segment.
384 //
385 // firstSegment MUST be zero-initialized. MallocMessageBuilder's destructor will write new zeros
386 // over any space that was used so that it can be reused.
387
388 KJ_DISALLOW_COPY(MallocMessageBuilder);
389 virtual ~MallocMessageBuilder() noexcept(false);
390
391 virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
392
393private:
394 uint nextSize;
395 AllocationStrategy allocationStrategy;
396
397 bool ownFirstSegment;
398 bool returnedFirstSegment;
399
400 void* firstSegment;
401 kj::Vector<void*> moreSegments;
402};
403
404class FlatMessageBuilder: public MessageBuilder {
405 // THIS IS NOT THE CLASS YOU'RE LOOKING FOR.
406 //
407 // If you want to write a message into already-existing scratch space, use `MallocMessageBuilder`
408 // and pass the scratch space to its constructor. It will then only fall back to malloc() if
409 // the scratch space is not large enough.
410 //
411 // Do NOT use this class unless you really know what you're doing. This class is problematic
412 // because it requires advance knowledge of the size of your message, which is usually impossible
413 // to determine without actually building the message. The class was created primarily to
414 // implement `copyToUnchecked()`, which itself exists only to support other internal parts of
415 // the Cap'n Proto implementation.
416
417public:
418 explicit FlatMessageBuilder(kj::ArrayPtr<word> array);
419 KJ_DISALLOW_COPY(FlatMessageBuilder);
420 virtual ~FlatMessageBuilder() noexcept(false);
421
422 void requireFilled();
423 // Throws an exception if the flat array is not exactly full.
424
425 virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
426
427private:
428 kj::ArrayPtr<word> array;
429 bool allocated;
430};
431
432// =======================================================================================
433// implementation details
434
435inline const ReaderOptions& MessageReader::getOptions() {
436 return options;
437}
438
439template <typename RootType>
440inline typename RootType::Reader MessageReader::getRoot() {
441 return getRootInternal().getAs<RootType>();
442}
443
444template <typename RootType>
445inline typename RootType::Builder MessageBuilder::initRoot() {
446 return getRootInternal().initAs<RootType>();
447}
448
449template <typename Reader>
450inline void MessageBuilder::setRoot(Reader&& value) {
451 getRootInternal().setAs<FromReader<Reader>>(value);
452}
453
454template <typename RootType>
455inline typename RootType::Builder MessageBuilder::getRoot() {
456 return getRootInternal().getAs<RootType>();
457}
458
459template <typename T>
460void MessageBuilder::adoptRoot(Orphan<T>&& orphan) {
461 return getRootInternal().adopt(kj::mv(orphan));
462}
463
464template <typename RootType, typename SchemaType>
465typename RootType::Reader MessageReader::getRoot(SchemaType schema) {
466 return getRootInternal().getAs<RootType>(schema);
467}
468
469template <typename RootType, typename SchemaType>
470typename RootType::Builder MessageBuilder::getRoot(SchemaType schema) {
471 return getRootInternal().getAs<RootType>(schema);
472}
473
474template <typename RootType, typename SchemaType>
475typename RootType::Builder MessageBuilder::initRoot(SchemaType schema) {
476 return getRootInternal().initAs<RootType>(schema);
477}
478
479template <typename RootType>
480typename RootType::Reader readMessageUnchecked(const word* data) {
481 return AnyPointer::Reader(_::PointerReader::getRootUnchecked(data)).getAs<RootType>();
482}
483
484template <typename Reader>
485void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer) {
486 FlatMessageBuilder builder(uncheckedBuffer);
487 builder.setRoot(kj::fwd<Reader>(reader));
488 builder.requireFilled();
489}
490
491template <typename RootType>
492typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data) {
493 return typename RootType::Reader(_::StructReader(data));
494}
495
496template <typename BuilderType>
497typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder) {
498 auto bytes = _::PointerHelpers<FromBuilder<BuilderType>>::getInternalBuilder(kj::mv(builder))
499 .getDataSectionAsBlob();
500 return kj::arrayPtr(reinterpret_cast<word*>(bytes.begin()),
501 reinterpret_cast<word*>(bytes.end()));
502}
503
504template <typename Type>
505static typename Type::Reader defaultValue() {
506 return typename Type::Reader(_::StructReader());
507}
508
509template <typename T>
510kj::Array<word> canonicalize(T&& reader) {
511 return _::PointerHelpers<FromReader<T>>::getInternalReader(reader).canonicalize();
512}
513
514} // namespace capnp
515