1// -*- mode: c++ -*-
2
3// Copyright (c) 2010, Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33
34// byte_cursor.h: Classes for parsing values from a buffer of bytes.
35// The ByteCursor class provides a convenient interface for reading
36// fixed-size integers of arbitrary endianness, being thorough about
37// checking for buffer overruns.
38
39#ifndef COMMON_BYTE_CURSOR_H_
40#define COMMON_BYTE_CURSOR_H_
41
42#include <assert.h>
43#include <stdint.h>
44#include <stdlib.h>
45#include <string.h>
46#include <string>
47
48#include "common/using_std_string.h"
49
50namespace google_breakpad {
51
// A half-open range of bytes, [start, end). A ByteBuffer only records the
// two boundary pointers; it never allocates, copies, or frees the bytes
// they refer to.
struct ByteBuffer {
  // Construct an empty buffer whose boundaries are both null.
  ByteBuffer() : start(NULL), end(NULL) { }

  // Construct a buffer covering the SET_SIZE bytes beginning at SET_START.
  ByteBuffer(const uint8_t* set_start, size_t set_size)
      : start(set_start), end(set_start + set_size) { }

  ~ByteBuffer() { }

  // Two ByteBuffers compare equal when they share both boundaries, that
  // is, when they denote the very same region of memory; the bytes
  // themselves are never compared. Handy in unit tests, and when
  // ByteBuffers are used to name regions of a larger buffer.
  bool operator==(const ByteBuffer& that) const {
    return that.start == start && that.end == end;
  }
  bool operator!=(const ByteBuffer& that) const {
    return !(*this == that);
  }

  // Return the number of bytes between start and end. (The name departs
  // from the C++ style guide, but the function definitely belongs here.)
  size_t Size() const {
    assert(start <= end);
    return static_cast<size_t>(end - start);
  }

  // The first byte of the buffer.
  const uint8_t* start;

  // One past the last byte of the buffer.
  const uint8_t* end;
};
77
78// A cursor pointing into a ByteBuffer that can parse numbers of various
79// widths and representations, strings, and data blocks, advancing through
80// the buffer as it goes. All ByteCursor operations check that accesses
81// haven't gone beyond the end of the enclosing ByteBuffer.
82class ByteCursor {
83 public:
84 // Create a cursor reading bytes from the start of BUFFER. By default, the
85 // cursor reads multi-byte values in little-endian form.
86 ByteCursor(const ByteBuffer* buffer, bool big_endian = false)
87 : buffer_(buffer), here_(buffer->start),
88 big_endian_(big_endian), complete_(true) { }
89
90 // Accessor and setter for this cursor's endianness flag.
91 bool big_endian() const { return big_endian_; }
92 void set_big_endian(bool big_endian) { big_endian_ = big_endian; }
93
94 // Accessor and setter for this cursor's current position. The setter
95 // returns a reference to this cursor.
96 const uint8_t* here() const { return here_; }
97 ByteCursor& set_here(const uint8_t* here) {
98 assert(buffer_->start <= here && here <= buffer_->end);
99 here_ = here;
100 return *this;
101 }
102
103 // Return the number of bytes available to read at the cursor.
104 size_t Available() const { return size_t(buffer_->end - here_); }
105
106 // Return true if this cursor is at the end of its buffer.
107 bool AtEnd() const { return Available() == 0; }
108
109 // When used as a boolean value this cursor converts to true if all
110 // prior reads have been completed, or false if we ran off the end
111 // of the buffer.
112 operator bool() const { return complete_; }
113
114 // Read a SIZE-byte integer at this cursor, signed if IS_SIGNED is true,
115 // unsigned otherwise, using the cursor's established endianness, and set
116 // *RESULT to the number. If we read off the end of our buffer, clear
117 // this cursor's complete_ flag, and store a dummy value in *RESULT.
118 // Return a reference to this cursor.
119 template<typename T>
120 ByteCursor& Read(size_t size, bool is_signed, T* result) {
121 if (CheckAvailable(size)) {
122 T v = 0;
123 if (big_endian_) {
124 for (size_t i = 0; i < size; i++)
125 v = (v << 8) + here_[i];
126 } else {
127 // This loop condition looks weird, but size_t is unsigned, so
128 // decrementing i after it is zero yields the largest size_t value.
129 for (size_t i = size - 1; i < size; i--)
130 v = (v << 8) + here_[i];
131 }
132 if (is_signed && size < sizeof(T)) {
133 size_t sign_bit = (T)1 << (size * 8 - 1);
134 v = (v ^ sign_bit) - sign_bit;
135 }
136 here_ += size;
137 *result = v;
138 } else {
139 *result = (T) 0xdeadbeef;
140 }
141 return *this;
142 }
143
144 // Read an integer, using the cursor's established endianness and
145 // *RESULT's size and signedness, and set *RESULT to the number. If we
146 // read off the end of our buffer, clear this cursor's complete_ flag.
147 // Return a reference to this cursor.
148 template<typename T>
149 ByteCursor& operator>>(T& result) {
150 bool T_is_signed = (T)-1 < 0;
151 return Read(sizeof(T), T_is_signed, &result);
152 }
153
154 // Copy the SIZE bytes at the cursor to BUFFER, and advance this
155 // cursor to the end of them. If we read off the end of our buffer,
156 // clear this cursor's complete_ flag, and set *POINTER to NULL.
157 // Return a reference to this cursor.
158 ByteCursor& Read(uint8_t* buffer, size_t size) {
159 if (CheckAvailable(size)) {
160 memcpy(buffer, here_, size);
161 here_ += size;
162 }
163 return *this;
164 }
165
166 // Set STR to a copy of the '\0'-terminated string at the cursor. If the
167 // byte buffer does not contain a terminating zero, clear this cursor's
168 // complete_ flag, and set STR to the empty string. Return a reference to
169 // this cursor.
170 ByteCursor& CString(string* str) {
171 const uint8_t* end
172 = static_cast<const uint8_t*>(memchr(here_, '\0', Available()));
173 if (end) {
174 str->assign(reinterpret_cast<const char*>(here_), end - here_);
175 here_ = end + 1;
176 } else {
177 str->clear();
178 here_ = buffer_->end;
179 complete_ = false;
180 }
181 return *this;
182 }
183
184 // Like CString(STR), but extract the string from a fixed-width buffer
185 // LIMIT bytes long, which may or may not contain a terminating '\0'
186 // byte. Specifically:
187 //
188 // - If there are not LIMIT bytes available at the cursor, clear the
189 // cursor's complete_ flag and set STR to the empty string.
190 //
191 // - Otherwise, if the LIMIT bytes at the cursor contain any '\0'
192 // characters, set *STR to a copy of the bytes before the first '\0',
193 // and advance the cursor by LIMIT bytes.
194 //
195 // - Otherwise, set *STR to a copy of those LIMIT bytes, and advance the
196 // cursor by LIMIT bytes.
197 ByteCursor& CString(string* str, size_t limit) {
198 if (CheckAvailable(limit)) {
199 const uint8_t* end
200 = static_cast<const uint8_t*>(memchr(here_, '\0', limit));
201 if (end)
202 str->assign(reinterpret_cast<const char*>(here_), end - here_);
203 else
204 str->assign(reinterpret_cast<const char*>(here_), limit);
205 here_ += limit;
206 } else {
207 str->clear();
208 }
209 return *this;
210 }
211
212 // Set *POINTER to point to the SIZE bytes at the cursor, and advance
213 // this cursor to the end of them. If SIZE is omitted, don't move the
214 // cursor. If we read off the end of our buffer, clear this cursor's
215 // complete_ flag, and set *POINTER to NULL. Return a reference to this
216 // cursor.
217 ByteCursor& PointTo(const uint8_t** pointer, size_t size = 0) {
218 if (CheckAvailable(size)) {
219 *pointer = here_;
220 here_ += size;
221 } else {
222 *pointer = NULL;
223 }
224 return *this;
225 }
226
227 // Skip SIZE bytes at the cursor. If doing so would advance us off
228 // the end of our buffer, clear this cursor's complete_ flag, and
229 // set *POINTER to NULL. Return a reference to this cursor.
230 ByteCursor& Skip(size_t size) {
231 if (CheckAvailable(size))
232 here_ += size;
233 return *this;
234 }
235
236 private:
237 // If there are at least SIZE bytes available to read from the buffer,
238 // return true. Otherwise, set here_ to the end of the buffer, set
239 // complete_ to false, and return false.
240 bool CheckAvailable(size_t size) {
241 if (Available() >= size) {
242 return true;
243 } else {
244 here_ = buffer_->end;
245 complete_ = false;
246 return false;
247 }
248 }
249
250 // The buffer we're reading bytes from.
251 const ByteBuffer* buffer_;
252
253 // The next byte within buffer_ that we'll read.
254 const uint8_t* here_;
255
256 // True if we should read numbers in big-endian form; false if we
257 // should read in little-endian form.
258 bool big_endian_;
259
260 // True if we've been able to read all we've been asked to.
261 bool complete_;
262};
263
264} // namespace google_breakpad
265
266#endif // COMMON_BYTE_CURSOR_H_
267