1// Copyright 2015 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15////////////////////////////////////////////////////////////////////////////////
16
17#include "src/binary_parse/range_checked_byte_ptr.h"
18
19#include <assert.h>
20#include <cstddef>
21#include <cstring>
22
23namespace piex {
24namespace binary_parse {
25
26#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
27#define BREAK_IF_DEBUGGING() assert(false)
28#else
29#define BREAK_IF_DEBUGGING() assert(true)
30#endif
31
32namespace {
33class MemoryPagedByteArray : public PagedByteArray {
34 public:
35 MemoryPagedByteArray(const unsigned char *buffer, const size_t len);
36
37 virtual size_t length() const;
38 virtual size_t pageSize() const;
39 virtual void getPage(size_t page_index, const unsigned char **begin,
40 const unsigned char **end, PagePtr *page) const;
41
42 private:
43 const unsigned char *buffer_;
44 const size_t len_;
45};
46
47MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer,
48 const size_t len)
49 : buffer_(buffer), len_(len) {}
50
51size_t MemoryPagedByteArray::length() const { return len_; }
52
53size_t MemoryPagedByteArray::pageSize() const { return len_; }
54
55void MemoryPagedByteArray::getPage(size_t /* page_index */,
56 const unsigned char **begin,
57 const unsigned char **end,
58 PagePtr *page) const {
59 *begin = buffer_;
60 *end = buffer_ + len_;
61 *page = PagePtr();
62}
63
64// A functor that does nothing. This is used as a no-op shared pointer
65// deallocator below.
66class NullFunctor {
67 public:
68 void operator()() {}
69 void operator()(PagedByteArray * /* p */) const {}
70};
71} // namespace
72
73PagedByteArray::~PagedByteArray() {}
74
75RangeCheckedBytePtr::RangeCheckedBytePtr()
76 : array_(),
77 page_data_(NULL),
78 current_pos_(0),
79 sub_array_begin_(0),
80 sub_array_end_(0),
81 page_begin_offset_(0),
82 current_page_len_(0),
83 error_flag_(RANGE_CHECKED_BYTE_ERROR) {}
84
85RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array,
86 const size_t len)
87 : array_(new MemoryPagedByteArray(array, len)),
88 page_data_(NULL),
89 current_pos_(0),
90 sub_array_begin_(0),
91 sub_array_end_(len),
92 page_begin_offset_(0),
93 current_page_len_(0),
94 error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
95 assert(array);
96 if (array == NULL) {
97 error_flag_ = RANGE_CHECKED_BYTE_ERROR;
98 }
99}
100
101RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array)
102 : array_(array, NullFunctor()),
103 page_data_(NULL),
104 current_pos_(0),
105 sub_array_begin_(0),
106 sub_array_end_(array->length()),
107 page_begin_offset_(0),
108 current_page_len_(0),
109 error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {}
110
111RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() {
112 return RangeCheckedBytePtr();
113}
114
115RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray(
116 size_t pos, size_t length) const {
117 RangeCheckedBytePtr sub_result = (*this) + pos;
118 if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) {
119 sub_result.sub_array_begin_ = sub_result.current_pos_;
120 sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length;
121
122 // Restrict the boundaries of the current page to the newly set sub-array.
123 sub_result.restrictPageToSubArray();
124
125 return sub_result;
126 } else {
127 error_flag_ = RANGE_CHECKED_BYTE_ERROR;
128 return invalidPointer();
129 }
130}
131
132size_t RangeCheckedBytePtr::offsetInArray() const {
133 // sub_array_begin_ <= current_pos_ is a class invariant, but protect
134 // against violations of this invariant.
135 if (sub_array_begin_ <= current_pos_) {
136 return current_pos_ - sub_array_begin_;
137 } else {
138 assert(false);
139 return 0;
140 }
141}
142
143std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const {
144 std::vector<unsigned char> bytes = extractBytes(pos, length);
145 std::string result;
146 result.reserve(bytes.size());
147 for (size_t i = 0; i < bytes.size(); ++i) {
148 result.push_back(static_cast<char>(bytes[i]));
149 }
150 return result;
151}
152
153std::vector<unsigned char> RangeCheckedBytePtr::extractBytes(
154 size_t pos, size_t length) const {
155 std::vector<unsigned char> result;
156 if (pos + length < pos /* overflow */ || remainingLength() < pos + length) {
157 BREAK_IF_DEBUGGING();
158 error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
159 return result;
160 }
161 result.reserve(length);
162 for (size_t i = 0; i < length; ++i) {
163 result.push_back((*this)[pos + i]);
164 }
165 return result;
166}
167
168bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
169 if (x.array_ != y.array_) {
170 assert(false);
171 return false;
172 }
173
174 return x.current_pos_ == y.current_pos_;
175}
176
177bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
178 return !(x == y);
179}
180
181void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const {
182 // The offset should always lie within the bounds of the sub-array (this
183 // condition is enforced at the callsite). However, even if the offset lies
184 // outside the sub-array, the restrictPageToSubArray() call at the end
185 // ensures that the object is left in a consistent state that maintains the
186 // class invariants.
187 assert(offset >= sub_array_begin_ && offset < sub_array_end_);
188
189 // Ensure that offset lies within the array.
190 if (offset >= array_->length()) {
191 assert(false);
192 return;
193 }
194
195 // Determine the index of the page to request.
196 size_t page_index = offset / array_->pageSize();
197
198 // Get the page.
199 const unsigned char *page_begin;
200 const unsigned char *page_end;
201 array_->getPage(page_index, &page_begin, &page_end, &page_);
202
203 // Ensure that the page has the expected length (as specified in the
204 // PagedByteArray interface).
205 size_t expected_page_size = array_->pageSize();
206 if (page_index == (array_->length() - 1) / array_->pageSize()) {
207 expected_page_size = array_->length() - array_->pageSize() * page_index;
208 }
209 if ((page_end < page_begin) ||
210 (static_cast<size_t>(page_end - page_begin) != expected_page_size)) {
211 assert(false);
212 return;
213 }
214
215 // Remember information about page.
216 page_data_ = page_begin;
217 page_begin_offset_ = page_index * array_->pageSize();
218 current_page_len_ = static_cast<size_t>(page_end - page_begin);
219
220 // Restrict the boundaries of the page to lie within the sub-array.
221 restrictPageToSubArray();
222}
223
224void RangeCheckedBytePtr::restrictPageToSubArray() const {
225 // Restrict the current page's boundaries so that it is always contained
226 // completely within the extent of the sub-array.
227 // This function is purposely designed to work correctly in the following
228 // two special cases:
229 // a) The current page lies entirely outside the sub-array. In this case,
230 // current_page_len_ will be set to zero. page_data_ may either remain
231 // unchanged or may be changed to point one element beyond the end of the
232 // page, depending on whether the current page lies before or after the
233 // sub-array.
234 // b) The current page is in the state as initialized by the constructor
235 // (i.e. page_data_ is NULL and current_page_len_ is zero). In this case,
236 // page_data_ and current_page_len_ will remain unchanged.
237
238 // Does the beginning of the page lie before the beginning of the sub-array?
239 if (page_begin_offset_ < sub_array_begin_) {
240 // Compute amount by which to shorten page.
241 size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_;
242 if (amount_to_shorten > current_page_len_) {
243 amount_to_shorten = current_page_len_;
244 }
245
246 // Adjust beginning of page accordingly.
247 page_begin_offset_ += amount_to_shorten;
248 page_data_ += amount_to_shorten;
249 current_page_len_ -= amount_to_shorten;
250 }
251
252 // Does the end of the page lie beyond the end of the sub-array?
253 if (page_begin_offset_ + current_page_len_ > sub_array_end_) {
254 // Reduce length of page accordingly.
255 size_t new_len = sub_array_end_ - page_begin_offset_;
256 if (new_len > current_page_len_) {
257 new_len = current_page_len_;
258 }
259 current_page_len_ = new_len;
260 }
261}
262
263int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
264 size_t num) {
265 std::vector<unsigned char> x_vec = x.extractBytes(0, num);
266 std::vector<unsigned char> y_vec = y.extractBytes(0, num);
267
268 if (!x.errorOccurred() && !y.errorOccurred()) {
269 return ::memcmp(&x_vec[0], &y_vec[0], num);
270 } else {
271 // return an arbitrary value
272 return -1;
273 }
274}
275
276int strcmp(const RangeCheckedBytePtr &x, const std::string &y) {
277 std::vector<unsigned char> x_vec = x.extractBytes(0, y.length());
278
279 if (!x.errorOccurred()) {
280 return ::memcmp(&x_vec[0], y.c_str(), y.length());
281 } else {
282 // return an arbitrary value
283 return -1;
284 }
285}
286
287size_t strlen(const RangeCheckedBytePtr &src) {
288 size_t len = 0;
289 RangeCheckedBytePtr str = src;
290 while (!str.errorOccurred() && (str[0] != '\0')) {
291 str++;
292 len++;
293 }
294 return len;
295}
296
297int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
298 MemoryStatus *status) {
299 const uint16 unsigned_value = Get16u(input, big_endian, status);
300 if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
301 // Return an arbitrary value.
302 return 0;
303 }
304
305 // Convert the two's-complement signed integer encoded in 'unsigned_value'
306 // into a signed representation in the implementation's native representation
307 // for signed integers. An optimized Blaze build (x64) compiles all of the
308 // following code to a no-op (as of this writing).
309 // For further details, see the corresponding comment in Get32s().
310 if (unsigned_value == 0x8000u) {
311 return static_cast<int16>(-0x8000);
312 } else if (unsigned_value > 0x8000u) {
313 return -static_cast<int16>(0x10000u - unsigned_value);
314 } else {
315 return static_cast<int16>(unsigned_value);
316 }
317}
318
319uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
320 MemoryStatus *status) {
321 if (input.remainingLength() < 2) {
322 if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
323 *status = RANGE_CHECKED_BYTE_ERROR;
324 }
325 // Return an arbitrary value.
326 return 0;
327 }
328 if (big_endian) {
329 return (static_cast<uint16>(input[0]) << 8) | static_cast<uint16>(input[1]);
330 } else {
331 return (static_cast<uint16>(input[1]) << 8) | static_cast<uint16>(input[0]);
332 }
333}
334
335int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
336 MemoryStatus *status) {
337 const uint32 unsigned_value = Get32u(input, big_endian, status);
338 if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
339 // Return an arbitrary value.
340 return 0;
341 }
342
343 // Convert the two's-complement signed integer encoded in 'unsigned_value'
344 // into a signed representation in the implementation's native representation
345 // for signed integers.
346 // For all practical purposes, the same result could be obtained simply by
347 // casting unsigned_value to int32; the result of this is
348 // implementation-defined, but on all of the platforms we care about, it does
349 // what we want.
350 // The code below, however, arguably has the aesthetic advantage of being
351 // independent of the representation for signed integers chosen by the
352 // implementation, as long as 'int' and 'unsigned' have the required range to
353 // represent all of the required values.
354 // An optimized Blaze build (x64) compiles all of the following code to a
355 // no-op (as of this writing); i.e. the value that Get32u() returned in %eax
356 // is left unchanged.
357 if (unsigned_value == 0x80000000u) {
358 // Read here on why the constant expression is written this way:
359 // http://stackoverflow.com/questions/14695118
360 return -0x7fffffff - 1;
361 } else if (unsigned_value > 0x80000000u) {
362 // The expression
363 // 0xffffffffu - unsigned_value + 1
364 // is a portable way of flipping the sign of a twos-complement signed
365 // integer whose binary representation is stored in an unsigned integer.
366 // '0xffffffffu + 1' is used in preference to simply '0' because it makes
367 // it clearer that the correct result will be obtained even if an int is
368 // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around
369 // 'unsigned_value' to prevent the compiler from warning about an
370 // integral constant overflow. ('0' would produce the correct result in
371 // this case too but would rely in a more subtle way on the rules for
372 // unsigned wraparound.)
373 return -static_cast<int32>(0xffffffffu - unsigned_value + 1);
374 } else {
375 return static_cast<int32>(unsigned_value);
376 }
377}
378
379uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
380 MemoryStatus *status) {
381 if (input.remainingLength() < 4) {
382 if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
383 *status = RANGE_CHECKED_BYTE_ERROR;
384 }
385 // Return an arbitrary value.
386 return 0;
387 }
388 if (big_endian) {
389 return (static_cast<uint32>(input[0]) << 24) |
390 (static_cast<uint32>(input[1]) << 16) |
391 (static_cast<uint32>(input[2]) << 8) |
392 (static_cast<uint32>(input[3]) << 0);
393 } else {
394 return (static_cast<uint32>(input[3]) << 24) |
395 (static_cast<uint32>(input[2]) << 16) |
396 (static_cast<uint32>(input[1]) << 8) |
397 (static_cast<uint32>(input[0]) << 0);
398 }
399}
400
401} // namespace binary_parse
402} // namespace piex
403