1 | // Copyright 2015 Google Inc. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | // |
15 | //////////////////////////////////////////////////////////////////////////////// |
16 | |
17 | #include "src/binary_parse/range_checked_byte_ptr.h" |
18 | |
19 | #include <assert.h> |
20 | #include <cstddef> |
21 | #include <cstring> |
22 | |
23 | namespace piex { |
24 | namespace binary_parse { |
25 | |
26 | #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE |
27 | #define BREAK_IF_DEBUGGING() assert(false) |
28 | #else |
29 | #define BREAK_IF_DEBUGGING() assert(true) |
30 | #endif |
31 | |
32 | namespace { |
33 | class MemoryPagedByteArray : public PagedByteArray { |
34 | public: |
35 | MemoryPagedByteArray(const unsigned char *buffer, const size_t len); |
36 | |
37 | virtual size_t length() const; |
38 | virtual size_t pageSize() const; |
39 | virtual void getPage(size_t page_index, const unsigned char **begin, |
40 | const unsigned char **end, PagePtr *page) const; |
41 | |
42 | private: |
43 | const unsigned char *buffer_; |
44 | const size_t len_; |
45 | }; |
46 | |
47 | MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer, |
48 | const size_t len) |
49 | : buffer_(buffer), len_(len) {} |
50 | |
51 | size_t MemoryPagedByteArray::length() const { return len_; } |
52 | |
53 | size_t MemoryPagedByteArray::pageSize() const { return len_; } |
54 | |
55 | void MemoryPagedByteArray::getPage(size_t /* page_index */, |
56 | const unsigned char **begin, |
57 | const unsigned char **end, |
58 | PagePtr *page) const { |
59 | *begin = buffer_; |
60 | *end = buffer_ + len_; |
61 | *page = PagePtr(); |
62 | } |
63 | |
64 | // A functor that does nothing. This is used as a no-op shared pointer |
65 | // deallocator below. |
66 | class NullFunctor { |
67 | public: |
68 | void operator()() {} |
69 | void operator()(PagedByteArray * /* p */) const {} |
70 | }; |
71 | } // namespace |
72 | |
73 | PagedByteArray::~PagedByteArray() {} |
74 | |
75 | RangeCheckedBytePtr::RangeCheckedBytePtr() |
76 | : array_(), |
77 | page_data_(NULL), |
78 | current_pos_(0), |
79 | sub_array_begin_(0), |
80 | sub_array_end_(0), |
81 | page_begin_offset_(0), |
82 | current_page_len_(0), |
83 | error_flag_(RANGE_CHECKED_BYTE_ERROR) {} |
84 | |
85 | RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array, |
86 | const size_t len) |
87 | : array_(new MemoryPagedByteArray(array, len)), |
88 | page_data_(NULL), |
89 | current_pos_(0), |
90 | sub_array_begin_(0), |
91 | sub_array_end_(len), |
92 | page_begin_offset_(0), |
93 | current_page_len_(0), |
94 | error_flag_(RANGE_CHECKED_BYTE_SUCCESS) { |
95 | assert(array); |
96 | if (array == NULL) { |
97 | error_flag_ = RANGE_CHECKED_BYTE_ERROR; |
98 | } |
99 | } |
100 | |
101 | RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array) |
102 | : array_(array, NullFunctor()), |
103 | page_data_(NULL), |
104 | current_pos_(0), |
105 | sub_array_begin_(0), |
106 | sub_array_end_(array->length()), |
107 | page_begin_offset_(0), |
108 | current_page_len_(0), |
109 | error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {} |
110 | |
111 | RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() { |
112 | return RangeCheckedBytePtr(); |
113 | } |
114 | |
115 | RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray( |
116 | size_t pos, size_t length) const { |
117 | RangeCheckedBytePtr sub_result = (*this) + pos; |
118 | if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) { |
119 | sub_result.sub_array_begin_ = sub_result.current_pos_; |
120 | sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length; |
121 | |
122 | // Restrict the boundaries of the current page to the newly set sub-array. |
123 | sub_result.restrictPageToSubArray(); |
124 | |
125 | return sub_result; |
126 | } else { |
127 | error_flag_ = RANGE_CHECKED_BYTE_ERROR; |
128 | return invalidPointer(); |
129 | } |
130 | } |
131 | |
132 | size_t RangeCheckedBytePtr::offsetInArray() const { |
133 | // sub_array_begin_ <= current_pos_ is a class invariant, but protect |
134 | // against violations of this invariant. |
135 | if (sub_array_begin_ <= current_pos_) { |
136 | return current_pos_ - sub_array_begin_; |
137 | } else { |
138 | assert(false); |
139 | return 0; |
140 | } |
141 | } |
142 | |
143 | std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const { |
144 | std::vector<unsigned char> bytes = extractBytes(pos, length); |
145 | std::string result; |
146 | result.reserve(bytes.size()); |
147 | for (size_t i = 0; i < bytes.size(); ++i) { |
148 | result.push_back(static_cast<char>(bytes[i])); |
149 | } |
150 | return result; |
151 | } |
152 | |
153 | std::vector<unsigned char> RangeCheckedBytePtr::( |
154 | size_t pos, size_t length) const { |
155 | std::vector<unsigned char> result; |
156 | if (pos + length < pos /* overflow */ || remainingLength() < pos + length) { |
157 | BREAK_IF_DEBUGGING(); |
158 | error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; |
159 | return result; |
160 | } |
161 | result.reserve(length); |
162 | for (size_t i = 0; i < length; ++i) { |
163 | result.push_back((*this)[pos + i]); |
164 | } |
165 | return result; |
166 | } |
167 | |
168 | bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) { |
169 | if (x.array_ != y.array_) { |
170 | assert(false); |
171 | return false; |
172 | } |
173 | |
174 | return x.current_pos_ == y.current_pos_; |
175 | } |
176 | |
177 | bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) { |
178 | return !(x == y); |
179 | } |
180 | |
181 | void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const { |
182 | // The offset should always lie within the bounds of the sub-array (this |
183 | // condition is enforced at the callsite). However, even if the offset lies |
184 | // outside the sub-array, the restrictPageToSubArray() call at the end |
185 | // ensures that the object is left in a consistent state that maintains the |
186 | // class invariants. |
187 | assert(offset >= sub_array_begin_ && offset < sub_array_end_); |
188 | |
189 | // Ensure that offset lies within the array. |
190 | if (offset >= array_->length()) { |
191 | assert(false); |
192 | return; |
193 | } |
194 | |
195 | // Determine the index of the page to request. |
196 | size_t page_index = offset / array_->pageSize(); |
197 | |
198 | // Get the page. |
199 | const unsigned char *page_begin; |
200 | const unsigned char *page_end; |
201 | array_->getPage(page_index, &page_begin, &page_end, &page_); |
202 | |
203 | // Ensure that the page has the expected length (as specified in the |
204 | // PagedByteArray interface). |
205 | size_t expected_page_size = array_->pageSize(); |
206 | if (page_index == (array_->length() - 1) / array_->pageSize()) { |
207 | expected_page_size = array_->length() - array_->pageSize() * page_index; |
208 | } |
209 | if ((page_end < page_begin) || |
210 | (static_cast<size_t>(page_end - page_begin) != expected_page_size)) { |
211 | assert(false); |
212 | return; |
213 | } |
214 | |
215 | // Remember information about page. |
216 | page_data_ = page_begin; |
217 | page_begin_offset_ = page_index * array_->pageSize(); |
218 | current_page_len_ = static_cast<size_t>(page_end - page_begin); |
219 | |
220 | // Restrict the boundaries of the page to lie within the sub-array. |
221 | restrictPageToSubArray(); |
222 | } |
223 | |
224 | void RangeCheckedBytePtr::restrictPageToSubArray() const { |
225 | // Restrict the current page's boundaries so that it is always contained |
226 | // completely within the extent of the sub-array. |
227 | // This function is purposely designed to work correctly in the following |
228 | // two special cases: |
229 | // a) The current page lies entirely outside the sub-array. In this case, |
230 | // current_page_len_ will be set to zero. page_data_ may either remain |
231 | // unchanged or may be changed to point one element beyond the end of the |
232 | // page, depending on whether the current page lies before or after the |
233 | // sub-array. |
234 | // b) The current page is in the state as initialized by the constructor |
235 | // (i.e. page_data_ is NULL and current_page_len_ is zero). In this case, |
236 | // page_data_ and current_page_len_ will remain unchanged. |
237 | |
238 | // Does the beginning of the page lie before the beginning of the sub-array? |
239 | if (page_begin_offset_ < sub_array_begin_) { |
240 | // Compute amount by which to shorten page. |
241 | size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_; |
242 | if (amount_to_shorten > current_page_len_) { |
243 | amount_to_shorten = current_page_len_; |
244 | } |
245 | |
246 | // Adjust beginning of page accordingly. |
247 | page_begin_offset_ += amount_to_shorten; |
248 | page_data_ += amount_to_shorten; |
249 | current_page_len_ -= amount_to_shorten; |
250 | } |
251 | |
252 | // Does the end of the page lie beyond the end of the sub-array? |
253 | if (page_begin_offset_ + current_page_len_ > sub_array_end_) { |
254 | // Reduce length of page accordingly. |
255 | size_t new_len = sub_array_end_ - page_begin_offset_; |
256 | if (new_len > current_page_len_) { |
257 | new_len = current_page_len_; |
258 | } |
259 | current_page_len_ = new_len; |
260 | } |
261 | } |
262 | |
263 | int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y, |
264 | size_t num) { |
265 | std::vector<unsigned char> x_vec = x.extractBytes(0, num); |
266 | std::vector<unsigned char> y_vec = y.extractBytes(0, num); |
267 | |
268 | if (!x.errorOccurred() && !y.errorOccurred()) { |
269 | return ::memcmp(&x_vec[0], &y_vec[0], num); |
270 | } else { |
271 | // return an arbitrary value |
272 | return -1; |
273 | } |
274 | } |
275 | |
276 | int strcmp(const RangeCheckedBytePtr &x, const std::string &y) { |
277 | std::vector<unsigned char> x_vec = x.extractBytes(0, y.length()); |
278 | |
279 | if (!x.errorOccurred()) { |
280 | return ::memcmp(&x_vec[0], y.c_str(), y.length()); |
281 | } else { |
282 | // return an arbitrary value |
283 | return -1; |
284 | } |
285 | } |
286 | |
287 | size_t strlen(const RangeCheckedBytePtr &src) { |
288 | size_t len = 0; |
289 | RangeCheckedBytePtr str = src; |
290 | while (!str.errorOccurred() && (str[0] != '\0')) { |
291 | str++; |
292 | len++; |
293 | } |
294 | return len; |
295 | } |
296 | |
297 | int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian, |
298 | MemoryStatus *status) { |
299 | const uint16 unsigned_value = Get16u(input, big_endian, status); |
300 | if (*status != RANGE_CHECKED_BYTE_SUCCESS) { |
301 | // Return an arbitrary value. |
302 | return 0; |
303 | } |
304 | |
305 | // Convert the two's-complement signed integer encoded in 'unsigned_value' |
306 | // into a signed representation in the implementation's native representation |
307 | // for signed integers. An optimized Blaze build (x64) compiles all of the |
308 | // following code to a no-op (as of this writing). |
309 | // For further details, see the corresponding comment in Get32s(). |
310 | if (unsigned_value == 0x8000u) { |
311 | return static_cast<int16>(-0x8000); |
312 | } else if (unsigned_value > 0x8000u) { |
313 | return -static_cast<int16>(0x10000u - unsigned_value); |
314 | } else { |
315 | return static_cast<int16>(unsigned_value); |
316 | } |
317 | } |
318 | |
319 | uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian, |
320 | MemoryStatus *status) { |
321 | if (input.remainingLength() < 2) { |
322 | if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) { |
323 | *status = RANGE_CHECKED_BYTE_ERROR; |
324 | } |
325 | // Return an arbitrary value. |
326 | return 0; |
327 | } |
328 | if (big_endian) { |
329 | return (static_cast<uint16>(input[0]) << 8) | static_cast<uint16>(input[1]); |
330 | } else { |
331 | return (static_cast<uint16>(input[1]) << 8) | static_cast<uint16>(input[0]); |
332 | } |
333 | } |
334 | |
335 | int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian, |
336 | MemoryStatus *status) { |
337 | const uint32 unsigned_value = Get32u(input, big_endian, status); |
338 | if (*status != RANGE_CHECKED_BYTE_SUCCESS) { |
339 | // Return an arbitrary value. |
340 | return 0; |
341 | } |
342 | |
343 | // Convert the two's-complement signed integer encoded in 'unsigned_value' |
344 | // into a signed representation in the implementation's native representation |
345 | // for signed integers. |
346 | // For all practical purposes, the same result could be obtained simply by |
347 | // casting unsigned_value to int32; the result of this is |
348 | // implementation-defined, but on all of the platforms we care about, it does |
349 | // what we want. |
350 | // The code below, however, arguably has the aesthetic advantage of being |
351 | // independent of the representation for signed integers chosen by the |
352 | // implementation, as long as 'int' and 'unsigned' have the required range to |
353 | // represent all of the required values. |
354 | // An optimized Blaze build (x64) compiles all of the following code to a |
355 | // no-op (as of this writing); i.e. the value that Get32u() returned in %eax |
356 | // is left unchanged. |
357 | if (unsigned_value == 0x80000000u) { |
358 | // Read here on why the constant expression is written this way: |
359 | // http://stackoverflow.com/questions/14695118 |
360 | return -0x7fffffff - 1; |
361 | } else if (unsigned_value > 0x80000000u) { |
362 | // The expression |
363 | // 0xffffffffu - unsigned_value + 1 |
364 | // is a portable way of flipping the sign of a twos-complement signed |
365 | // integer whose binary representation is stored in an unsigned integer. |
366 | // '0xffffffffu + 1' is used in preference to simply '0' because it makes |
367 | // it clearer that the correct result will be obtained even if an int is |
368 | // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around |
369 | // 'unsigned_value' to prevent the compiler from warning about an |
370 | // integral constant overflow. ('0' would produce the correct result in |
371 | // this case too but would rely in a more subtle way on the rules for |
372 | // unsigned wraparound.) |
373 | return -static_cast<int32>(0xffffffffu - unsigned_value + 1); |
374 | } else { |
375 | return static_cast<int32>(unsigned_value); |
376 | } |
377 | } |
378 | |
379 | uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian, |
380 | MemoryStatus *status) { |
381 | if (input.remainingLength() < 4) { |
382 | if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) { |
383 | *status = RANGE_CHECKED_BYTE_ERROR; |
384 | } |
385 | // Return an arbitrary value. |
386 | return 0; |
387 | } |
388 | if (big_endian) { |
389 | return (static_cast<uint32>(input[0]) << 24) | |
390 | (static_cast<uint32>(input[1]) << 16) | |
391 | (static_cast<uint32>(input[2]) << 8) | |
392 | (static_cast<uint32>(input[3]) << 0); |
393 | } else { |
394 | return (static_cast<uint32>(input[3]) << 24) | |
395 | (static_cast<uint32>(input[2]) << 16) | |
396 | (static_cast<uint32>(input[1]) << 8) | |
397 | (static_cast<uint32>(input[0]) << 0); |
398 | } |
399 | } |
400 | |
401 | } // namespace binary_parse |
402 | } // namespace piex |
403 | |