| 1 | // Copyright 2015 Google Inc. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | // |
| 15 | //////////////////////////////////////////////////////////////////////////////// |
| 16 | |
| 17 | #ifndef PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_ |
| 18 | #define PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_ |
| 19 | |
| 20 | #include <assert.h> |
| 21 | |
| 22 | #include <cstddef> |
| 23 | #include <memory> |
| 24 | #include <string> |
| 25 | #include <vector> |
| 26 | |
| 27 | namespace piex { |
| 28 | namespace binary_parse { |
| 29 | |
| 30 | // Since NaCl does not comply to C++11 we can not just use stdint.h. |
| 31 | typedef unsigned short uint16; // NOLINT |
| 32 | typedef short int16; // NOLINT |
| 33 | typedef unsigned int uint32; |
| 34 | typedef int int32; |
| 35 | |
| 36 | enum MemoryStatus { |
| 37 | RANGE_CHECKED_BYTE_SUCCESS = 0, |
| 38 | RANGE_CHECKED_BYTE_ERROR = 1, |
| 39 | RANGE_CHECKED_BYTE_ERROR_OVERFLOW = 2, |
| 40 | RANGE_CHECKED_BYTE_ERROR_UNDERFLOW = 3, |
| 41 | }; |
| 42 | |
| 43 | // Interface that RangeCheckedBytePtr uses to access the underlying array of |
| 44 | // bytes. This allows RangeCheckedBytePtr to be used to access data as if it |
| 45 | // were stored contiguously in memory, even if the data is in fact split up |
| 46 | // into non-contiguous chunks and / or does not reside in memory. |
| 47 | // |
| 48 | // The only requirement is that the data can be read in pages of a fixed (but |
| 49 | // configurable) size. Notionally, the byte array (which contains length() |
| 50 | // bytes) is split up into non-overlapping pages of pageSize() bytes each. |
| 51 | // (The last page may be shorter if length() is not a multiple of pageSize().) |
| 52 | // There are therefore (length() - 1) / pageSize() + 1 such pages, with indexes |
| 53 | // 0 through (length() - 1) / pageSize(). Page i contains the bytes from offset |
| 54 | // i * pageSize() in the array up to and including the byte at offset |
| 55 | // (i + 1) * pageSize() - 1 (or, in the case of the last page, length() - 1). |
| 56 | // |
| 57 | // In essence, RangeCheckedBytePtr and PagedByteArray together provide a poor |
| 58 | // man's virtual-memory-and-memory-mapped-file work-alike in situations where |
| 59 | // virtual memory cannot be used or would consume too much virtual address |
| 60 | // space. |
| 61 | // |
| 62 | // Thread safety: In general, subclasses implementing this interface should |
| 63 | // ensure that the member functions are thread-safe. It will then be safe to |
| 64 | // access the same array from multiple threads. (Note that RangeCheckedBytePtr |
| 65 | // itself is not thread-safe in the sense that a single instance of |
| 66 | // RangeCheckedBytePtr cannot be used concurrently from multiple threads; it |
| 67 | // is, however, safe to use different RangeCheckedBytePtr instances in |
| 68 | // different threads to access the same PagedByteArray concurrently, assuming |
| 69 | // that the PagedByteArray implementation is thread-safe.) |
| 70 | class PagedByteArray { |
| 71 | public: |
| 72 | // Base class for pages in the byte array. Implementations of PagedByteArray |
| 73 | // can create a subclass of the Page class to manage the lifetime of buffers |
| 74 | // associated with a page returned by getPage(). For example, a |
| 75 | // PagedByteArray backed by a file might define a Page subclass like this: |
| 76 | // |
| 77 | // class FilePage : public Page { |
| 78 | // std::vector<unsigned char> bytes; |
| 79 | // }; |
| 80 | // |
| 81 | // The corresponding getPage() implementation could then look like this: |
| 82 | // |
| 83 | // void getPage(size_t page_index, const unsigned char** begin, |
| 84 | // const unsigned char** end, std::shared_ptr<Page>* page) |
| 85 | // { |
| 86 | // // Create a new page. |
| 87 | // std::shared_ptr<FilePage> file_page(new FilePage()); |
| 88 | // |
| 89 | // // Read contents of page from file into file_page->bytes. |
| 90 | // [...] |
| 91 | // |
| 92 | // // Set *begin and *end to point to beginning and end of |
| 93 | // // file_page->bytes vector. |
| 94 | // *begin = &file_page->bytes[0]; |
| 95 | // *end = *begin + file_page->bytes.size(); |
| 96 | // |
| 97 | // // Return page to caller |
| 98 | // *page = file_page; |
| 99 | // } |
| 100 | // |
| 101 | // In this way, the storage associated with the page (the FilePage::bytes |
| 102 | // vector) will be kept alive until the RangeCheckedBytePtr releases the |
| 103 | // shared pointer. |
| 104 | class Page {}; |
| 105 | |
| 106 | typedef std::shared_ptr<Page> PagePtr; |
| 107 | |
| 108 | virtual ~PagedByteArray(); |
| 109 | |
| 110 | // Returns the length of the array in bytes. The value returned must remain |
| 111 | // the same on every call for the entire lifetime of the object. |
| 112 | virtual size_t length() const = 0; |
| 113 | |
| 114 | // Returns the length of each page in bytes. (The last page may be shorter |
| 115 | // than pageSize() if length() is not a multiple of pageSize() -- see also |
| 116 | // the class-wide comment above.) The value returned must remain the same on |
| 117 | // every call for the entire lifetime of the object. |
| 118 | virtual size_t pageSize() const = 0; |
| 119 | |
| 120 | // Returns a pointer to a memory buffer containing the data for the page |
| 121 | // with index "page_index". |
| 122 | // |
| 123 | // *begin is set to point to the first byte of the page; *end is set to point |
| 124 | // one byte beyond the last byte in the page. This implies that: |
| 125 | // - (*end - *begin) == pageSize() for every page except the last page |
| 126 | // - (*end - *begin) == length() - pageSize() * ((length() - 1) / pageSize()) |
| 127 | // for the last page |
| 128 | // |
| 129 | // *page will be set to a SharedPtr that the caller will hold on to until |
| 130 | // it no longer needs to access the memory buffer. The memory buffer will |
| 131 | // remain valid until the SharedPtr is released or the PagedByteArray object |
| 132 | // is destroyed. An implementation may choose to return a null SharedPtr; |
| 133 | // this indicates that the memory buffer will remain valid until the |
| 134 | // PagedByteArray object is destroyed, even if the caller does not hold on to |
| 135 | // the SharedPtr. (This is intended as an optimization that some |
| 136 | // implementations may choose to take advantage of, as a null SharedPtr is |
| 137 | // cheaper to copy.) |
| 138 | virtual void getPage(size_t page_index, const unsigned char **begin, |
| 139 | const unsigned char **end, PagePtr *page) const = 0; |
| 140 | }; |
| 141 | |
| 142 | typedef std::shared_ptr<PagedByteArray> PagedByteArrayPtr; |
| 143 | |
| 144 | // Smart pointer that has the same semantics as a "const unsigned char *" (plus |
| 145 | // some convenience functions) but provides range checking and the ability to |
| 146 | // access arrays that are not contiguous in memory or do not reside entirely in |
| 147 | // memory (through the PagedByteArray interface). |
| 148 | // |
| 149 | // In the following, we abbreviate RangeCheckedBytePtr as RCBP. |
| 150 | // |
| 151 | // The intent of this class is to allow easy security hardening of code that |
| 152 | // parses binary data structures using raw byte pointers. To do this, only the |
| 153 | // declarations of the pointers need to be changed; the code that uses the |
| 154 | // pointers can remain unchanged. |
| 155 | // |
| 156 | // If an illegal operation occurs on a pointer, an error flag is set, and all |
| 157 | // read operations from this point on return 0. This means that error checking |
| 158 | // need not be done after every access; it is sufficient to check the error flag |
| 159 | // (using errorOccurred()) once before the RCBP is destroyed. Again, this allows |
| 160 | // the majority of the parsing code to remain unchanged. (Note caveats below |
| 161 | // that apply if a copy of the pointer is created.) |
| 162 | // |
| 163 | // Legal operations are exactly the ones that would be legal on a raw C++ |
| 164 | // pointer. Read accesses are legal if they fall within the underlying array. A |
| 165 | // RCBP may point to any element in the underlying array or one element beyond |
| 166 | // the end of the array. |
| 167 | // |
| 168 | // For brevity, the documentation for individual member functions does not state |
| 169 | // explicitly that the error flag will be set on out-of-range operations. |
| 170 | // |
| 171 | // Note: |
| 172 | // |
| 173 | // - Just as for raw pointers, it is legal for a pointer to point one element |
| 174 | // beyond the end of the array, but it is illegal to use operator*() on such a |
| 175 | // pointer. |
| 176 | // |
| 177 | // - If a copy of an RCBP is created, then performing illegal operations on the |
| 178 | // copy affects the error flag of the copy, but not of the original pointer. |
| 179 | // Note that using operator+ and operator- also creates a copy of the pointer. |
| 180 | // For example: |
| 181 | // |
| 182 | // // Assume we have an RCBP called "p" and a size_t variable called |
| 183 | // // "offset". |
| 184 | // RangeCheckedBytePtr sub_data_structure = p + offset; |
| 185 | // |
| 186 | // If "offset" is large enough to cause an out-of-range access, then |
| 187 | // sub_data_structure.errorOccurred() will be true, but p.errorOccurred() will |
| 188 | // still be false. The error flag for sub_data_structure therefore needs to be |
| 189 | // checked before it is destroyed. |
| 190 | class RangeCheckedBytePtr { |
| 191 | private: |
| 192 | // This class maintains the following class invariants: |
| 193 | // - page_data_ always points to a buffer of at least current_page_len_ |
| 194 | // bytes. |
| 195 | // |
| 196 | // - The current position lies within the sub-array, i.e. |
| 197 | // sub_array_begin_ <= current_pos_ <= sub_array_end_ |
| 198 | // |
| 199 | // - The sub-array is entirely contained within the array, i.e. |
| 200 | // 0 <= sub_array_begin <= sub_array_end <= array_->length() |
| 201 | // |
| 202 | // - If the current page is non-empty, it lies completely within the |
| 203 | // sub-array, i.e. |
| 204 | // if _current_page_len_ >= 0, then |
| 205 | // sub_array_begin_ <= page_begin_offset_ |
| 206 | // and |
| 207 | // page_begin_offset_ + current_page_len_ <= sub_array_end_ |
| 208 | // (See also restrictPageToSubArray().) |
| 209 | // (If _current_page_len_ == 0, then page_begin_offset_ may lie outside |
| 210 | // the sub-array; this condition is harmless. Additional logic would be |
| 211 | // required to make page_begin_offset_ lie within the sub-array in this |
| 212 | // case, and it would serve no purpose other than to make the invariant |
| 213 | // slightly simpler.) |
| 214 | // |
| 215 | // Note that it is _not_ a class invariant that current_pos_ needs to lie |
| 216 | // within the current page. Making this an invariant would have two |
| 217 | // undesirable consequences: |
| 218 | // a) When operator[] is called with an index that lies beyond the end of |
| 219 | // the current page, it would need to temporarily load the page that |
| 220 | // contains this index, but it wouldn't be able to "retain" the page |
| 221 | // (i.e. make it the current page) because that would violate the |
| 222 | // proposed invariant. This would lead to inefficient behavior in the |
| 223 | // case where code accesses a large range of indices beyond the end of |
| 224 | // the page because a page would need to be loaded temporarily on each |
| 225 | // access. |
| 226 | // b) It would require more code: loadPageForOffset() would need to be |
| 227 | // called anywhere that current_pos_ changes (whereas, with the present |
| 228 | // approach, loadPageForOffset() is only called in operator[]). |
| 229 | |
| 230 | // PagedByteArray that is accessed by this pointer. |
| 231 | PagedByteArrayPtr array_; |
| 232 | |
| 233 | // Pointer to the current page. |
| 234 | mutable PagedByteArray::PagePtr page_; |
| 235 | |
| 236 | // Pointer to the current page's data buffer. |
| 237 | mutable const unsigned char *page_data_; |
| 238 | |
| 239 | // All of the following offsets are defined relative to the beginning of |
| 240 | // the array defined by the PagedByteArray array_. |
| 241 | |
| 242 | // Array offset that the pointer points to. |
| 243 | size_t current_pos_; |
| 244 | |
| 245 | // Start offset of the current sub-array. |
| 246 | size_t sub_array_begin_; |
| 247 | |
| 248 | // End offset of the current sub-array. |
| 249 | size_t sub_array_end_; |
| 250 | |
| 251 | // Array offset corresponding to the "page_data_" pointer. |
| 252 | mutable size_t page_begin_offset_; |
| 253 | |
| 254 | // Length of the current page. |
| 255 | mutable size_t current_page_len_; |
| 256 | |
| 257 | // Error flag. This is mutable because methods that don't affect the value |
| 258 | // of the pointer itself (such as operator[]) nevertheless need to be able to |
| 259 | // signal error conditions. |
| 260 | mutable MemoryStatus error_flag_; |
| 261 | |
| 262 | RangeCheckedBytePtr(); |
| 263 | |
| 264 | public: |
| 265 | // Creates a pointer that points to the first element of 'array', which has a |
| 266 | // length of 'len'. The caller must ensure that the array remains valid until |
| 267 | // this pointer and any pointers created from it have been destroyed. |
| 268 | // Note: 'len' may be zero, but 'array' must in this case still be a valid, |
| 269 | // non-null pointer. |
| 270 | explicit RangeCheckedBytePtr(const unsigned char *array, const size_t len); |
| 271 | |
| 272 | // Creates a pointer that points to the first element of the given |
| 273 | // PagedByteArray. The caller must ensure that this PagedByteArray remains |
| 274 | // valid until this pointer and any pointers created from it have been |
| 275 | // destroyed. |
| 276 | explicit RangeCheckedBytePtr(PagedByteArray *array); |
| 277 | |
| 278 | // Creates an invalid RangeCheckedBytePtr. Calling errorOccurred() on the |
| 279 | // result of invalidPointer() always returns true. |
| 280 | // Do not check a RangeCheckedBytePtr for validity by comparing against |
| 281 | // invalidPointer(); use errorOccurred() instead. |
| 282 | static RangeCheckedBytePtr invalidPointer(); |
| 283 | |
| 284 | // Returns a RangeCheckedBytePtr that points to a sub-array of this pointer's |
| 285 | // underlying array. The sub-array starts at position 'pos' relative to this |
| 286 | // pointer and is 'length' bytes long. The sub-array must lie within this |
| 287 | // pointer's array, i.e. pos + length <= remainingLength() must hold. If this |
| 288 | // condition is violated, an invalid pointer is returned. |
| 289 | RangeCheckedBytePtr pointerToSubArray(size_t pos, size_t length) const; |
| 290 | |
| 291 | // Returns the number of bytes remaining in the array from this pointer's |
| 292 | // present position. |
| 293 | inline size_t remainingLength() const; |
| 294 | |
| 295 | // Returns the offset (or index) in the underlying array that this pointer |
| 296 | // points to. If this pointer was created using pointerToSubArray(), the |
| 297 | // offset is relative to the beginning of the sub-array (and not relative to |
| 298 | // the beginning of the original array). |
| 299 | size_t offsetInArray() const; |
| 300 | |
| 301 | // Returns whether an out-of-bounds error has ever occurred on this pointer in |
| 302 | // the past. An error occurs if a caller attempts to read from a position |
| 303 | // outside the bounds of the array or to move the pointer outside the bounds |
| 304 | // of the array. |
| 305 | // |
| 306 | // The error flag is never reset. Once an error has occurred, |
| 307 | // all subsequent attempts to read from the pointer (even within the bounds of |
| 308 | // the array) return 0. |
| 309 | // |
| 310 | // Note that it is permissible for a pointer to point one element past the end |
| 311 | // of the array, but it is not permissible to read from this position. This is |
| 312 | // equivalent to the semantics of raw C++ pointers. |
| 313 | inline bool errorOccurred() const; |
| 314 | |
| 315 | // Returns the substring of length 'length' located at position 'pos' relative |
| 316 | // to this pointer. |
| 317 | std::string substr(size_t pos, size_t length) const; |
| 318 | |
| 319 | // Returns 'length' number of bytes from the array starting at position 'pos' |
| 320 | // relative to this pointer. |
| 321 | std::vector<unsigned char> (size_t pos, size_t length) const; |
| 322 | |
| 323 | // Equivalent to calling convert(0, output). |
| 324 | template <class T> |
| 325 | bool convert(T *output) const { |
| 326 | union { |
| 327 | T t; |
| 328 | unsigned char ch[sizeof(T)]; |
| 329 | } buffer; |
| 330 | for (size_t i = 0; i < sizeof(T); i++) { |
| 331 | buffer.ch[i] = (*this)[i]; |
| 332 | } |
| 333 | if (!errorOccurred()) { |
| 334 | *output = buffer.t; |
| 335 | } |
| 336 | return !errorOccurred(); |
| 337 | } |
| 338 | |
| 339 | // Reinterprets this pointer as a pointer to an array of T, then returns the |
| 340 | // element at position 'index' in this array of T. (Note that this position |
| 341 | // corresponds to position index * sizeof(T) in the underlying byte array.) |
| 342 | // |
| 343 | // Returns true if successful; false if an out-of-range error occurred or if |
| 344 | // the error flag was already set on the pointer when calling convert(). |
| 345 | // |
| 346 | // The conversion from a sequence of sizeof(T) bytes to a T is performed in an |
| 347 | // implementation-defined fashion. This conversion is equivalent to the one |
| 348 | // obtained using the following union by filling the array 'ch' and then |
| 349 | // reading the member 't': |
| 350 | // |
| 351 | // union { |
| 352 | // T t; |
| 353 | // unsigned char ch[sizeof(T)]; |
| 354 | // }; |
| 355 | // |
| 356 | // Callers should note that, among other things, the conversion is not |
| 357 | // endian-agnostic with respect to the endianness of T. |
| 358 | template <class T> |
| 359 | bool convert(size_t index, T *output) const { |
| 360 | RangeCheckedBytePtr p = (*this) + index * sizeof(T); |
| 361 | bool valid = p.convert(output); |
| 362 | if (!valid) { |
| 363 | error_flag_ = p.error_flag_; |
| 364 | } |
| 365 | return valid; |
| 366 | } |
| 367 | |
| 368 | // Operators. Unless otherwise noted, these operators have the same semantics |
| 369 | // as the same operators on an unsigned char pointer. |
| 370 | |
| 371 | // If an out-of-range access is attempted, returns 0 (and sets the error |
| 372 | // flag). |
| 373 | inline unsigned char operator[](size_t i) const; |
| 374 | |
| 375 | inline unsigned char operator*() const; |
| 376 | |
| 377 | inline RangeCheckedBytePtr &operator++(); |
| 378 | |
| 379 | inline RangeCheckedBytePtr operator++(int); |
| 380 | |
| 381 | inline RangeCheckedBytePtr &operator--(); |
| 382 | |
| 383 | inline RangeCheckedBytePtr operator--(int); |
| 384 | |
| 385 | inline RangeCheckedBytePtr &operator+=(size_t x); |
| 386 | |
| 387 | inline RangeCheckedBytePtr &operator-=(size_t x); |
| 388 | |
| 389 | inline friend RangeCheckedBytePtr operator+(const RangeCheckedBytePtr &p, |
| 390 | size_t x); |
| 391 | |
| 392 | inline friend RangeCheckedBytePtr operator-(const RangeCheckedBytePtr &p, |
| 393 | size_t x); |
| 394 | |
| 395 | // Tests whether x and y point at the same position in the underlying array. |
| 396 | // Two pointers that point at the same position but have different |
| 397 | // sub-arrays still compare equal. It is not legal to compare two pointers |
| 398 | // that point into different paged byte arrays. |
| 399 | friend bool operator==(const RangeCheckedBytePtr &x, |
| 400 | const RangeCheckedBytePtr &y); |
| 401 | |
| 402 | // Returns !(x == y). |
| 403 | friend bool operator!=(const RangeCheckedBytePtr &x, |
| 404 | const RangeCheckedBytePtr &y); |
| 405 | |
| 406 | private: |
| 407 | void loadPageForOffset(size_t offset) const; |
| 408 | void restrictPageToSubArray() const; |
| 409 | }; |
| 410 | |
| 411 | // Returns the result of calling std::memcmp() on the sequences of 'num' bytes |
| 412 | // pointed to by 'x' and 'y'. The result is undefined if either |
| 413 | // x.remainingLength() or y.remainingLength() is less than 'num'. |
| 414 | int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y, |
| 415 | size_t num); |
| 416 | |
| 417 | // Returns the result of calling std::memcmp() (note: _not_ strcmp()) on the |
| 418 | // y.length() number of bytes pointed to by 'x' and the string 'y'. The result |
| 419 | // is undefined if x.remainingLength() is less than y.length(). |
| 420 | int strcmp(const RangeCheckedBytePtr &x, const std::string &y); |
| 421 | |
| 422 | // Returns the length of the zero-terminated string starting at 'src' (not |
| 423 | // including the '\0' terminator). If no '\0' occurs before the end of the |
| 424 | // array, the result is undefined. |
| 425 | size_t strlen(const RangeCheckedBytePtr &src); |
| 426 | |
| 427 | // Integer decoding functions. |
| 428 | // |
| 429 | // These functions read signed (Get16s, Get32s) or unsigned (Get16u, Get32u) |
| 430 | // integers from 'input'. The integer read from the input can be specified to be |
| 431 | // either big-endian (big_endian == true) or little-endian |
| 432 | // (little_endian == false). Signed integers are read in two's-complement |
| 433 | // representation. The integer read in the specified format is then converted to |
| 434 | // the implementation's native integer representation and returned. In other |
| 435 | // words, the semantics of these functions are independent of the |
| 436 | // implementation's endianness and signed integer representation. |
| 437 | // |
| 438 | // If an out-of-range error occurs, these functions do _not_ set the error flag |
| 439 | // on 'input'. Instead, they set 'status' to RANGE_CHECKED_BYTE_ERROR and return |
| 440 | // 0. |
| 441 | // |
| 442 | // Note: |
| 443 | // - If an error occurs and 'status' is already set to an error value (i.e. a |
| 444 | // value different from RANGE_CHECKED_BYTE_SUCCESS), the value of 'status' is |
| 445 | // left unchanged. |
| 446 | // - If the operation is successful, 'status' is left unchanged (i.e. it is not |
| 447 | // actively set to RANGE_CHECKED_BYTE_SUCCESS). |
| 448 | // |
| 449 | // Together, these two properties mean that these functions can be used to read |
| 450 | // a number of integers in succession with only a single error check, like this: |
| 451 | // |
| 452 | // MemoryStatus status = RANGE_CHECKED_BYTE_SUCCESS; |
| 453 | // int16 val1 = Get16s(input, false, &status); |
| 454 | // int32 val2 = Get32s(input + 2, false, &status); |
| 455 | // uint32 val3 = Get32u(input + 6, false, &status); |
| 456 | // if (status != RANGE_CHECKED_BYTE_SUCCESS) { |
| 457 | // // error handling |
| 458 | // } |
| 459 | int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian, |
| 460 | MemoryStatus *status); |
| 461 | uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian, |
| 462 | MemoryStatus *status); |
| 463 | int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian, |
| 464 | MemoryStatus *status); |
| 465 | uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian, |
| 466 | MemoryStatus *status); |
| 467 | |
| 468 | size_t RangeCheckedBytePtr::remainingLength() const { |
| 469 | if (!errorOccurred()) { |
| 470 | // current_pos_ <= sub_array_end_ is a class invariant, but protect |
| 471 | // against violations of this invariant. |
| 472 | if (current_pos_ <= sub_array_end_) { |
| 473 | return sub_array_end_ - current_pos_; |
| 474 | } else { |
| 475 | assert(false); |
| 476 | return 0; |
| 477 | } |
| 478 | } else { |
| 479 | return 0; |
| 480 | } |
| 481 | } |
| 482 | |
| 483 | bool RangeCheckedBytePtr::errorOccurred() const { |
| 484 | return error_flag_ != RANGE_CHECKED_BYTE_SUCCESS; |
| 485 | } |
| 486 | |
| 487 | unsigned char RangeCheckedBytePtr::operator[](size_t i) const { |
| 488 | // Check that pointer doesn't have an error flag set. |
| 489 | if (!errorOccurred()) { |
| 490 | // Offset in array to read from. |
| 491 | const size_t read_offset = current_pos_ + i; |
| 492 | |
| 493 | // Check for the common case first: The byte we want to read lies in the |
| 494 | // current page. For performance reasons, we don't check for the case |
| 495 | // "read_offset < page_begin_offset_" explicitly; if it occurs, it will |
| 496 | // lead to wraparound (which is well-defined for unsigned quantities), and |
| 497 | // this will cause the test "pos_in_page < current_page_len_" to fail. |
| 498 | size_t pos_in_page = read_offset - page_begin_offset_; |
| 499 | if (pos_in_page < current_page_len_) { |
| 500 | return page_data_[pos_in_page]; |
| 501 | } |
| 502 | |
| 503 | // Check that the offset we're trying to read lies within the sub-array |
| 504 | // we're allowed to access. |
| 505 | if (read_offset >= sub_array_begin_ && read_offset < sub_array_end_) { |
| 506 | // Read the page that contains the offset "read_offset". |
| 507 | loadPageForOffset(read_offset); |
| 508 | |
| 509 | // Compute the position within the new page from which we need to read. |
| 510 | pos_in_page = read_offset - page_begin_offset_; |
| 511 | |
| 512 | // After the call to loadPageForOffset(), read_offset must lie within |
| 513 | // the current page, and therefore pos_in_page must be less than the |
| 514 | // length of the page. We nevertheless check for this to protect against |
| 515 | // potential bugs in loadPageForOffset(). |
| 516 | assert(pos_in_page < current_page_len_); |
| 517 | if (pos_in_page < current_page_len_) { |
| 518 | return page_data_[pos_in_page]; |
| 519 | } |
| 520 | } |
| 521 | } |
| 522 | |
| 523 | // All error cases fall through to here. |
| 524 | #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE |
| 525 | assert(false); |
| 526 | #endif |
| 527 | error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; |
| 528 | // return 0, which represents the invalid value |
| 529 | return static_cast<unsigned char>(0); |
| 530 | } |
| 531 | |
| 532 | unsigned char RangeCheckedBytePtr::operator*() const { return (*this)[0]; } |
| 533 | |
| 534 | RangeCheckedBytePtr &RangeCheckedBytePtr::operator++() { |
| 535 | if (current_pos_ < sub_array_end_) { |
| 536 | current_pos_++; |
| 537 | } else { |
| 538 | #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE |
| 539 | assert(false); |
| 540 | #endif |
| 541 | error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; |
| 542 | } |
| 543 | return *this; |
| 544 | } |
| 545 | |
| 546 | RangeCheckedBytePtr RangeCheckedBytePtr::operator++(int) { |
| 547 | RangeCheckedBytePtr result(*this); |
| 548 | ++(*this); |
| 549 | return result; |
| 550 | } |
| 551 | |
| 552 | RangeCheckedBytePtr &RangeCheckedBytePtr::operator--() { |
| 553 | if (current_pos_ > sub_array_begin_) { |
| 554 | current_pos_--; |
| 555 | } else { |
| 556 | #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE |
| 557 | assert(false); |
| 558 | #endif |
| 559 | error_flag_ = RANGE_CHECKED_BYTE_ERROR_UNDERFLOW; |
| 560 | } |
| 561 | return *this; |
| 562 | } |
| 563 | |
| 564 | RangeCheckedBytePtr RangeCheckedBytePtr::operator--(int) { |
| 565 | RangeCheckedBytePtr result(*this); |
| 566 | --(*this); |
| 567 | return result; |
| 568 | } |
| 569 | |
| 570 | RangeCheckedBytePtr &RangeCheckedBytePtr::operator+=(size_t x) { |
| 571 | if (remainingLength() >= x) { |
| 572 | current_pos_ += x; |
| 573 | } else { |
| 574 | #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE |
| 575 | assert(false); |
| 576 | #endif |
| 577 | error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW; |
| 578 | } |
| 579 | return *this; |
| 580 | } |
| 581 | |
| 582 | RangeCheckedBytePtr &RangeCheckedBytePtr::operator-=(size_t x) { |
| 583 | if (x <= current_pos_ - sub_array_begin_) { |
| 584 | current_pos_ -= x; |
| 585 | } else { |
| 586 | #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE |
| 587 | assert(false); |
| 588 | #endif |
| 589 | error_flag_ = RANGE_CHECKED_BYTE_ERROR_UNDERFLOW; |
| 590 | } |
| 591 | return *this; |
| 592 | } |
| 593 | |
| 594 | RangeCheckedBytePtr operator+(const RangeCheckedBytePtr &p, size_t x) { |
| 595 | RangeCheckedBytePtr result(p); |
| 596 | result += x; |
| 597 | return result; |
| 598 | } |
| 599 | |
| 600 | RangeCheckedBytePtr operator-(const RangeCheckedBytePtr &p, size_t x) { |
| 601 | RangeCheckedBytePtr result(p); |
| 602 | result -= x; |
| 603 | return result; |
| 604 | } |
| 605 | |
| 606 | } // namespace binary_parse |
| 607 | } // namespace piex |
| 608 | |
| 609 | #endif // PIEX_BINARY_PARSE_RANGE_CHECKED_BYTE_PTR_H_ |
| 610 | |