1// Copyright 2011 Google Inc. All Rights Reserved.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7// * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13// * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28//
29// Various stubs for the open-source version of Snappy.
30
31#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
32#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
33
34#ifdef HAVE_CONFIG_H
35#include "config.h"
36#endif
37
38#include <string>
39
40#include <assert.h>
41#include <stdlib.h>
42#include <string.h>
43
44#ifdef HAVE_SYS_MMAN_H
45#include <sys/mman.h>
46#endif
47
48#ifdef HAVE_UNISTD_H
49#include <unistd.h>
50#endif
51
52#if defined(_MSC_VER)
53#include <intrin.h>
54#endif // defined(_MSC_VER)
55
// When the compiler offers no __has_feature, supply a stub that answers 0 for
// every feature so the checks below still preprocess.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif

// SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) forwards to
// __msan_unpoison() when building with MemorySanitizer and expands to nothing
// in every other build.
#if !__has_feature(memory_sanitizer)
#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */
#else
#include <sanitizer/msan_interface.h>
#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \
  __msan_unpoison((address), (size))
#endif  // !__has_feature(memory_sanitizer)
67
68#include "snappy-stubs-public.h"
69
// Architecture tags. At most one of ARCH_K8 / ARCH_PPC / ARCH_ARM is defined;
// code further down uses them to enable 64-bit optimized routines.
#if defined(__x86_64__)
#define ARCH_K8 1
#elif defined(__ppc64__)
#define ARCH_PPC 1
#elif defined(__aarch64__)
#define ARCH_ARM 1
#endif
84
// OS X (among others) does not define MAP_ANONYMOUS; fall back to MAP_ANON.
#if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
89
// ARRAYSIZE(a): element count of an array whose extent is known at compile
// time. Handing it a pointer compiles but gives a meaningless result.
// Some compilers ship their own ARRAYSIZE, so any earlier definition is
// discarded first (#undef of an undefined name is a no-op).
#undef ARRAYSIZE
#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
97
// Static prediction hints.
//
// With HAVE_BUILTIN_EXPECT these wrap __builtin_expect(); otherwise they
// evaluate to the argument itself. In both variants the expansion is fully
// parenthesized, so an argument such as `a || b` keeps its grouping when the
// macro is used inside a larger expression (e.g. `MACRO(a || b) && c`).
#ifdef HAVE_BUILTIN_EXPECT
#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect((x), 0))
#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define SNAPPY_PREDICT_FALSE(x) (x)
#define SNAPPY_PREDICT_TRUE(x) (x)
#endif
106
// Minimal stand-ins for the flag macros. The only flag is used for recomputing
// the tag byte table used during decompression; that support is left out of
// the open-source version for simplicity (anyone who wants to regenerate the
// table can make the call themselves from main()).
//
// DEFINE_bool defines a plain `bool FLAGS_<name>`; the description argument is
// ignored. DECLARE_bool emits the matching extern declaration.
#define DEFINE_bool(name, initial, help_text) \
  bool FLAGS_##name = initial
#define DECLARE_bool(name) \
  extern bool FLAGS_##name
115
116namespace snappy {
117
118static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
119static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
120
121// Potentially unaligned loads and stores.
122
// x86, PowerPC, and ARM64 can simply do these loads and stores natively.
124
125#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
126 defined(__aarch64__)
127
// Loads: reinterpret the address as a pointer to the fixed-width type and
// dereference it directly.
#define UNALIGNED_LOAD16(ptr) (*reinterpret_cast<const uint16 *>(ptr))
#define UNALIGNED_LOAD32(ptr) (*reinterpret_cast<const uint32 *>(ptr))
#define UNALIGNED_LOAD64(ptr) (*reinterpret_cast<const uint64 *>(ptr))

// Stores: same idea, assigning through the reinterpreted pointer. Each
// expression yields the result of the assignment.
#define UNALIGNED_STORE16(ptr, value) \
  (*reinterpret_cast<uint16 *>(ptr) = (value))
#define UNALIGNED_STORE32(ptr, value) \
  (*reinterpret_cast<uint32 *>(ptr) = (value))
#define UNALIGNED_STORE64(ptr, value) \
  (*reinterpret_cast<uint64 *>(ptr) = (value))
135
136// ARMv7 and newer support native unaligned accesses, but only of 16-bit
137// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
138// do an unaligned read and rotate the words around a bit, or do the reads very
139// slowly (trip through kernel mode). There's no simple #define that says just
140// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
141// sub-architectures.
142//
143// This is a mess, but there's not much we can do about it.
144//
145// To further complicate matters, only LDR instructions (single reads) are
146// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
147// explicitly tell the compiler that these accesses can be unaligned, it can and
148// will combine accesses. On armcc, the way to signal this is done by accessing
149// through the type (uint32 __packed *), but GCC has no such attribute
150// (it ignores __attribute__((packed)) on individual variables). However,
151// we can tell it that a _struct_ is unaligned, which has the same effect,
152// so we do that.
153
154#elif defined(__arm__) && \
155 !defined(__ARM_ARCH_4__) && \
156 !defined(__ARM_ARCH_4T__) && \
157 !defined(__ARM_ARCH_5__) && \
158 !defined(__ARM_ARCH_5T__) && \
159 !defined(__ARM_ARCH_5TE__) && \
160 !defined(__ARM_ARCH_5TEJ__) && \
161 !defined(__ARM_ARCH_6__) && \
162 !defined(__ARM_ARCH_6J__) && \
163 !defined(__ARM_ARCH_6K__) && \
164 !defined(__ARM_ARCH_6Z__) && \
165 !defined(__ARM_ARCH_6ZK__) && \
166 !defined(__ARM_ARCH_6T2__)
167
// ATTRIBUTE_PACKED: the GNU packed attribute when __GNUC__ is set, otherwise
// nothing.
#if !__GNUC__
#define ATTRIBUTE_PACKED
#else
#define ATTRIBUTE_PACKED __attribute__((__packed__))
#endif
173
174namespace base {
175namespace internal {
176
177struct Unaligned16Struct {
178 uint16 value;
179 uint8 dummy; // To make the size non-power-of-two.
180} ATTRIBUTE_PACKED;
181
182struct Unaligned32Struct {
183 uint32 value;
184 uint8 dummy; // To make the size non-power-of-two.
185} ATTRIBUTE_PACKED;
186
187} // namespace internal
188} // namespace base
189
// 16- and 32-bit loads: read the `value` member through a pointer to the
// packed wrapper struct.
#define UNALIGNED_LOAD16(ptr)                                               \
  ((reinterpret_cast<const ::snappy::base::internal::Unaligned16Struct *>( \
       ptr))                                                               \
       ->value)
#define UNALIGNED_LOAD32(ptr)                                               \
  ((reinterpret_cast<const ::snappy::base::internal::Unaligned32Struct *>( \
       ptr))                                                               \
       ->value)

// 16- and 32-bit stores: assign to the `value` member through a pointer to
// the packed wrapper struct.
#define UNALIGNED_STORE16(ptr, val)                                    \
  ((reinterpret_cast< ::snappy::base::internal::Unaligned16Struct *>( \
        ptr))                                                         \
       ->value = (val))
#define UNALIGNED_STORE32(ptr, val)                                    \
  ((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>( \
        ptr))                                                         \
       ->value = (val))
201
202// TODO(user): NEON supports unaligned 64-bit loads and stores.
203// See if that would be more efficient on platforms supporting it,
204// at least for copies.
205
206inline uint64 UNALIGNED_LOAD64(const void *p) {
207 uint64 t;
208 memcpy(&t, p, sizeof t);
209 return t;
210}
211
212inline void UNALIGNED_STORE64(void *p, uint64 v) {
213 memcpy(p, &v, sizeof v);
214}
215
216#else
217
218// These functions are provided for architectures that don't support
219// unaligned loads and stores.
220
221inline uint16 UNALIGNED_LOAD16(const void *p) {
222 uint16 t;
223 memcpy(&t, p, sizeof t);
224 return t;
225}
226
227inline uint32 UNALIGNED_LOAD32(const void *p) {
228 uint32 t;
229 memcpy(&t, p, sizeof t);
230 return t;
231}
232
233inline uint64 UNALIGNED_LOAD64(const void *p) {
234 uint64 t;
235 memcpy(&t, p, sizeof t);
236 return t;
237}
238
239inline void UNALIGNED_STORE16(void *p, uint16 v) {
240 memcpy(p, &v, sizeof v);
241}
242
243inline void UNALIGNED_STORE32(void *p, uint32 v) {
244 memcpy(p, &v, sizeof v);
245}
246
247inline void UNALIGNED_STORE64(void *p, uint64 v) {
248 memcpy(p, &v, sizeof v);
249}
250
251#endif
252
253// The following guarantees declaration of the byte swap functions.
254#if defined(SNAPPY_IS_BIG_ENDIAN)
255
256#ifdef HAVE_SYS_BYTEORDER_H
257#include <sys/byteorder.h>
258#endif
259
260#ifdef HAVE_SYS_ENDIAN_H
261#include <sys/endian.h>
262#endif
263
264#ifdef _MSC_VER
265#include <stdlib.h>
266#define bswap_16(x) _byteswap_ushort(x)
267#define bswap_32(x) _byteswap_ulong(x)
268#define bswap_64(x) _byteswap_uint64(x)
269
270#elif defined(__APPLE__)
271// Mac OS X / Darwin features
272#include <libkern/OSByteOrder.h>
273#define bswap_16(x) OSSwapInt16(x)
274#define bswap_32(x) OSSwapInt32(x)
275#define bswap_64(x) OSSwapInt64(x)
276
277#elif defined(HAVE_BYTESWAP_H)
278#include <byteswap.h>
279
280#elif defined(bswap32)
281// FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included).
282#define bswap_16(x) bswap16(x)
283#define bswap_32(x) bswap32(x)
284#define bswap_64(x) bswap64(x)
285
286#elif defined(BSWAP_64)
287// Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included).
288#define bswap_16(x) BSWAP_16(x)
289#define bswap_32(x) BSWAP_32(x)
290#define bswap_64(x) BSWAP_64(x)
291
292#else
293
294inline uint16 bswap_16(uint16 x) {
295 return (x << 8) | (x >> 8);
296}
297
298inline uint32 bswap_32(uint32 x) {
299 x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);
300 return (x >> 16) | (x << 16);
301}
302
303inline uint64 bswap_64(uint64 x) {
304 x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8);
305 x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16);
306 return (x >> 32) | (x << 32);
307}
308
309#endif
310
311#endif // defined(SNAPPY_IS_BIG_ENDIAN)
312
313// Convert to little-endian storage, opposite of network format.
314// Convert x from host to little endian: x = LittleEndian.FromHost(x);
315// convert x from little endian to host: x = LittleEndian.ToHost(x);
316//
317// Store values into unaligned memory converting to little endian order:
318// LittleEndian.Store16(p, x);
319//
320// Load unaligned values stored in little endian converting to host order:
321// x = LittleEndian.Load16(p);
322class LittleEndian {
323 public:
324 // Conversion functions.
325#if defined(SNAPPY_IS_BIG_ENDIAN)
326
327 static uint16 FromHost16(uint16 x) { return bswap_16(x); }
328 static uint16 ToHost16(uint16 x) { return bswap_16(x); }
329
330 static uint32 FromHost32(uint32 x) { return bswap_32(x); }
331 static uint32 ToHost32(uint32 x) { return bswap_32(x); }
332
333 static bool IsLittleEndian() { return false; }
334
335#else // !defined(SNAPPY_IS_BIG_ENDIAN)
336
337 static uint16 FromHost16(uint16 x) { return x; }
338 static uint16 ToHost16(uint16 x) { return x; }
339
340 static uint32 FromHost32(uint32 x) { return x; }
341 static uint32 ToHost32(uint32 x) { return x; }
342
343 static bool IsLittleEndian() { return true; }
344
345#endif // !defined(SNAPPY_IS_BIG_ENDIAN)
346
347 // Functions to do unaligned loads and stores in little-endian order.
348 static uint16 Load16(const void *p) {
349 return ToHost16(UNALIGNED_LOAD16(p));
350 }
351
352 static void Store16(void *p, uint16 v) {
353 UNALIGNED_STORE16(p, FromHost16(v));
354 }
355
356 static uint32 Load32(const void *p) {
357 return ToHost32(UNALIGNED_LOAD32(p));
358 }
359
360 static void Store32(void *p, uint32 v) {
361 UNALIGNED_STORE32(p, FromHost32(v));
362 }
363};
364
365// Some bit-manipulation functions.
366class Bits {
367 public:
368 // Return floor(log2(n)) for positive integer n.
369 static int Log2FloorNonZero(uint32 n);
370
371 // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
372 static int Log2Floor(uint32 n);
373
374 // Return the first set least / most significant bit, 0-indexed. Returns an
375 // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
376 // that it's 0-indexed.
377 static int FindLSBSetNonZero(uint32 n);
378
379#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
380 static int FindLSBSetNonZero64(uint64 n);
381#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
382
383 private:
384 // No copying
385 Bits(const Bits&);
386 void operator=(const Bits&);
387};
388
389#ifdef HAVE_BUILTIN_CTZ
390
391inline int Bits::Log2FloorNonZero(uint32 n) {
392 assert(n != 0);
393 // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof
394 // represents subtraction in base 2 and observes that there's no carry.
395 //
396 // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x).
397 // Using "31 ^" here instead of "31 -" allows the optimizer to strip the
398 // function body down to _bit_scan_reverse(x).
399 return 31 ^ __builtin_clz(n);
400}
401
402inline int Bits::Log2Floor(uint32 n) {
403 return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
404}
405
406inline int Bits::FindLSBSetNonZero(uint32 n) {
407 assert(n != 0);
408 return __builtin_ctz(n);
409}
410
#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
// 0-based index of the lowest set bit of non-zero 64-bit n.
inline int Bits::FindLSBSetNonZero64(uint64 n) {
  assert(n != 0);
  const int trailing_zeros = __builtin_ctzll(n);
  return trailing_zeros;
}
#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
417
418#elif defined(_MSC_VER)
419
420inline int Bits::Log2FloorNonZero(uint32 n) {
421 assert(n != 0);
422 unsigned long where;
423 _BitScanReverse(&where, n);
424 return static_cast<int>(where);
425}
426
427inline int Bits::Log2Floor(uint32 n) {
428 unsigned long where;
429 if (_BitScanReverse(&where, n))
430 return static_cast<int>(where);
431 return -1;
432}
433
434inline int Bits::FindLSBSetNonZero(uint32 n) {
435 assert(n != 0);
436 unsigned long where;
437 if (_BitScanForward(&where, n))
438 return static_cast<int>(where);
439 return 32;
440}
441
#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
// 0-based index of the lowest set bit of 64-bit n; yields 64 if
// _BitScanForward64 reports that no bit is set.
inline int Bits::FindLSBSetNonZero64(uint64 n) {
  assert(n != 0);
  unsigned long lowest_bit;
  const bool found = _BitScanForward64(&lowest_bit, n) != 0;
  return found ? static_cast<int>(lowest_bit) : 64;
}
#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
451
452#else // Portable versions.
453
454inline int Bits::Log2FloorNonZero(uint32 n) {
455 assert(n != 0);
456
457 int log = 0;
458 uint32 value = n;
459 for (int i = 4; i >= 0; --i) {
460 int shift = (1 << i);
461 uint32 x = value >> shift;
462 if (x != 0) {
463 value = x;
464 log += shift;
465 }
466 }
467 assert(value == 1);
468 return log;
469}
470
471inline int Bits::Log2Floor(uint32 n) {
472 return (n == 0) ? -1 : Bits::Log2FloorNonZero(arg);
473}
474
475inline int Bits::FindLSBSetNonZero(uint32 n) {
476 assert(n != 0);
477
478 int rc = 31;
479 for (int i = 4, shift = 1 << 4; i >= 0; --i) {
480 const uint32 x = n << shift;
481 if (x != 0) {
482 n = x;
483 rc -= shift;
484 }
485 shift >>= 1;
486 }
487 return rc;
488}
489
#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
// Built on top of the 32-bit FindLSBSetNonZero(): scan the low word first and
// fall back to the high word (offset by 32) only when the low word is zero.
inline int Bits::FindLSBSetNonZero64(uint64 n) {
  assert(n != 0);

  const uint32 low_word = static_cast<uint32>(n);
  if (low_word != 0) {
    return FindLSBSetNonZero(low_word);
  }
  const uint32 high_word = static_cast<uint32>(n >> 32);
  return 32 + FindLSBSetNonZero(high_word);
}
#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
504
505#endif // End portable versions.
506
507// Variable-length integer encoding.
508class Varint {
509 public:
510 // Maximum lengths of varint encoding of uint32.
511 static const int kMax32 = 5;
512
513 // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
514 // Never reads a character at or beyond limit. If a valid/terminated varint32
515 // was found in the range, stores it in *OUTPUT and returns a pointer just
516 // past the last byte of the varint32. Else returns NULL. On success,
517 // "result <= limit".
518 static const char* Parse32WithLimit(const char* ptr, const char* limit,
519 uint32* OUTPUT);
520
521 // REQUIRES "ptr" points to a buffer of length sufficient to hold "v".
522 // EFFECTS Encodes "v" into "ptr" and returns a pointer to the
523 // byte just past the last encoded byte.
524 static char* Encode32(char* ptr, uint32 v);
525
526 // EFFECTS Appends the varint representation of "value" to "*s".
527 static void Append32(string* s, uint32 value);
528};
529
530inline const char* Varint::Parse32WithLimit(const char* p,
531 const char* l,
532 uint32* OUTPUT) {
533 const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
534 const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
535 uint32 b, result;
536 if (ptr >= limit) return NULL;
537 b = *(ptr++); result = b & 127; if (b < 128) goto done;
538 if (ptr >= limit) return NULL;
539 b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done;
540 if (ptr >= limit) return NULL;
541 b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done;
542 if (ptr >= limit) return NULL;
543 b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done;
544 if (ptr >= limit) return NULL;
545 b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done;
546 return NULL; // Value is too long to be a varint32
547 done:
548 *OUTPUT = result;
549 return reinterpret_cast<const char*>(ptr);
550}
551
552inline char* Varint::Encode32(char* sptr, uint32 v) {
553 // Operate on characters as unsigneds
554 unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr);
555 static const int B = 128;
556 if (v < (1<<7)) {
557 *(ptr++) = v;
558 } else if (v < (1<<14)) {
559 *(ptr++) = v | B;
560 *(ptr++) = v>>7;
561 } else if (v < (1<<21)) {
562 *(ptr++) = v | B;
563 *(ptr++) = (v>>7) | B;
564 *(ptr++) = v>>14;
565 } else if (v < (1<<28)) {
566 *(ptr++) = v | B;
567 *(ptr++) = (v>>7) | B;
568 *(ptr++) = (v>>14) | B;
569 *(ptr++) = v>>21;
570 } else {
571 *(ptr++) = v | B;
572 *(ptr++) = (v>>7) | B;
573 *(ptr++) = (v>>14) | B;
574 *(ptr++) = (v>>21) | B;
575 *(ptr++) = v>>28;
576 }
577 return reinterpret_cast<char*>(ptr);
578}
579
580// If you know the internal layout of the std::string in use, you can
581// replace this function with one that resizes the string without
582// filling the new space with zeros (if applicable) --
583// it will be non-portable but faster.
584inline void STLStringResizeUninitialized(string* s, size_t new_size) {
585 s->resize(new_size);
586}
587
588// Return a mutable char* pointing to a string's internal buffer,
589// which may not be null-terminated. Writing through this pointer will
590// modify the string.
591//
592// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
593// next call to a string method that invalidates iterators.
594//
595// As of 2006-04, there is no standard-blessed way of getting a
596// mutable reference to a string's internal buffer. However, issue 530
597// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
598// proposes this as the method. It will officially be part of the standard
599// for C++0x. This should already work on all current implementations.
600inline char* string_as_array(string* str) {
601 return str->empty() ? NULL : &*str->begin();
602}
603
604} // namespace snappy
605
606#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
607