1 | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | // |
3 | // Redistribution and use in source and binary forms, with or without |
4 | // modification, are permitted provided that the following conditions are |
5 | // met: |
6 | // |
7 | // * Redistributions of source code must retain the above copyright |
8 | // notice, this list of conditions and the following disclaimer. |
9 | // * Redistributions in binary form must reproduce the above |
10 | // copyright notice, this list of conditions and the following disclaimer |
11 | // in the documentation and/or other materials provided with the |
12 | // distribution. |
13 | // * Neither the name of Google Inc. nor the names of its |
14 | // contributors may be used to endorse or promote products derived from |
15 | // this software without specific prior written permission. |
16 | // |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | // |
29 | // Various stubs for the open-source version of Snappy. |
30 | |
31 | #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ |
32 | #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ |
33 | |
34 | #ifdef HAVE_CONFIG_H |
35 | #include "config.h" |
36 | #endif |
37 | |
38 | #include <string> |
39 | |
40 | #include <assert.h> |
41 | #include <stdlib.h> |
42 | #include <string.h> |
43 | |
44 | #ifdef HAVE_SYS_MMAN_H |
45 | #include <sys/mman.h> |
46 | #endif |
47 | |
48 | #ifdef HAVE_UNISTD_H |
49 | #include <unistd.h> |
50 | #endif |
51 | |
52 | #if defined(_MSC_VER) |
53 | #include <intrin.h> |
54 | #endif // defined(_MSC_VER) |
55 | |
56 | #ifndef __has_feature |
57 | #define __has_feature(x) 0 |
58 | #endif |
59 | |
60 | #if __has_feature(memory_sanitizer) |
61 | #include <sanitizer/msan_interface.h> |
62 | #define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ |
63 | __msan_unpoison((address), (size)) |
64 | #else |
65 | #define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ |
66 | #endif // __has_feature(memory_sanitizer) |
67 | |
68 | #include "snappy-stubs-public.h" |
69 | |
70 | #if defined(__x86_64__) |
71 | |
72 | // Enable 64-bit optimized versions of some routines. |
73 | #define ARCH_K8 1 |
74 | |
75 | #elif defined(__ppc64__) |
76 | |
77 | #define ARCH_PPC 1 |
78 | |
79 | #elif defined(__aarch64__) |
80 | |
81 | #define ARCH_ARM 1 |
82 | |
83 | #endif |
84 | |
85 | // Needed by OS X, among others. |
86 | #ifndef MAP_ANONYMOUS |
87 | #define MAP_ANONYMOUS MAP_ANON |
88 | #endif |
89 | |
90 | // The size of an array, if known at compile-time. |
91 | // Will give unexpected results if used on a pointer. |
92 | // We undefine it first, since some compilers already have a definition. |
93 | #ifdef ARRAYSIZE |
94 | #undef ARRAYSIZE |
95 | #endif |
96 | #define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) |
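
// For example, given "int table[16];", ARRAYSIZE(table) is 16, whereas
// applying it to "int* p" would silently yield sizeof(int*) / sizeof(int).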
97 | |
98 | // Static prediction hints. |
99 | #ifdef HAVE_BUILTIN_EXPECT |
#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
101 | #define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) |
102 | #else |
103 | #define SNAPPY_PREDICT_FALSE(x) x |
104 | #define SNAPPY_PREDICT_TRUE(x) x |
105 | #endif |
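
// Typical usage (illustrative only): mark error paths as unlikely so the
// compiler keeps the hot path free of taken branches, e.g.
//
//   if (SNAPPY_PREDICT_FALSE(bytes_remaining < needed)) {
//     return false;  // rare failure path
//   }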
106 | |
// This is only used for recomputing the tag byte table used during
// decompression; for simplicity we just remove it from the open-source
// version (anyone who wants to regenerate it can simply make the call
// themselves within main()).
111 | #define DEFINE_bool(flag_name, default_value, description) \ |
112 | bool FLAGS_ ## flag_name = default_value |
113 | #define DECLARE_bool(flag_name) \ |
114 | extern bool FLAGS_ ## flag_name |
115 | |
116 | namespace snappy { |
117 | |
118 | static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF); |
121 | |
// Potentially unaligned loads and stores. Going through memcpy lets the
// compiler generate the best code for each platform while avoiding the
// undefined behavior of dereferencing a misaligned pointer directly.
123 | |
124 | inline uint16 UNALIGNED_LOAD16(const void *p) { |
125 | uint16 t; |
126 | memcpy(&t, p, sizeof t); |
127 | return t; |
128 | } |
129 | |
130 | inline uint32 UNALIGNED_LOAD32(const void *p) { |
131 | uint32 t; |
132 | memcpy(&t, p, sizeof t); |
133 | return t; |
134 | } |
135 | |
136 | inline uint64 UNALIGNED_LOAD64(const void *p) { |
137 | uint64 t; |
138 | memcpy(&t, p, sizeof t); |
139 | return t; |
140 | } |
141 | |
142 | inline void UNALIGNED_STORE16(void *p, uint16 v) { |
143 | memcpy(p, &v, sizeof v); |
144 | } |
145 | |
146 | inline void UNALIGNED_STORE32(void *p, uint32 v) { |
147 | memcpy(p, &v, sizeof v); |
148 | } |
149 | |
150 | inline void UNALIGNED_STORE64(void *p, uint64 v) { |
151 | memcpy(p, &v, sizeof v); |
152 | } |
153 | |
154 | |
155 | // The following guarantees declaration of the byte swap functions. |
156 | #if defined(SNAPPY_IS_BIG_ENDIAN) |
157 | |
158 | #ifdef HAVE_SYS_BYTEORDER_H |
159 | #include <sys/byteorder.h> |
160 | #endif |
161 | |
162 | #ifdef HAVE_SYS_ENDIAN_H |
163 | #include <sys/endian.h> |
164 | #endif |
165 | |
166 | #ifdef _MSC_VER |
167 | #include <stdlib.h> |
168 | #define bswap_16(x) _byteswap_ushort(x) |
169 | #define bswap_32(x) _byteswap_ulong(x) |
170 | #define bswap_64(x) _byteswap_uint64(x) |
171 | |
172 | #elif defined(__APPLE__) |
173 | // Mac OS X / Darwin features |
174 | #include <libkern/OSByteOrder.h> |
175 | #define bswap_16(x) OSSwapInt16(x) |
176 | #define bswap_32(x) OSSwapInt32(x) |
177 | #define bswap_64(x) OSSwapInt64(x) |
178 | |
179 | #elif defined(HAVE_BYTESWAP_H) |
180 | #include <byteswap.h> |
181 | |
182 | #elif defined(bswap32) |
183 | // FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included). |
184 | #define bswap_16(x) bswap16(x) |
185 | #define bswap_32(x) bswap32(x) |
186 | #define bswap_64(x) bswap64(x) |
187 | |
188 | #elif defined(BSWAP_64) |
189 | // Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included). |
190 | #define bswap_16(x) BSWAP_16(x) |
191 | #define bswap_32(x) BSWAP_32(x) |
192 | #define bswap_64(x) BSWAP_64(x) |
193 | |
194 | #else |
195 | |
196 | inline uint16 bswap_16(uint16 x) { |
197 | return (x << 8) | (x >> 8); |
198 | } |
199 | |
200 | inline uint32 bswap_32(uint32 x) { |
201 | x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); |
202 | return (x >> 16) | (x << 16); |
203 | } |
204 | |
205 | inline uint64 bswap_64(uint64 x) { |
206 | x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); |
207 | x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); |
208 | return (x >> 32) | (x << 32); |
209 | } |
210 | |
211 | #endif |
212 | |
213 | #endif // defined(SNAPPY_IS_BIG_ENDIAN) |
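
// For reference, the byte swap functions reverse byte order end-to-end,
// e.g. bswap_32(0x12345678) == 0x78563412 on any host.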
214 | |
215 | // Convert to little-endian storage, opposite of network format. |
216 | // Convert x from host to little endian: x = LittleEndian.FromHost(x); |
// Convert x from little endian to host: x = LittleEndian.ToHost(x);
218 | // |
219 | // Store values into unaligned memory converting to little endian order: |
220 | // LittleEndian.Store16(p, x); |
221 | // |
222 | // Load unaligned values stored in little endian converting to host order: |
223 | // x = LittleEndian.Load16(p); |
224 | class LittleEndian { |
225 | public: |
226 | // Conversion functions. |
227 | #if defined(SNAPPY_IS_BIG_ENDIAN) |
228 | |
229 | static uint16 FromHost16(uint16 x) { return bswap_16(x); } |
230 | static uint16 ToHost16(uint16 x) { return bswap_16(x); } |
231 | |
232 | static uint32 FromHost32(uint32 x) { return bswap_32(x); } |
233 | static uint32 ToHost32(uint32 x) { return bswap_32(x); } |
234 | |
235 | static bool IsLittleEndian() { return false; } |
236 | |
237 | #else // !defined(SNAPPY_IS_BIG_ENDIAN) |
238 | |
239 | static uint16 FromHost16(uint16 x) { return x; } |
240 | static uint16 ToHost16(uint16 x) { return x; } |
241 | |
242 | static uint32 FromHost32(uint32 x) { return x; } |
243 | static uint32 ToHost32(uint32 x) { return x; } |
244 | |
245 | static bool IsLittleEndian() { return true; } |
246 | |
247 | #endif // !defined(SNAPPY_IS_BIG_ENDIAN) |
248 | |
249 | // Functions to do unaligned loads and stores in little-endian order. |
250 | static uint16 Load16(const void *p) { |
251 | return ToHost16(UNALIGNED_LOAD16(p)); |
252 | } |
253 | |
254 | static void Store16(void *p, uint16 v) { |
255 | UNALIGNED_STORE16(p, FromHost16(v)); |
256 | } |
257 | |
258 | static uint32 Load32(const void *p) { |
259 | return ToHost32(UNALIGNED_LOAD32(p)); |
260 | } |
261 | |
262 | static void Store32(void *p, uint32 v) { |
263 | UNALIGNED_STORE32(p, FromHost32(v)); |
264 | } |
265 | }; |
266 | |
267 | // Some bit-manipulation functions. |
268 | class Bits { |
269 | public: |
270 | // Return floor(log2(n)) for positive integer n. |
271 | static int Log2FloorNonZero(uint32 n); |
272 | |
273 | // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. |
274 | static int Log2Floor(uint32 n); |
275 | |
  // Return the index of the least significant set bit, 0-indexed. The result
  // is undefined if n == 0. FindLSBSetNonZero() is similar to ffs() except
  // that it is 0-indexed rather than 1-indexed.
279 | static int FindLSBSetNonZero(uint32 n); |
280 | |
281 | #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) |
282 | static int FindLSBSetNonZero64(uint64 n); |
283 | #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) |
284 | |
285 | private: |
286 | // No copying |
287 | Bits(const Bits&); |
288 | void operator=(const Bits&); |
289 | }; |
290 | |
291 | #ifdef HAVE_BUILTIN_CTZ |
292 | |
293 | inline int Bits::Log2FloorNonZero(uint32 n) { |
294 | assert(n != 0); |
  // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof
  // writes the subtraction in base 2 and observes that no borrow occurs.
297 | // |
298 | // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x). |
299 | // Using "31 ^" here instead of "31 -" allows the optimizer to strip the |
300 | // function body down to _bit_scan_reverse(x). |
301 | return 31 ^ __builtin_clz(n); |
302 | } |
303 | |
304 | inline int Bits::Log2Floor(uint32 n) { |
305 | return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); |
306 | } |
307 | |
308 | inline int Bits::FindLSBSetNonZero(uint32 n) { |
309 | assert(n != 0); |
310 | return __builtin_ctz(n); |
311 | } |
312 | |
313 | #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) |
314 | inline int Bits::FindLSBSetNonZero64(uint64 n) { |
315 | assert(n != 0); |
316 | return __builtin_ctzll(n); |
317 | } |
318 | #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) |
319 | |
320 | #elif defined(_MSC_VER) |
321 | |
322 | inline int Bits::Log2FloorNonZero(uint32 n) { |
323 | assert(n != 0); |
324 | unsigned long where; |
325 | _BitScanReverse(&where, n); |
326 | return static_cast<int>(where); |
327 | } |
328 | |
329 | inline int Bits::Log2Floor(uint32 n) { |
330 | unsigned long where; |
331 | if (_BitScanReverse(&where, n)) |
332 | return static_cast<int>(where); |
333 | return -1; |
334 | } |
335 | |
336 | inline int Bits::FindLSBSetNonZero(uint32 n) { |
337 | assert(n != 0); |
338 | unsigned long where; |
339 | if (_BitScanForward(&where, n)) |
340 | return static_cast<int>(where); |
341 | return 32; |
342 | } |
343 | |
344 | #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) |
345 | inline int Bits::FindLSBSetNonZero64(uint64 n) { |
346 | assert(n != 0); |
347 | unsigned long where; |
348 | if (_BitScanForward64(&where, n)) |
349 | return static_cast<int>(where); |
350 | return 64; |
351 | } |
352 | #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) |
353 | |
354 | #else // Portable versions. |
355 | |
356 | inline int Bits::Log2FloorNonZero(uint32 n) { |
357 | assert(n != 0); |
358 | |
359 | int log = 0; |
360 | uint32 value = n; |
361 | for (int i = 4; i >= 0; --i) { |
362 | int shift = (1 << i); |
363 | uint32 x = value >> shift; |
364 | if (x != 0) { |
365 | value = x; |
366 | log += shift; |
367 | } |
368 | } |
369 | assert(value == 1); |
370 | return log; |
371 | } |
372 | |
373 | inline int Bits::Log2Floor(uint32 n) { |
374 | return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); |
375 | } |
376 | |
377 | inline int Bits::FindLSBSetNonZero(uint32 n) { |
378 | assert(n != 0); |
379 | |
380 | int rc = 31; |
381 | for (int i = 4, shift = 1 << 4; i >= 0; --i) { |
382 | const uint32 x = n << shift; |
383 | if (x != 0) { |
384 | n = x; |
385 | rc -= shift; |
386 | } |
387 | shift >>= 1; |
388 | } |
389 | return rc; |
390 | } |
391 | |
392 | #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) |
393 | // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). |
394 | inline int Bits::FindLSBSetNonZero64(uint64 n) { |
395 | assert(n != 0); |
396 | |
397 | const uint32 bottombits = static_cast<uint32>(n); |
398 | if (bottombits == 0) { |
399 | // Bottom bits are zero, so scan in top bits |
400 | return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32)); |
401 | } else { |
402 | return FindLSBSetNonZero(bottombits); |
403 | } |
404 | } |
405 | #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) |
406 | |
407 | #endif // End portable versions. |
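
// A few sample values, identical across the implementations above:
//   Bits::Log2Floor(1) == 0, Bits::Log2Floor(7) == 2, Bits::Log2Floor(0) == -1
//   Bits::FindLSBSetNonZero(0x10) == 4 (ffs(0x10) would return 5)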
408 | |
409 | // Variable-length integer encoding. |
410 | class Varint { |
411 | public: |
  // Maximum length of the varint encoding of a uint32.
413 | static const int kMax32 = 5; |
414 | |
415 | // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. |
416 | // Never reads a character at or beyond limit. If a valid/terminated varint32 |
417 | // was found in the range, stores it in *OUTPUT and returns a pointer just |
418 | // past the last byte of the varint32. Else returns NULL. On success, |
419 | // "result <= limit". |
420 | static const char* Parse32WithLimit(const char* ptr, const char* limit, |
421 | uint32* OUTPUT); |
422 | |
423 | // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". |
424 | // EFFECTS Encodes "v" into "ptr" and returns a pointer to the |
425 | // byte just past the last encoded byte. |
426 | static char* Encode32(char* ptr, uint32 v); |
427 | |
428 | // EFFECTS Appends the varint representation of "value" to "*s". |
429 | static void Append32(string* s, uint32 value); |
430 | }; |
431 | |
432 | inline const char* Varint::Parse32WithLimit(const char* p, |
433 | const char* l, |
434 | uint32* OUTPUT) { |
435 | const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p); |
436 | const unsigned char* limit = reinterpret_cast<const unsigned char*>(l); |
437 | uint32 b, result; |
438 | if (ptr >= limit) return NULL; |
439 | b = *(ptr++); result = b & 127; if (b < 128) goto done; |
440 | if (ptr >= limit) return NULL; |
441 | b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; |
442 | if (ptr >= limit) return NULL; |
443 | b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; |
444 | if (ptr >= limit) return NULL; |
445 | b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; |
446 | if (ptr >= limit) return NULL; |
447 | b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; |
448 | return NULL; // Value is too long to be a varint32 |
449 | done: |
450 | *OUTPUT = result; |
451 | return reinterpret_cast<const char*>(ptr); |
452 | } |
453 | |
454 | inline char* Varint::Encode32(char* sptr, uint32 v) { |
  // Operate on characters as unsigned values.
456 | unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr); |
457 | static const int B = 128; |
458 | if (v < (1<<7)) { |
459 | *(ptr++) = v; |
460 | } else if (v < (1<<14)) { |
461 | *(ptr++) = v | B; |
462 | *(ptr++) = v>>7; |
463 | } else if (v < (1<<21)) { |
464 | *(ptr++) = v | B; |
465 | *(ptr++) = (v>>7) | B; |
466 | *(ptr++) = v>>14; |
467 | } else if (v < (1<<28)) { |
468 | *(ptr++) = v | B; |
469 | *(ptr++) = (v>>7) | B; |
470 | *(ptr++) = (v>>14) | B; |
471 | *(ptr++) = v>>21; |
472 | } else { |
473 | *(ptr++) = v | B; |
474 | *(ptr++) = (v>>7) | B; |
475 | *(ptr++) = (v>>14) | B; |
476 | *(ptr++) = (v>>21) | B; |
477 | *(ptr++) = v>>28; |
478 | } |
479 | return reinterpret_cast<char*>(ptr); |
480 | } |
481 | |
482 | // If you know the internal layout of the std::string in use, you can |
483 | // replace this function with one that resizes the string without |
484 | // filling the new space with zeros (if applicable) -- |
485 | // it will be non-portable but faster. |
486 | inline void STLStringResizeUninitialized(string* s, size_t new_size) { |
487 | s->resize(new_size); |
488 | } |
489 | |
490 | // Return a mutable char* pointing to a string's internal buffer, |
491 | // which may not be null-terminated. Writing through this pointer will |
492 | // modify the string. |
493 | // |
494 | // string_as_array(&str)[i] is valid for 0 <= i < str.size() until the |
495 | // next call to a string method that invalidates iterators. |
496 | // |
// Prior to C++11 there was no standard-blessed way of getting a mutable
// reference to a string's internal buffer. LWG issue 530
// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
// proposed this method, and C++11 made a string's storage contiguous, so
// it works on all conforming implementations.
502 | inline char* string_as_array(string* str) { |
503 | return str->empty() ? NULL : &*str->begin(); |
504 | } |
505 | |
506 | } // namespace snappy |
507 | |
508 | #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ |
509 | |