gdk_atoms.h source code [MonetDB/gdk/gdk_atoms.h]

1	/*
2	* This Source Code Form is subject to the terms of the Mozilla Public
3	* License, v. 2.0. If a copy of the MPL was not distributed with this
4	* file, You can obtain one at http://mozilla.org/MPL/2.0/.
5	*
6	* Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7	*/
8
9	#ifndef _GDK_ATOMS_H_
10	#define _GDK_ATOMS_H_
11
12	#define MAXATOMS 128
13
14	/*
15	* @- maximum atomic string lengths
16	*/
17	#define bitStrlen 8
18	#define bteStrlen 8
19	#define shtStrlen 12
20	#define intStrlen 24
21	#if SIZEOF_OID == SIZEOF_INT
22	#define oidStrlen 24
23	#else
24	#define oidStrlen 48
25	#endif
26	#if SIZEOF_PTR == SIZEOF_INT
27	#define ptrStrlen 24
28	#else
29	#define ptrStrlen 48
30	#endif
31	#define lngStrlen 48
32	#ifdef HAVE_HGE
33	#define hgeStrlen 96
34	#endif
35	#define fltStrlen 48
36	#define dblStrlen 96
37
38	/*
39	* The system comes with the traditional atomic types: int (4 bytes),
40	* bool(1 byte) and str (variable). In addition, we support the notion
41	* of an OID type, which ensures uniqueness of its members. This
42	* leads to the following type descriptor table.
43	*/
44
45	#ifdef HAVE_HGE
46	gdk_export ssize_t hgeFromStr(const char src, size_t len, hge **dst, bool external);
47	gdk_export ssize_t hgeToStr(str dst, size_t len, const hge *src, bool external);
48	#endif
49	gdk_export ssize_t lngFromStr(const char src, size_t len, lng **dst, bool external);
50	gdk_export ssize_t lngToStr(str dst, size_t len, const lng *src, bool external);
51	gdk_export ssize_t intFromStr(const char src, size_t len, int **dst, bool external);
52	gdk_export ssize_t intToStr(str dst, size_t len, const int *src, bool external);
53	gdk_export ssize_t batFromStr(const char src, size_t len, bat **dst, bool external);
54	gdk_export ssize_t batToStr(str dst, size_t len, const bat *src, bool external);
55	gdk_export ssize_t ptrFromStr(const char src, size_t len, ptr **dst, bool external);
56	gdk_export ssize_t ptrToStr(str dst, size_t len, const ptr *src, bool external);
57	gdk_export ssize_t bitFromStr(const char src, size_t len, bit **dst, bool external);
58	gdk_export ssize_t bitToStr(str dst, size_t len, const bit *src, bool external);
59	gdk_export ssize_t OIDfromStr(const char src, size_t len, oid **dst, bool external);
60	gdk_export ssize_t OIDtoStr(str dst, size_t len, const oid *src, bool external);
61	gdk_export ssize_t shtFromStr(const char src, size_t len, sht **dst, bool external);
62	gdk_export ssize_t shtToStr(str dst, size_t len, const sht *src, bool external);
63	gdk_export ssize_t bteFromStr(const char src, size_t len, bte **dst, bool external);
64	gdk_export ssize_t bteToStr(str dst, size_t len, const bte *src, bool external);
65	gdk_export ssize_t fltFromStr(const char src, size_t len, flt **dst, bool external);
66	gdk_export ssize_t fltToStr(str dst, size_t len, const flt *src, bool external);
67	gdk_export ssize_t dblFromStr(const char src, size_t len, dbl **dst, bool external);
68	gdk_export ssize_t dblToStr(str dst, size_t len, const dbl *src, bool external);
69	gdk_export ssize_t GDKstrFromStr(unsigned char *restrict dst, const unsigned char *restrict src, ssize_t len);
70	gdk_export ssize_t strFromStr(const char *restrict src, size_t *restrict len, str *restrict dst, bool external);
71	gdk_export BUN strHash(const char *s);
72	gdk_export size_t strLen(const char *s);
73	gdk_export int strNil(const char *s);
74	gdk_export size_t escapedStrlen(const char *restrict src, const char sep1, const* char sep2, int* quote);
75	gdk_export size_t escapedStr(char *restrict dst, const char *restrict src, size_t dstlen, const char sep1, const* char sep2, int* quote);
76	/*
77	* @- nil values
78	* All types have a single value designated as a NIL value. It
79	* designates a missing value and it is ignored (forbidden) in several
80	* primitives. The current policy is to use the smallest value in any
81	* ordered domain. The routine atomnil returns a pointer to the nil
82	* value representation.
83	*/
84	#define GDK_bit_max ((bit) 1)
85	#define GDK_bit_min ((bit) 0)
86	#define GDK_bte_max ((bte) INT8_MAX)
87	#define GDK_bte_min ((bte) INT8_MIN+1)
88	#define GDK_sht_max ((sht) INT16_MAX)
89	#define GDK_sht_min ((sht) INT16_MIN+1)
90	#define GDK_int_max ((int) INT32_MAX)
91	#define GDK_int_min ((int) INT32_MIN+1)
92	#define GDK_lng_max ((lng) INT64_MAX)
93	#define GDK_lng_min ((lng) INT64_MIN+1)
94	#ifdef HAVE_HGE
95	#define GDK_hge_max ((((hge) 1) << 126) - 1 + (((hge) 1) << 126))
96	#define GDK_hge_min (-GDK_hge_max)
97	#endif
98	#define GDK_flt_max ((flt) FLT_MAX)
99	#define GDK_flt_min ((flt) -FLT_MAX)
100	#define GDK_dbl_max ((dbl) DBL_MAX)
101	#define GDK_dbl_min ((dbl) -DBL_MAX)
102	#define GDK_oid_max (((oid) 1 << ((8 * SIZEOF_OID) - 1)) - 1)
103	#define GDK_oid_min ((oid) 0)
104	/ representation of the nil /
105	gdk_export const bte bte_nil;
106	gdk_export const sht sht_nil;
107	gdk_export const int int_nil;
108	#ifdef NAN_CANNOT_BE_USED_AS_INITIALIZER
109	/ Definition of NAN is seriously broken on Intel compiler (at least*
110	* in some versions), so we work around it. */
111	union _flt_nil_t {
112	uint32_t l;
113	flt f;
114	};
115	gdk_export const union _flt_nil_t _flt_nil_;
116	#define flt_nil (_flt_nil_.f)
117	union _dbl_nil_t {
118	uint64_t l;
119	dbl d;
120	};
121	gdk_export const union _dbl_nil_t _dbl_nil_;
122	#define dbl_nil (_dbl_nil_.d)
123	#else
124	gdk_export const flt flt_nil;
125	gdk_export const dbl dbl_nil;
126	#endif
127	gdk_export const lng lng_nil;
128	#ifdef HAVE_HGE
129	gdk_export const hge hge_nil;
130	#endif
131	gdk_export const oid oid_nil;
132	gdk_export const char str_nil[`2`];
133	gdk_export const ptr ptr_nil;
134
135	/ derived NIL values - OIDDEPEND /
136	#define bit_nil ((bit) bte_nil)
137	#define bat_nil ((bat) int_nil)
138
139	#define void_nil oid_nil
140
141	#define is_bit_nil(v) ((v) == bit_nil)
142	#define is_bte_nil(v) ((v) == bte_nil)
143	#define is_sht_nil(v) ((v) == sht_nil)
144	#define is_int_nil(v) ((v) == int_nil)
145	#define is_lng_nil(v) ((v) == lng_nil)
146	#ifdef HAVE_HGE
147	#define is_hge_nil(v) ((v) == hge_nil)
148	#endif
149	#define is_oid_nil(v) ((v) == oid_nil)
150	#define is_flt_nil(v) isnan(v)
151	#define is_dbl_nil(v) isnan(v)
152	#define is_bat_nil(v) ((v) == bat_nil \|\| (v) == 0)
153
154	#include <math.h>
155
156	#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && _MSC_VER < 1800
157	#include <float.h>
158	#define isnan(x) _isnan(x)
159	#define isinf(x) (_fpclass(x) & (_FPCLASS_NINF \| _FPCLASS_PINF))
160	#define isfinite(x) _finite(x)
161	#endif
162
163	/*
164	* @- Derived types
165	* In all algorithms across GDK, you will find switches on the types
166	* (bte, sht, int, flt, dbl, lng, hge, str). They respectively
167	* represent an octet, a 16-bit int, a 32-bit int, a 32-bit float, a
168	* 64-bit double, a 64-bit int, and a pointer-sized location of a
169	* char-buffer (ended by a zero char).
170	*
171	* In contrast, the types (bit, ptr, bat, oid) are derived types. They
172	* do not occur in the switches. The ATOMstorage macro maps them
173	* respectively onto a @code{ bte}, @code{ int} (pointers are 32-bit),
174	* @code{ int}, and @code{ int}. OIDs are 32-bit.
175	*
176	* This approach makes it tractable to switch to 64-bits OIDs, or to a
177	* fully 64-bits OS easily. One only has to map the @code{ oid} and
178	* @code{ ptr} types to @code{ lng} instead of @code{ int}.
179	*
180	* Derived types mimic their fathers in many ways. They inherit the
181	* @code{ size}, @code{ linear}, and @code{ null}
182	* properties of their father. The same goes for the
183	* ADT functions HASH, CMP, PUT, NULL, DEL, LEN, and HEAP. So, a
184	* derived type differs in only two ways from its father:
185	* @table @code
186	* @item [string representation]
187	* the only two ADT operations specific for a derived type are FROMSTR
188	* and TOSTR.
189	* @item [identity]
190	* (a @code{ bit} is really of a different type than @code{ bte}). The
191	* set of operations on derived type values or BATs of such types may
192	* differ from the sets of operations on the father type.
193	* @end table
194	*/
195	/ use "do ... while(0)" so that lhs can safely be used in if statements /
196	#define ATOMstorage(t) BATatoms[t].storage
197	#define ATOMsize(t) BATatoms[t].size
198	#define ATOMfromstr(t,s,l,src,ext) BATatoms[t].atomFromStr(src,l,s,ext)
199	#define ATOMnilptr(t) BATatoms[t].atomNull
200	#define ATOMcompare(t) BATatoms[t].atomCmp
201	#define ATOMcmp(t,l,r) ((*ATOMcompare(t))(l, r))
202	#define ATOMhash(t,src) BATatoms[t].atomHash(src)
203	#define ATOMdel(t,hp,src) do if (BATatoms[t].atomDel) BATatoms[t].atomDel(hp,src); while (0)
204	#define ATOMvarsized(t) (BATatoms[t].atomPut != NULL)
205	#define ATOMlinear(t) BATatoms[t].linear
206	#define ATOMtype(t) ((t) == TYPE_void ? TYPE_oid : (t))
207	#define ATOMfix(t,v) do if (BATatoms[t].atomFix) BATatoms[t].atomFix(v); while (0)
208	#define ATOMunfix(t,v) do if (BATatoms[t].atomUnfix) BATatoms[t].atomUnfix(v); while (0)
209
210	/ The base type is the storage type if the comparison function, the*
211	* hash function, and the nil value are the same as those of the
212	* storage type; otherwise it is the type itself. */
213	#define ATOMbasetype(t) ((t) != ATOMstorage(t) && \
214	ATOMnilptr(t) == ATOMnilptr(ATOMstorage(t)) && \
215	ATOMcompare(t) == ATOMcompare(ATOMstorage(t)) && \
216	BATatoms[t].atomHash == BATatoms[ATOMstorage(t)].atomHash ? \
217	ATOMstorage(t) : (t))
218
219	/*
220	* In case that atoms are added to a bat, their logical reference
221	* count should be incremented (and decremented if deleted). Notice
222	* that BATs with atomic types that have logical references (e.g. BATs
223	* of BATs but also BATs of ODMG odSet) can never be persistent, as
224	* this would make the commit tremendously complicated.
225	*/
226	#ifdef HAVE_HGE
227	#define ATOM_CASE_16_hge \
228	case 16: \
229	* (hge ) d_ = (hge *) s_; \
230	break
231	#else
232	#define ATOM_CASE_16_hge
233	#endif
234
235	#define ATOMputVAR(type, heap, dst, src) \
236	do { \
237	assert(BATatoms[type].atomPut != NULL); \
238	if ((*BATatoms[type].atomPut)(heap, dst, src) == 0) \
239	goto bunins_failed; \
240	} while (0)
241	#define ATOMputFIX(type, dst, src) \
242	do { \
243	int t_ = (type); \
244	void *d_ = (dst); \
245	const void *s_ = (src); \
246	\
247	assert(BATatoms[t_].atomPut == NULL); \
248	ATOMfix(t_, s_); \
249	switch (ATOMsize(t_)) { \
250	case 0: /* void */ \
251	break; \
252	case 1: \
253	* (bte ) d_ = (bte *) s_; \
254	break; \
255	case 2: \
256	* (sht ) d_ = (sht *) s_; \
257	break; \
258	case 4: \
259	* (int ) d_ = (int *) s_; \
260	break; \
261	case 8: \
262	* (lng ) d_ = (lng *) s_; \
263	break; \
264	ATOM_CASE_16_hge; \
265	default: \
266	memcpy(d_, s_, ATOMsize(t_)); \
267	break; \
268	} \
269	} while (0)
270
271	#define ATOMreplaceVAR(type, heap, dst, src) \
272	do { \
273	int t_ = (type); \
274	var_t d_ = (var_t ) (dst); \
275	const void *s_ = (src); \
276	var_t loc_ = *d_; \
277	Heap *h_ = (heap); \
278	\
279	assert(BATatoms[t_].atomPut != NULL); \
280	if ((*BATatoms[t_].atomPut)(h_, &loc_, s_) == 0) \
281	goto bunins_failed; \
282	ATOMunfix(t_, d_); \
283	ATOMdel(t_, h_, d_); \
284	*d_ = loc_; \
285	ATOMfix(t_, s_); \
286	} while (0)
287
288	/ string heaps:*
289	* - strings are 8 byte aligned
290	* - start with a 1024 bucket hash table
291	* - heaps < 64KiB are fully duplicate eliminated with this hash tables
292	* - heaps >= 64KiB are opportunistically (imperfect) duplicate
293	* eliminated as only the last 128KiB chunk is considered and there
294	* is no linked list
295	* - buckets and next pointers are unsigned short "indices"
296	* - indices should be multiplied by 8 and takes from ELIMBASE to get
297	* an offset
298	* Note that a 64KiB chunk of the heap contains at most 8K 8-byte
299	* aligned strings. The 1K bucket list means that in worst load, the
300	* list length is 8 (OK).
301	*/
302	#define GDK_STRHASHTABLE (1<<10) /* 1024 */
303	#define GDK_STRHASHMASK (GDK_STRHASHTABLE-1)
304	#define GDK_STRHASHSIZE (GDK_STRHASHTABLE * sizeof(stridx_t))
305	#define GDK_ELIMPOWER 16 /* 64KiB is the threshold */
306	#define GDK_ELIMDOUBLES(h) ((h)->free < GDK_ELIMLIMIT)
307	#define GDK_ELIMLIMIT (1<<GDK_ELIMPOWER) /* equivalently: ELIMBASE == 0 */
308	#define GDK_ELIMBASE(x) (((x) >> GDK_ELIMPOWER) << GDK_ELIMPOWER)
309	#define GDK_VAROFFSET ((var_t) GDK_STRHASHSIZE)
310
311	/*
312	* @- String Comparison, NILs and UTF-8
313	*
314	* Using the char* type for strings is handy as this is the type of
315	* any constant strings in a C/C++ program. Therefore, MonetDB uses
316	* this definition for str. However, different compilers and
317	* platforms use either signed or unsigned characters for the char
318	* type. It is required that string ordering in MonetDB is consistent
319	* over platforms though.
320	*
321	* As for the choice how strings should be ordered, our support for
322	* UTF-8 actually imposes that it should follow 'unsigned char'
323	* doctrine (like in the AIX native compiler). In this semantics,
324	* though we have to take corrective action to ensure that str(nil) is
325	* the smallest value of the domain.
326	*/
327	#define GDK_STRNIL(s) ((s) == NULL \|\| (const char) (s) == '\200')
328	#define GDK_STRLEN(s) ((GDK_STRNIL(s)?1:strlen(s))+1)
329	#define GDK_STRCMP(l,r) (GDK_STRNIL(l)?(GDK_STRNIL(r)?0:-1):GDK_STRNIL(r)?1: \
330	((const unsigned char)(l) < (const unsigned char)(r))?-1: \
331	((const unsigned char)(l) > (const unsigned char)(r))?1: \
332	strCmpNoNil((const unsigned char)(l),(const unsigned char)(r)))
333	/*
334	* @- Hash Function
335	* The string hash function is a very simple hash function that xors
336	* and rotates all characters together. It is optimized to process 2
337	* characters at a time (adding 16-bits to the hash value each
338	* iteration).
339	*/
340	#define GDK_STRHASH(x,y) \
341	do { \
342	const char _key = (const char ) (x); \
343	BUN _i; \
344	for (_i = y = 0; _key[_i]; _i++) { \
345	y += _key[_i]; \
346	y += (y << 10); \
347	y ^= (y >> 6); \
348	} \
349	y += (y << 3); \
350	y ^= (y >> 11); \
351	y += (y << 15); \
352	} while (0)
353
354	#endif /* _GDK_ATOMS_H_ */
355

Browse the source code of MonetDB/gdk/gdk_atoms.h