gdk_batop.c source code [MonetDB/gdk/gdk_batop.c]

1	/*
2	* This Source Code Form is subject to the terms of the Mozilla Public
3	* License, v. 2.0. If a copy of the MPL was not distributed with this
4	* file, You can obtain one at http://mozilla.org/MPL/2.0/.
5	*
6	* Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7	*/
8
9	/*
10	* (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
11	* Common BAT Operations
12	* We factor out all possible overhead by inlining code. This
13	* includes the macros BUNhead and BUNtail, which do a test to see
14	* whether the atom resides in the buns or in a variable storage
15	* heap.
16	*/
17	#include "monetdb_config.h"
18	#include "gdk.h"
19	#include "gdk_private.h"
20
21	gdk_return
22	unshare_string_heap(BAT *b)
23	{
24	assert(b->batCacheid > `0`);
25	if (b->ttype == TYPE_str &&
26	b->tvheap->parentid != b->batCacheid) {
27	Heap h = GDKzalloc(sizeof*(Heap));
28	if (h == NULL)
29	return GDK_FAIL;
30	h->parentid = b->batCacheid;
31	h->farmid = BBPselectfarm(b->batRole, TYPE_str, varheap);
32	strconcat_len(h->filename, sizeof(h->filename),
33	BBP_physical(b->batCacheid), ".theap", NULL);
34	if (HEAPcopy(h, b->tvheap) != GDK_SUCCEED) {
35	HEAPfree(h, true);
36	GDKfree(h);
37	return GDK_FAIL;
38	}
39	BBPunshare(b->tvheap->parentid);
40	b->tvheap = h;
41	}
42	return GDK_SUCCEED;
43	}
44
45	/ We try to be clever when appending one string bat to another.*
46	* First of all, we try to actually share the string heap so that we
47	* don't need an extra copy, and if that can't be done, we see whether
48	* it makes sense to just quickly copy the whole string heap instead
49	* of inserting individual strings. See the comments in the code for
50	* more information. */
51	static gdk_return
52	insert_string_bat(BAT b, BAT n, BAT *s, bool force)
53	{
54	BATiter ni; / iterator /
55	size_t toff = ~(size_t) `0`; / tail offset /
56	BUN p, r; / loop variables /
57	const void tp; /* tail value pointer /
58	unsigned char tbv; / tail value-as-bte /
59	unsigned short tsv; / tail value-as-sht /
60	#if SIZEOF_VAR_T == 8
61	unsigned int tiv; / tail value-as-int /
62	#endif
63	var_t v; / value /
64	size_t off; / offset within n's string heap /
65	struct canditer ci;
66	BUN cnt;
67
68	assert(b->ttype == TYPE_str);
69	/ only transient bats can use some other bat's string heap /
70	assert(b->batRole == TRANSIENT \|\| b->tvheap->parentid == b->batCacheid);
71	if (n->batCount == `0` \|\| (s && s->batCount == `0`))
72	return GDK_SUCCEED;
73	ni = bat_iterator(n);
74	tp = NULL;
75	cnt = canditer_init(&ci, n, s);
76	if (cnt == `0`)
77	return GDK_SUCCEED;
78	if ((!GDK_ELIMDOUBLES(b->tvheap) \|\| b->batCount == `0`) &&
79	!GDK_ELIMDOUBLES(n->tvheap) &&
80	b->tvheap->hashash == n->tvheap->hashash) {
81	if (b->batRole == TRANSIENT \|\| b->tvheap == n->tvheap) {
82	/ If b is in the transient farm (i.e. b will*
83	* never become persistent), we try some
84	* clever tricks to avoid copying:
85	* - if b is empty, we just let it share the
86	* string heap with n;
87	* - otherwise, if b's string heap and n's
88	* string heap are the same (i.e. shared),
89	* we leave it that way (this includes the
90	* case that b is persistent and n shares
91	* its string heap with b);
92	* - otherwise, if b shares its string heap
93	* with some other bat, we materialize it
94	* and we will have to copy strings.
95	*/
96	bat bid = b->batCacheid;
97
98	/ if candidates are not dense, there is no*
99	* wholesale copying of n's offset heap, but
100	* we may still be able to share the string
101	* heap */
102	if (b->batCount == `0` &&
103	b->tvheap != n->tvheap &&
104	ci.tpe == cand_dense) {
105	if (b->tvheap->parentid != bid) {
106	BBPunshare(b->tvheap->parentid);
107	} else {
108	HEAPfree(b->tvheap, true);
109	GDKfree(b->tvheap);
110	}
111	BBPshare(n->tvheap->parentid);
112	b->tvheap = n->tvheap;
113	b->batDirtydesc = true;
114	toff = `0`;
115	} else if (b->tvheap->parentid == n->tvheap->parentid &&
116	ci.tpe == cand_dense) {
117	toff = `0`;
118	} else if (b->tvheap->parentid != bid &&
119	unshare_string_heap(b) != GDK_SUCCEED) {
120	return GDK_FAIL;
121	}
122	}
123	if (toff == ~(size_t) `0` && cnt > `1024`) {
124	/ If b and n aren't sharing their string*
125	* heaps, we try to determine whether to copy
126	* n's whole string heap to the end of b's, or
127	* whether we will insert each string from n
128	* individually. We do this by testing a
129	* sample of n's strings and extrapolating
130	* from that sample whether n uses a
131	* significant part of its string heap for its
132	* strings (i.e. whether there are many unused
133	* strings in n's string heap). If n doesn't
134	* have many strings in the first place, we
135	* skip this and just insert them all
136	* individually. We also check whether a
137	* significant number of n's strings happen to
138	* have the same offset in b. In the latter
139	* case we also want to insert strings
140	* individually, but reusing the string in b's
141	* string heap. */
142	int match = `0`, i;
143	size_t len = b->tvheap->hashash ? `1024` * EXTRALEN : `0`;
144	for (i = `0`; i < `1024`; i++) {
145	p = (BUN) (((double) rand() / RAND_MAX) * (cnt - `1`));
146	p = canditer_idx(&ci, p) - n->hseqbase;
147	off = BUNtvaroff(ni, p);
148	if (off < b->tvheap->free &&
149	strcmp(b->tvheap->base + off, n->tvheap->base + off) == `0` &&
150	(!b->tvheap->hashash \|\|
151	((BUN ) (b->tvheap->base + off))[-`1`] == (n->tvheap->hashash ? ((BUN ) (n->tvheap->base + off))[-`1`] : strHash(n->tvheap->base + off))))
152	match++;
153	len += (strlen(n->tvheap->base + off) + `8`) & ~`7`;
154	}
155	if (match < `768` && (size_t) (BATcount(n) * (double) len / `1024`) >= n->tvheap->free / `2`) {
156	/ append string heaps /
157	toff = b->batCount == `0` ? `0` : b->tvheap->free;
158	/ make sure we get alignment right /
159	toff = (toff + GDK_VARALIGN - `1`) & ~(GDK_VARALIGN - `1`);
160	/ if in "force" mode, the heap may be*
161	* shared when memory mapped */
162	if (HEAPextend(b->tvheap, toff + n->tvheap->size, force) != GDK_SUCCEED) {
163	toff = ~(size_t) `0`;
164	goto bunins_failed;
165	}
166	memcpy(b->tvheap->base + toff, n->tvheap->base, n->tvheap->free);
167	b->tvheap->free = toff + n->tvheap->free;
168	if (toff > `0`) {
169	/ flush double-elimination*
170	* hash table */
171	memset(b->tvheap->base, `0`,
172	GDK_STRHASHSIZE);
173	}
174	}
175	}
176	if (toff != ~(size_t) `0`) {
177	/ we only have to copy the offsets from n to*
178	* b, possibly with an offset (if toff != 0),
179	* so set up some variables and set up b's
180	* tail so that it looks like it's a fixed
181	* size column. Of course, we must make sure
182	* first that the width of b's offset heap can
183	* accommodate all values. */
184	if (b->twidth < SIZEOF_VAR_T &&
185	((size_t) `1` << `8` * b->twidth) <= (b->twidth <= `2` ? b->tvheap->size - GDK_VAROFFSET : b->tvheap->size)) {
186	/ offsets aren't going to fit, so*
187	* widen offset heap */
188	if (GDKupgradevarheap(b, (var_t) b->tvheap->size, false, force) != GDK_SUCCEED) {
189	toff = ~(size_t) `0`;
190	goto bunins_failed;
191	}
192	}
193	}
194	} else if (unshare_string_heap(b) != GDK_SUCCEED)
195	return GDK_FAIL;
196	if (toff == `0` && n->twidth == b->twidth && ci.tpe == cand_dense) {
197	/ we don't need to do any translation of offset*
198	* values, so we can use fast memcpy */
199	memcpy(Tloc(b, BUNlast(b)), Tloc(n, ci.seq - n->hseqbase), cnt << n->tshift);
200	BATsetcount(b, BATcount(b) + cnt);
201	} else if (toff != ~(size_t) `0`) {
202	/ we don't need to insert any actual strings since we*
203	* have already made sure that they are all in b's
204	* string heap at known locations (namely the offset
205	* in n added to toff), so insert offsets from n after
206	* adding toff into b */
207	/ note the use of the "restrict" qualifier here: all*
208	* four pointers below point to the same value, but
209	* only one of them will actually be used, hence we
210	* still obey the rule for restrict-qualified
211	* pointers */
212	const unsigned char *restrict tbp = (const unsigned char *) Tloc(n, `0`);
213	const unsigned short *restrict tsp = (const unsigned short *) Tloc(n, `0`);
214	#if SIZEOF_VAR_T == 8
215	const unsigned int *restrict tip = (const unsigned int *) Tloc(n, `0`);
216	#endif
217	const var_t *restrict tvp = (const var_t *) Tloc(n, `0`);
218
219	switch (b->twidth) {
220	case `1`:
221	b->ttype = TYPE_bte;
222	tp = &tbv;
223	break;
224	case `2`:
225	b->ttype = TYPE_sht;
226	tp = &tsv;
227	break;
228	#if SIZEOF_VAR_T == 8
229	case `4`:
230	b->ttype = TYPE_int;
231	tp = &tiv;
232	break;
233	case `8`:
234	b->ttype = TYPE_lng;
235	tp = &v;
236	break;
237	#else
238	case `4`:
239	b->ttype = TYPE_int;
240	tp = &v;
241	break;
242	#endif
243	default:
244	assert(`0`);
245	}
246	b->tvarsized = false;
247	while (cnt > `0`) {
248	cnt--;
249	p = canditer_next(&ci) - n->hseqbase;
250	switch (n->twidth) {
251	case `1`:
252	v = (var_t) tbp[p] + GDK_VAROFFSET;
253	break;
254	case `2`:
255	v = (var_t) tsp[p] + GDK_VAROFFSET;
256	break;
257	#if SIZEOF_VAR_T == 8
258	case `4`:
259	v = (var_t) tip[p];
260	break;
261	#endif
262	default:
263	v = tvp[p];
264	break;
265	}
266	v = (var_t) ((size_t) v + toff);
267	assert(v >= GDK_VAROFFSET);
268	assert((size_t) v < b->tvheap->free);
269	if (BUNlast(b) >= BATcapacity(b)) {
270	if (BATcount(b) == BUN_MAX) {
271	GDKerror("bunfastapp: too many elements to accomodate (" BUNFMT ")\n", BUN_MAX);
272	goto bunins_failed;
273	}
274	if (BATextend(b, BATgrows(b)) != GDK_SUCCEED)
275	goto bunins_failed;
276	}
277	switch (b->twidth) {
278	case `1`:
279	assert(v - GDK_VAROFFSET < ((var_t) `1` << `8`));
280	((uint8_t *) b->theap.base)[b->batCount++] = (uint8_t) (v - GDK_VAROFFSET);
281	b->theap.free += `1`;
282	break;
283	case `2`:
284	assert(v - GDK_VAROFFSET < ((var_t) `1` << `16`));
285	((uint16_t *) b->theap.base)[b->batCount++] = (uint16_t) (v - GDK_VAROFFSET);
286	b->theap.free += `2`;
287	break;
288	#if SIZEOF_VAR_T == 8
289	case `4`:
290	assert(v < ((var_t) `1` << `32`));
291	((uint32_t *) b->theap.base)[b->batCount++] = (uint32_t) v;
292	b->theap.free += `4`;
293	break;
294	#endif
295	default:
296	((var_t *) b->theap.base)[b->batCount++] = v;
297	b->theap.free += sizeof(var_t);
298	break;
299	}
300	b->theap.dirty = true;
301	}
302	b->tvarsized = true;
303	b->ttype = TYPE_str;
304	} else if (b->tvheap->free < n->tvheap->free / `2` \|\|
305	GDK_ELIMDOUBLES(b->tvheap)) {
306	/ if b's string heap is much smaller than n's string*
307	* heap, don't bother checking whether n's string
308	* values occur in b's string heap; also, if b is
309	* (still) fully double eliminated, we must continue
310	* to use the double elimination mechanism */
311	r = BUNlast(b);
312	oid hseq = n->hseqbase;
313	while (cnt > `0`) {
314	cnt--;
315	p = canditer_next(&ci) - hseq;
316	tp = BUNtvar(ni, p);
317	bunfastappVAR(b, tp);
318	HASHins(b, r, tp);
319	r++;
320	}
321	} else {
322	/ Insert values from n individually into b; however,*
323	* we check whether there is a string in b's string
324	* heap at the same offset as the string is in n's
325	* string heap (in case b's string heap is a copy of
326	* n's). If this is the case, we just copy the
327	* offset, otherwise we insert normally. */
328	r = BUNlast(b);
329	while (cnt > `0`) {
330	cnt--;
331	p = canditer_next(&ci) - n->hseqbase;
332	off = BUNtvaroff(ni, p); / the offset /
333	tp = n->tvheap->base + off; / the string /
334	if (off < b->tvheap->free &&
335	strcmp(b->tvheap->base + off, tp) == `0` &&
336	(!b->tvheap->hashash \|\|
337	((BUN ) (b->tvheap->base + off))[-`1`] == (n->tvheap->hashash ? ((BUN ) tp)[-`1`] : strHash(tp)))) {
338	/ we found the string at the same*
339	* offset in b's string heap as it was
340	* in n's string heap, so we don't
341	* have to insert a new string into b:
342	* we can just copy the offset */
343	v = (var_t) off;
344	if (b->twidth < SIZEOF_VAR_T &&
345	((size_t) `1` << `8` * b->twidth) <= (b->twidth <= `2` ? v - GDK_VAROFFSET : v)) {
346	/ offset isn't going to fit,*
347	* so widen offset heap */
348	if (GDKupgradevarheap(b, v, false, force) != GDK_SUCCEED) {
349	goto bunins_failed;
350	}
351	}
352	switch (b->twidth) {
353	case `1`:
354	assert(v - GDK_VAROFFSET < ((var_t) `1` << `8`));
355	(unsigned* char )Tloc(b, BUNlast(b)) = (unsigned* char) (v - GDK_VAROFFSET);
356	b->theap.free += `1`;
357	break;
358	case `2`:
359	assert(v - GDK_VAROFFSET < ((var_t) `1` << `16`));
360	(unsigned* short )Tloc(b, BUNlast(b)) = (unsigned* short) (v - GDK_VAROFFSET);
361	b->theap.free += `2`;
362	break;
363	#if SIZEOF_VAR_T == 8
364	case `4`:
365	assert(v < ((var_t) `1` << `32`));
366	(unsigned* int )Tloc(b, BUNlast(b)) = (unsigned* int) v;
367	b->theap.free += `4`;
368	break;
369	#endif
370	default:
371	(var_t )Tloc(b, BUNlast(b)) = v;
372	b->theap.free += SIZEOF_VAR_T;
373	break;
374	}
375	b->batCount++;
376	} else {
377	bunfastappVAR(b, tp);
378	}
379	HASHins(b, r, tp);
380	r++;
381	}
382	}
383	b->theap.dirty = true;
384	return GDK_SUCCEED;
385	bunins_failed:
386	b->tvarsized = true;
387	b->ttype = TYPE_str;
388	return GDK_FAIL;
389	}
390
391	static gdk_return
392	append_varsized_bat(BAT b, BAT n, BAT *s)
393	{
394	BATiter ni;
395	struct canditer ci;
396	BUN cnt, r;
397	oid hseq = n->hseqbase;
398
399	/ only transient bats can use some other bat's vheap /
400	assert(b->batRole == TRANSIENT \|\| b->tvheap->parentid == b->batCacheid);
401	/ make sure the bats use var_t /
402	assert(b->twidth == n->twidth);
403	assert(b->twidth == SIZEOF_VAR_T);
404	if (n->batCount == `0` \|\| (s && s->batCount == `0`))
405	return GDK_SUCCEED;
406	cnt = canditer_init(&ci, n, s);
407	if (cnt == `0`)
408	return GDK_SUCCEED;
409	if (BATcount(b) == `0` &&
410	b->batRole == TRANSIENT &&
411	n->batRestricted == BAT_READ &&
412	b->tvheap != n->tvheap) {
413	/ if b is still empty, in the transient farm, and n*
414	* is read-only, we replace b's vheap with a reference
415	* to n's */
416	if (b->tvheap->parentid != b->batCacheid) {
417	BBPunshare(b->tvheap->parentid);
418	} else {
419	HEAPfree(b->tvheap, true);
420	GDKfree(b->tvheap);
421	}
422	BBPshare(n->tvheap->parentid);
423	b->tvheap = n->tvheap;
424	b->batDirtydesc = true;
425	}
426	if (b->tvheap == n->tvheap) {
427	/ if b and n use the same vheap, we only need to copy*
428	* the offsets from n to b */
429	HASHdestroy(b); / not maintaining, so destroy it /
430	if (ci.tpe == cand_dense) {
431	/ fast memcpy since we copy a consecutive*
432	* chunk of memory */
433	memcpy(Tloc(b, BUNlast(b)),
434	Tloc(n, ci.seq - hseq),
435	cnt << b->tshift);
436	} else {
437	var_t *restrict dst = (var_t *) Tloc(b, BUNlast(b));
438	const var_t *restrict src = (const var_t *) Tloc(n, `0`);
439	while (cnt > `0`) {
440	cnt--;
441	*dst++ = src[canditer_next(&ci) - hseq];
442	}
443	}
444	b->theap.dirty = true;
445	BATsetcount(b, BATcount(b) + ci.ncand);
446	return GDK_SUCCEED;
447	}
448	/ b and n do not share their vheap, so we need to copy data /
449	if (b->tvheap->parentid != b->batCacheid) {
450	/ if b shares its vheap with some other bat, unshare it /
451	Heap h = GDKzalloc(sizeof*(Heap));
452	if (h == NULL)
453	return GDK_FAIL;
454	h->parentid = b->batCacheid;
455	h->farmid = BBPselectfarm(b->batRole, b->ttype, varheap);
456	strconcat_len(h->filename, sizeof(h->filename),
457	BBP_physical(b->batCacheid), ".theap", NULL);
458	if (HEAPcopy(h, b->tvheap) != GDK_SUCCEED) {
459	HEAPfree(h, true);
460	GDKfree(h);
461	return GDK_FAIL;
462	}
463	BBPunshare(b->tvheap->parentid);
464	b->tvheap = h;
465	}
466	/ copy data from n to b /
467	ni = bat_iterator(n);
468	r = BUNlast(b);
469	while (cnt > `0`) {
470	cnt--;
471	BUN p = canditer_next(&ci) - hseq;
472	const void *t = BUNtvar(ni, p);
473	bunfastapp_nocheckVAR(b, r, t, Tsize(b));
474	HASHins(b, r, t);
475	r++;
476	}
477	b->theap.dirty = true;
478	return GDK_SUCCEED;
479
480	bunins_failed:
481	if (b->tunique)
482	BBPunfix(s->batCacheid);
483	return GDK_FAIL;
484	}
485
486	/ Append the contents of BAT n (subject to the optional candidate*
487	* list s) to BAT b. If b is empty, b will get the seqbase of s if it
488	* was passed in, and else the seqbase of n. */
489	gdk_return
490	BATappend(BAT b, BAT n, BAT *s, bool force)
491	{
492	struct canditer ci;
493	BUN cnt;
494	BUN r;
495	PROPrec prop, nprop;
496	oid hseq = n->hseqbase;
497
498	if (b == NULL \|\| n == NULL \|\| (cnt = BATcount(n)) == `0`) {
499	return GDK_SUCCEED;
500	}
501	assert(b->batCacheid > `0`);
502	assert(b->theap.parentid == `0`);
503
504	ALIGNapp(b, "BATappend", force, GDK_FAIL);
505
506	if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
507	GDKerror("Incompatible operands.\n");
508	return GDK_FAIL;
509	}
510	CHECKDEBUG {
511	if (BATttype(b) != BATttype(n) &&
512	ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
513	fprintf(stderr,"#Interpreting %s as %s.\n",
514	ATOMname(BATttype(n)), ATOMname(BATttype(b)));
515	}
516	}
517
518	if (b->tunique) {
519	/ if b has the unique bit set, only insert values*
520	* from n that don't already occur in b, and make sure
521	* we don't insert any duplicates either; we do this
522	* by calculating a subset of n that complies with
523	* this */
524	BAT *d;
525
526	d = BATdiff(n, b, s, NULL, true, false, BUN_NONE);
527	if (d == NULL)
528	return GDK_FAIL;
529	s = BATunique(n, d);
530	BBPunfix(d->batCacheid);
531	if (s == NULL)
532	return GDK_FAIL;
533	if (BATcount(s) == `0`) {
534	/ no new values in subset of n /
535	BBPunfix(s->batCacheid);
536	return GDK_SUCCEED;
537	}
538	}
539
540	cnt = canditer_init(&ci, n, s);
541	if (cnt == `0`) {
542	assert(!b->tunique);
543	return GDK_SUCCEED;
544	}
545
546	if (BUNlast(b) + cnt > BUN_MAX) {
547	if (b->tunique)
548	BBPunfix(s->batCacheid);
549	GDKerror("BATappend: combined BATs too large\n");
550	return GDK_FAIL;
551	}
552
553	if (b->hseqbase + BATcount(b) + cnt >= GDK_oid_max) {
554	if (b->tunique)
555	BBPunfix(s->batCacheid);
556	GDKerror("BATappend: overflow of head value\n");
557	return GDK_FAIL;
558	}
559
560	b->batDirtydesc = true;
561
562	IMPSdestroy(b); / imprints do not support updates yet /
563	OIDXdestroy(b);
564	if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL) {
565	if ((nprop = BATgetprop(n, GDK_MAX_VALUE)) != NULL) {
566	if (ATOMcmp(b->ttype, VALptr(&prop->v), VALptr(&nprop->v)) < `0`) {
567	if (s == NULL)
568	BATsetprop(b, GDK_MAX_VALUE, b->ttype, VALptr(&nprop->v));
569	else
570	BATrmprop(b, GDK_MAX_VALUE);
571	}
572	} else {
573	BATrmprop(b, GDK_MAX_VALUE);
574	}
575	}
576	if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL) {
577	if ((nprop = BATgetprop(n, GDK_MIN_VALUE)) != NULL) {
578	if (ATOMcmp(b->ttype, VALptr(&prop->v), VALptr(&nprop->v)) > `0`) {
579	if (s == NULL)
580	BATsetprop(b, GDK_MIN_VALUE, b->ttype, VALptr(&nprop->v));
581	else
582	BATrmprop(b, GDK_MIN_VALUE);
583	}
584	} else {
585	BATrmprop(b, GDK_MIN_VALUE);
586	}
587	}
588	#if 0 /* enable if we have more properties than just min/max */
589	do {
590	for (prop = b->tprops; prop; prop = prop->next)
591	if (prop->id != GDK_MAX_VALUE &&
592	prop->id != GDK_MIN_VALUE &&
593	prop->id != GDK_HASH_MASK) {
594	BATrmprop(b, prop->id);
595	break;
596	}
597	} while (prop);
598	#endif
599	if (b->thash == (Hash *) `1` \|\| BATcount(b) == `0` \|\|
600	(b->thash && ((size_t *) b->thash->heap.base)[`0`] & (`1` << `24`))) {
601	/ don't bother first loading the hash to then change*
602	* it, or updating the hash if we replace the heap,
603	* also, we cannot maintain persistent hashes */
604	HASHdestroy(b);
605	}
606
607	if (b->ttype == TYPE_void) {
608	/ b does not have storage, keep it that way if we can /
609	HASHdestroy(b); / we're not maintaining the hash here /
610	if (BATtdense(n) && ci.tpe == cand_dense &&
611	(BATcount(b) == `0` \|\|
612	(BATtdense(b) &&
613	b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
614	/ n is also dense and consecutive with b /
615	if (BATcount(b) == `0`)
616	BATtseqbase(b, n->tseqbase + ci.seq - hseq);
617	BATsetcount(b, BATcount(b) + cnt);
618	if (b->tunique)
619	BBPunfix(s->batCacheid);
620	return GDK_SUCCEED;
621	}
622	if ((BATcount(b) == `0` \|\| is_oid_nil(b->tseqbase)) &&
623	n->ttype == TYPE_void && is_oid_nil(n->tseqbase)) {
624	/ both b and n are void/nil /
625	BATtseqbase(b, oid_nil);
626	BATsetcount(b, BATcount(b) + cnt);
627	if (b->tunique)
628	BBPunfix(s->batCacheid);
629	return GDK_SUCCEED;
630	}
631	/ we need to materialize b; allocate enough capacity /
632	b->batCapacity = BATcount(b) + cnt;
633	if (BATmaterialize(b) != GDK_SUCCEED)
634	goto bunins_failed;
635	} else if (cnt > BATcapacity(b) - BATcount(b)) {
636	/ if needed space exceeds a normal growth extend just*
637	* with what's needed */
638	BUN ncap = BATcount(b) + cnt;
639	BUN grows = BATgrows(b);
640
641	if (ncap > grows)
642	grows = ncap;
643	if (BATextend(b, grows) != GDK_SUCCEED)
644	goto bunins_failed;
645	}
646
647	/ if growing too much, remove the hash, else we maintain it /
648	MT_lock_set(&b->batIdxLock);
649	if (b->thash == (Hash *) `1` \|\|
650	(b->thash != NULL &&
651	(`2` * b->thash->mask) < (BATcount(b) + cnt))) {
652	MT_lock_unset(&b->batIdxLock);
653	HASHdestroy(b);
654	} else {
655	MT_lock_unset(&b->batIdxLock);
656	}
657
658	r = BUNlast(b);
659
660	if (BATcount(b) == `0`) {
661	b->tsorted = n->tsorted;
662	b->trevsorted = n->trevsorted;
663	b->tseqbase = oid_nil;
664	b->tnonil = n->tnonil;
665	b->tnil = n->tnil && cnt == BATcount(n);
666	b->tseqbase = oid_nil;
667	if (ci.tpe == cand_dense) {
668	b->tnosorted = ci.seq - hseq <= n->tnosorted && n->tnosorted < ci.seq + cnt - hseq ? n->tnosorted + hseq - ci.seq : `0`;
669	b->tnorevsorted = ci.seq - hseq <= n->tnorevsorted && n->tnorevsorted < ci.seq + cnt - hseq ? n->tnorevsorted + hseq - ci.seq : `0`;
670	if (BATtdense(n)) {
671	b->tseqbase = n->tseqbase + ci.seq - hseq;
672	}
673	} else {
674	b->tnosorted = `0`;
675	b->tnorevsorted = `0`;
676	}
677	/ if tunique, uniqueness is guaranteed above /
678	b->tkey = n->tkey \| b->tunique;
679	if (!b->tunique && cnt == BATcount(n)) {
680	b->tnokey[`0`] = n->tnokey[`0`];
681	b->tnokey[`1`] = n->tnokey[`1`];
682	} else {
683	b->tnokey[`0`] = b->tnokey[`1`] = `0`;
684	}
685	} else {
686	BUN last = r - `1`;
687	BATiter ni = bat_iterator(n);
688	BATiter bi = bat_iterator(b);
689	int xx = ATOMcmp(b->ttype,
690	BUNtail(ni, ci.seq - hseq),
691	BUNtail(bi, last));
692	if (BATtordered(b) && (!BATtordered(n) \|\| xx < `0`)) {
693	b->tsorted = false;
694	b->tnosorted = `0`;
695	b->tseqbase = oid_nil;
696	}
697	if (BATtrevordered(b) &&
698	(!BATtrevordered(n) \|\| xx > `0`)) {
699	b->trevsorted = false;
700	b->tnorevsorted = `0`;
701	}
702	if (!b->tunique && / uniqueness is guaranteed above /
703	b->tkey &&
704	(!(BATtordered(b) \|\| BATtrevordered(b)) \|\|
705	!n->tkey \|\| xx == `0`)) {
706	BATkey(b, false);
707	}
708	if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
709	(!BATtdense(n) \|\|
710	ci.tpe != cand_dense \|\|
711	`1` + (oid ) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
712	b->tseqbase = oid_nil;
713	}
714	b->tnonil &= n->tnonil;
715	b->tnil \|= n->tnil && cnt == BATcount(n);
716	}
717	if (b->ttype == TYPE_str) {
718	if (insert_string_bat(b, n, s, force) != GDK_SUCCEED) {
719	if (b->tunique)
720	BBPunfix(s->batCacheid);
721	return GDK_FAIL;
722	}
723	} else if (ATOMvarsized(b->ttype)) {
724	if (append_varsized_bat(b, n, s) != GDK_SUCCEED) {
725	if (b->tunique)
726	BBPunfix(s->batCacheid);
727	return GDK_FAIL;
728	}
729	} else {
730	if (BATatoms[b->ttype].atomFix == NULL &&
731	b->ttype != TYPE_void &&
732	n->ttype != TYPE_void &&
733	ci.tpe == cand_dense) {
734	/ use fast memcpy if we can, but then we*
735	* can't maintain the hash */
736	HASHdestroy(b);
737	memcpy(Tloc(b, BUNlast(b)),
738	Tloc(n, ci.seq - hseq),
739	cnt * Tsize(n));
740	BATsetcount(b, BATcount(b) + cnt);
741	} else {
742	BATiter ni = bat_iterator(n);
743
744	while (cnt > `0`) {
745	cnt--;
746	BUN p = canditer_next(&ci) - hseq;
747	const void *t = BUNtail(ni, p);
748	bunfastapp_nocheck(b, r, t, Tsize(b));
749	HASHins(b, r, t);
750	r++;
751	}
752	}
753	b->theap.dirty = true;
754	}
755	if (b->tunique)
756	BBPunfix(s->batCacheid);
757	return GDK_SUCCEED;
758	bunins_failed:
759	if (b->tunique)
760	BBPunfix(s->batCacheid);
761	return GDK_FAIL;
762	}
763
764	gdk_return
765	BATdel(BAT b, BAT d)
766	{
767	int (unfix) (const* void *) = BATatoms[b->ttype].atomUnfix;
768	void (atmdel) (Heap , var_t *) = BATatoms[b->ttype].atomDel;
769	BATiter bi = bat_iterator(b);
770
771	assert(ATOMtype(d->ttype) == TYPE_oid);
772	assert(d->tsorted);
773	assert(d->tkey);
774	if (BATcount(d) == `0`)
775	return GDK_SUCCEED;
776	if (BATtdense(d)) {
777	oid o = d->tseqbase;
778	BUN c = BATcount(d);
779
780	if (o + c <= b->hseqbase)
781	return GDK_SUCCEED;
782	if (o < b->hseqbase) {
783	c -= b->hseqbase - o;
784	o = b->hseqbase;
785	}
786	if (o - b->hseqbase < b->batInserted) {
787	GDKerror("BATdelete: cannot delete committed values\n");
788	return GDK_FAIL;
789	}
790	if (o + c > b->hseqbase + BATcount(b))
791	c = b->hseqbase + BATcount(b) - o;
792	if (c == `0`)
793	return GDK_SUCCEED;
794	if (unfix \|\| atmdel) {
795	BUN p = o - b->hseqbase;
796	BUN q = p + c;
797	while (p < q) {
798	if (unfix)
799	(*unfix)(BUNtail(bi, p));
800	if (atmdel)
801	(atmdel)(b->tvheap, (var_t ) BUNtloc(bi, p));
802	p++;
803	}
804	}
805	if (BATtdense(b) && BATmaterialize(b) != GDK_SUCCEED)
806	return GDK_FAIL;
807	if (o + c < b->hseqbase + BATcount(b)) {
808	memmove(Tloc(b, o - b->hseqbase),
809	Tloc(b, o + c - b->hseqbase),
810	Tsize(b) * (BATcount(b) - (o + c - b->hseqbase)));
811	}
812	b->batCount -= c;
813	} else {
814	const oid o = (const* oid *) Tloc(d, `0`);
815	const oid *s;
816	BUN c = BATcount(d);
817	BUN nd = `0`;
818	char *p;
819
820	if (o[c - `1`] <= b->hseqbase)
821	return GDK_SUCCEED;
822	while (*o < b->hseqbase) {
823	o++;
824	c--;
825	}
826	if (*o - b->hseqbase < b->batInserted) {
827	GDKerror("BATdelete: cannot delete committed values\n");
828	return GDK_FAIL;
829	}
830	if (BATtdense(b) && BATmaterialize(b) != GDK_SUCCEED)
831	return GDK_FAIL;
832	s = o;
833	p = Tloc(b, *o - b->hseqbase);
834	while (c > `0` && *o < b->hseqbase + BATcount(b)) {
835	size_t n;
836	if (unfix)
837	(unfix)(BUNtail(bi, o - b->hseqbase));
838	if (atmdel)
839	(atmdel)(b->tvheap, (var_t ) BUNtloc(bi, *o - b->hseqbase));
840	o++;
841	c--;
842	nd++;
843	if (c == `0` \|\| *o - b->hseqbase >= BATcount(b))
844	n = b->hseqbase + BATcount(b) - o[-`1`] - `1`;
845	else if ((oid) (o - s) < o - s)
846	n = o[`0`] - o[-`1`] - `1`;
847	else
848	n = `0`;
849	if (n > `0`) {
850	n *= Tsize(b);
851	memmove(p,
852	Tloc(b, o[-`1`] + `1` - b->hseqbase),
853	n);
854	p += n;
855	s = o;
856	}
857	}
858	b->batCount -= nd;
859	}
860	if (b->batCount <= `1`) {
861	/ some trivial properties /
862	b->tkey = true;
863	b->tsorted = b->trevsorted = true;
864	if (b->batCount == `0`) {
865	b->tnil = false;
866	b->tnonil = true;
867	}
868	}
869	/ not sure about these anymore /
870	b->tnosorted = b->tnorevsorted = `0`;
871	b->tnokey[`0`] = b->tnokey[`1`] = `0`;
872	PROPdestroy(b);
873
874	return GDK_SUCCEED;
875	}
876
877	/*
878	* The last in this series is a BATreplace, which replaces all the
879	* buns mentioned.
880	*/
881	gdk_return
882	BATreplace(BAT b, BAT p, BAT *n, bool force)
883	{
884	if (b == NULL \|\| p == NULL \|\| n == NULL \|\| BATcount(n) == `0`) {
885	return GDK_SUCCEED;
886	}
887	if (void_replace_bat(b, p, n, force) != GDK_SUCCEED)
888	return GDK_FAIL;
889	return GDK_SUCCEED;
890	}
891
892
893	/*
894	* BAT Selections
895	* The BAT selectors are among the most heavily used operators.
896	* Their efficient implementation is therefore mandatory.
897	*
898	* BAT slice
899	* This function returns a horizontal slice from a BAT. It optimizes
900	* execution by avoiding to copy when the BAT is memory mapped (in
901	* this case, an independent submap is created) or else when it is
902	* read-only, then a VIEW bat is created as a result.
903	*
904	* If a new copy has to be created, this function takes care to
905	* preserve void-columns (in this case, the seqbase has to be
906	* recomputed in the result).
907	*
908	* NOTE new semantics, the selected range is excluding the high value.
909	*/
910	BAT *
911	BATslice(BAT *b, BUN l, BUN h)
912	{
913	BUN low = l;
914	BAT *bn;
915	BATiter bni, bi = bat_iterator(b);
916	oid foid; / first oid value if oid column /
917
918	BATcheck(b, "BATslice", NULL);
919	if (h > BATcount(b))
920	h = BATcount(b);
921	if (h < l)
922	h = l;
923
924	if (l > BUN_MAX \|\| h > BUN_MAX) {
925	GDKerror("BATslice: boundary out of range\n");
926	return NULL;
927	}
928
929	if (b->ttype == TYPE_void && b->tvheap != NULL) {
930	/ slicing a candidate list with exceptions /
931	struct canditer ci;
932	canditer_init(&ci, NULL, b);
933	return canditer_slice(&ci, l, h);
934	}
935	/ If the source BAT is readonly, then we can obtain a VIEW*
936	* that just reuses the memory of the source. */
937	if (b->batRestricted == BAT_READ &&
938	(!VIEWtparent(b) \|\|
939	BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ)) {
940	bn = VIEWcreate(b->hseqbase + low, b);
941	if (bn == NULL)
942	return NULL;
943	VIEWbounds(b, bn, l, h);
944	} else {
945	/ create a new BAT and put everything into it /
946	BUN p = l;
947	BUN q = h;
948
949	bn = COLnew((oid) (b->hseqbase + low), BATtdense(b) ? TYPE_void : b->ttype, h - l, TRANSIENT);
950	if (bn == NULL)
951	return NULL;
952
953	if (bn->ttype == TYPE_void \|\|
954	(!bn->tvarsized &&
955	BATatoms[bn->ttype].atomPut == NULL &&
956	BATatoms[bn->ttype].atomFix == NULL)) {
957	if (bn->ttype) {
958	memcpy(Tloc(bn, `0`), Tloc(b, p),
959	(q - p) * Tsize(bn));
960	bn->theap.dirty = true;
961	}
962	BATsetcount(bn, h - l);
963	} else {
964	for (; p < q; p++) {
965	bunfastapp(bn, BUNtail(bi, p));
966	}
967	}
968	bn->theap.dirty = true;
969	bn->tsorted = b->tsorted;
970	bn->trevsorted = b->trevsorted;
971	bn->tkey = b->tkey;
972	bn->tnonil = b->tnonil;
973	if (b->tnosorted > l && b->tnosorted < h)
974	bn->tnosorted = b->tnosorted - l;
975	else
976	bn->tnosorted = `0`;
977	if (b->tnorevsorted > l && b->tnorevsorted < h)
978	bn->tnorevsorted = b->tnorevsorted - l;
979	else
980	bn->tnorevsorted = `0`;
981	if (b->tnokey[`0`] >= l && b->tnokey[`0`] < h &&
982	b->tnokey[`1`] >= l && b->tnokey[`1`] < h &&
983	b->tnokey[`0`] != b->tnokey[`1`]) {
984	bn->tnokey[`0`] = b->tnokey[`0`] - l;
985	bn->tnokey[`1`] = b->tnokey[`1`] - l;
986	} else {
987	bn->tnokey[`0`] = bn->tnokey[`1`] = `0`;
988	}
989	}
990	bn->tnonil = b->tnonil \|\| bn->batCount == `0`;
991	bn->tnil = false; / we just don't know /
992	bn->tnosorted = `0`;
993	bn->tnokey[`0`] = bn->tnokey[`1`] = `0`;
994	bni = bat_iterator(bn);
995	if (BATtdense(b)) {
996	BATtseqbase(bn, (oid) (b->tseqbase + low));
997	} else if (bn->ttype == TYPE_oid) {
998	if (BATcount(bn) == `0`) {
999	BATtseqbase(bn, `0`);
1000	} else if (!is_oid_nil((foid = (oid ) BUNtloc(bni, `0`))) &&
1001	(BATcount(bn) == `1` \|\|
1002	(bn->tkey &&
1003	bn->tsorted &&
1004	foid + BATcount(bn) - `1` == (oid ) BUNtloc(bni, BUNlast(bn) - `1`)))) {
1005	BATtseqbase(bn, foid);
1006	}
1007	}
1008	if (bn->batCount <= `1`) {
1009	bn->tsorted = ATOMlinear(b->ttype);
1010	bn->trevsorted = ATOMlinear(b->ttype);
1011	BATkey(bn, true);
1012	} else {
1013	bn->tsorted = b->tsorted;
1014	bn->trevsorted = b->trevsorted;
1015	BATkey(bn, BATtkey(b));
1016	}
1017	ALGODEBUG fprintf(stderr,
1018	"#BATslice(" ALGOBATFMT "," BUNFMT "," BUNFMT ")"
1019	"=" ALGOBATFMT "\n",
1020	ALGOBATPAR(b), l, h, ALGOBATPAR(bn));
1021	return bn;
1022	bunins_failed:
1023	BBPreclaim(bn);
1024	return NULL;
1025	}
1026
1027	/ Return whether the BAT has all unique values or not. It we don't*
1028	* know, invest in a proper check and record the results in the bat
1029	* descriptor. */
1030	bool
1031	BATkeyed(BAT *b)
1032	{
1033	lng t0 = GDKusec();
1034	BATiter bi = bat_iterator(b);
1035	int (cmpf)(const* void , const* void *) = ATOMcompare(b->ttype);
1036	BUN p, q, hb;
1037	Hash *hs = NULL;
1038
1039	if (b->ttype == TYPE_void)
1040	return BATtdense(b) \|\| BATcount(b) <= `1`;
1041	if (BATcount(b) <= `1`)
1042	return true;
1043	if (b->twidth < SIZEOF_BUN &&
1044	BATcount(b) > (BUN) `1` << (`8` * b->twidth)) {
1045	/ more rows than possible bit combinations in the atom /
1046	assert(!b->tkey);
1047	return false;
1048	}
1049
1050	b->batDirtydesc = true;
1051	if (!b->tkey && b->tnokey[`0`] == `0` && b->tnokey[`1`] == `0`) {
1052	if (b->tsorted \|\| b->trevsorted) {
1053	const void *prev = BUNtail(bi, `0`);
1054	const void *cur;
1055	for (q = BUNlast(b), p = `1`; p < q; p++) {
1056	cur = BUNtail(bi, p);
1057	if ((*cmpf)(prev, cur) == `0`) {
1058	b->tnokey[`0`] = p - `1`;
1059	b->tnokey[`1`] = p;
1060	ALGODEBUG fprintf(stderr, "#BATkeyed: fixed nokey(" BUNFMT "," BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p - `1`, p, BATgetId(b), BATcount(b), GDKusec() - t0);
1061	goto doreturn;
1062	}
1063	prev = cur;
1064	}
1065	/ we completed the scan: no duplicates /
1066	b->tkey = true;
1067	} else if (BATcheckhash(b) \|\|
1068	(!b->batTransient &&
1069	BAThash(b) == GDK_SUCCEED) \|\|
1070	(VIEWtparent(b) != `0` &&
1071	BATcheckhash(BBPdescriptor(VIEWtparent(b))))) {
1072	/ we already have a hash table on b, or b is*
1073	* persistent and we could create a hash
1074	* table, or b is a view on a bat that already
1075	* has a hash table */
1076	BUN lo = `0`;
1077
1078	hs = b->thash;
1079	if (hs == NULL && VIEWtparent(b) != `0`) {
1080	BAT *b2 = BBPdescriptor(VIEWtparent(b));
1081	lo = (BUN) ((b->theap.base - b2->theap.base) >> b->tshift);
1082	hs = b2->thash;
1083	}
1084	for (q = BUNlast(b), p = `0`; p < q; p++) {
1085	const void *v = BUNtail(bi, p);
1086	for (hb = HASHgetlink(hs, p + lo);
1087	hb != HASHnil(hs) && hb >= lo;
1088	hb = HASHgetlink(hs, hb)) {
1089	assert(hb < p + lo);
1090	if ((*cmpf)(v, BUNtail(bi, hb - lo)) == `0`) {
1091	b->tnokey[`0`] = hb - lo;
1092	b->tnokey[`1`] = p;
1093	ALGODEBUG fprintf(stderr, "#BATkeyed: fixed nokey(" BUNFMT "," BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", hb - lo, p, BATgetId(b), BATcount(b), GDKusec() - t0);
1094	goto doreturn;
1095	}
1096	}
1097	}
1098	/ we completed the scan: no duplicates /
1099	b->tkey = true;
1100	} else {
1101	const char *nme;
1102	BUN prb;
1103	BUN mask;
1104	int len;
1105
1106	GDKclrerr(); / not interested in BAThash errors /
1107	nme = BBP_physical(b->batCacheid);
1108	if (ATOMbasetype(b->ttype) == TYPE_bte) {
1109	mask = (BUN) `1` << `8`;
1110	cmpf = NULL; / no compare needed, "hash" is perfect /
1111	} else if (ATOMbasetype(b->ttype) == TYPE_sht) {
1112	mask = (BUN) `1` << `16`;
1113	cmpf = NULL; / no compare needed, "hash" is perfect /
1114	} else {
1115	mask = HASHmask(b->batCount);
1116	if (mask < ((BUN) `1` << `16`))
1117	mask = (BUN) `1` << `16`;
1118	}
1119	if ((hs = GDKzalloc(sizeof(Hash))) == NULL)
1120	goto doreturn;
1121	len = snprintf(hs->heap.filename, sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid());
1122	if (len == -`1` \|\| len >= (int) sizeof(hs->heap.filename) \|\|
1123	HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE) != GDK_SUCCEED) {
1124	GDKfree(hs);
1125	/ err on the side of caution: not keyed /
1126	goto doreturn;
1127	}
1128	for (q = BUNlast(b), p = `0`; p < q; p++) {
1129	const void *v = BUNtail(bi, p);
1130	prb = HASHprobe(hs, v);
1131	for (hb = HASHget(hs, prb);
1132	hb != HASHnil(hs);
1133	hb = HASHgetlink(hs, hb)) {
1134	if (cmpf == NULL \|\|
1135	(*cmpf)(v, BUNtail(bi, hb)) == `0`) {
1136	b->tnokey[`0`] = hb;
1137	b->tnokey[`1`] = p;
1138	ALGODEBUG fprintf(stderr, "#BATkeyed: fixed nokey(" BUNFMT "," BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", hb, p, BATgetId(b), BATcount(b), GDKusec() - t0);
1139	goto doreturn_free;
1140	}
1141	}
1142	/ enter into hash table /
1143	HASHputlink(hs, p, HASHget(hs, prb));
1144	HASHput(hs, prb, p);
1145	}
1146	doreturn_free:
1147	HEAPfree(&hs->heap, true);
1148	GDKfree(hs);
1149	if (p == q) {
1150	/ we completed the complete scan: no*
1151	* duplicates */
1152	b->tkey = true;
1153	}
1154	}
1155	}
1156	doreturn:
1157	return b->tkey;
1158	}
1159
1160	/ Return whether the BAT is ordered or not. If we don't know, invest*
1161	* in a scan and record the results in the bat descriptor. If during
1162	* the scan we happen to find evidence that the BAT is not reverse
1163	* sorted, we record the location. */
1164	bool
1165	BATordered(BAT *b)
1166	{
1167	lng t0 = `0`;
1168
1169	ALGODEBUG t0 = GDKusec();
1170
1171	if (b->ttype == TYPE_void)
1172	return true;
1173	/ In order that multiple threads don't scan the same BAT at*
1174	* the same time (happens a lot with mitosis/mergetable), we
1175	* use a lock. We reuse the hash lock for this, not because
1176	* this scanning interferes with hashes, but because it's
1177	* there, and not so likely to be used at the same time. */
1178	MT_lock_set(&b->batIdxLock);
1179	if (!b->tsorted && b->tnosorted == `0`) {
1180	BATiter bi = bat_iterator(b);
1181	int (cmpf)(const* void , const* void *) = ATOMcompare(b->ttype);
1182	BUN p, q;
1183	b->batDirtydesc = true;
1184	switch (ATOMbasetype(b->ttype)) {
1185	case TYPE_int: {
1186	const int iptr = (const* int *) Tloc(b, `0`);
1187	for (q = BUNlast(b), p = `1`; p < q; p++) {
1188	if (iptr[p - `1`] > iptr[p]) {
1189	b->tnosorted = p;
1190	ALGODEBUG fprintf(stderr, "#BATordered: fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, BATgetId(b), BATcount(b), GDKusec() - t0);
1191	goto doreturn;
1192	} else if (!b->trevsorted &&
1193	b->tnorevsorted == `0` &&
1194	iptr[p - `1`] < iptr[p]) {
1195	b->tnorevsorted = p;
1196	ALGODEBUG fprintf(stderr, "#BATordered: fixed norevsorted(" BUNFMT ") for %s#" BUNFMT "\n", p, BATgetId(b), BATcount(b));
1197	}
1198	}
1199	break;
1200	}
1201	case TYPE_lng: {
1202	const lng lptr = (const* lng *) Tloc(b, `0`);
1203	for (q = BUNlast(b), p = `1`; p < q; p++) {
1204	if (lptr[p - `1`] > lptr[p]) {
1205	b->tnosorted = p;
1206	ALGODEBUG fprintf(stderr, "#BATordered: fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, BATgetId(b), BATcount(b), GDKusec() - t0);
1207	goto doreturn;
1208	} else if (!b->trevsorted &&
1209	b->tnorevsorted == `0` &&
1210	lptr[p - `1`] < lptr[p]) {
1211	b->tnorevsorted = p;
1212	ALGODEBUG fprintf(stderr, "#BATordered: fixed norevsorted(" BUNFMT ") for %s#" BUNFMT "\n", p, BATgetId(b), BATcount(b));
1213	}
1214	}
1215	break;
1216	}
1217	default:
1218	for (q = BUNlast(b), p = `1`; p < q; p++) {
1219	int c;
1220	if ((c = cmpf(BUNtail(bi, p - `1`), BUNtail(bi, p))) > `0`) {
1221	b->tnosorted = p;
1222	ALGODEBUG fprintf(stderr, "#BATordered: fixed nosorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, BATgetId(b), BATcount(b), GDKusec() - t0);
1223	goto doreturn;
1224	} else if (!b->trevsorted &&
1225	b->tnorevsorted == `0` &&
1226	c < `0`) {
1227	b->tnorevsorted = p;
1228	ALGODEBUG fprintf(stderr, "#BATordered: fixed norevsorted(" BUNFMT ") for %s#" BUNFMT "\n", p, BATgetId(b), BATcount(b));
1229	}
1230	}
1231	break;
1232	}
1233	/ we only get here if we completed the scan; note*
1234	* that if we didn't record evidence about reverse
1235	* sortedness, we know that the BAT is also reverse
1236	* sorted */
1237	b->tsorted = true;
1238	ALGODEBUG fprintf(stderr, "#BATordered: fixed sorted for %s#" BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
1239	if (!b->trevsorted && b->tnorevsorted == `0`) {
1240	b->trevsorted = true;
1241	ALGODEBUG fprintf(stderr, "#BATordered: fixed revsorted for %s#" BUNFMT "\n", BATgetId(b), BATcount(b));
1242	}
1243	}
1244	doreturn:
1245	MT_lock_unset(&b->batIdxLock);
1246	return b->tsorted;
1247	}
1248
1249	/ Return whether the BAT is reverse ordered or not. If we don't*
1250	* know, invest in a scan and record the results in the bat
1251	* descriptor. */
1252	bool
1253	BATordered_rev(BAT *b)
1254	{
1255	lng t0 = `0`;
1256
1257	ALGODEBUG t0 = GDKusec();
1258
1259	if (b == NULL)
1260	return false;
1261	if (BATcount(b) <= `1`)
1262	return true;
1263	if (b->ttype == TYPE_void)
1264	return is_oid_nil(b->tseqbase);
1265	if (BATtdense(b))
1266	return false;
1267	MT_lock_set(&b->batIdxLock);
1268	if (!b->trevsorted && b->tnorevsorted == `0`) {
1269	BATiter bi = bat_iterator(b);
1270	int (cmpf)(const* void , const* void *) = ATOMcompare(b->ttype);
1271	BUN p, q;
1272	b->batDirtydesc = true;
1273	for (q = BUNlast(b), p = `1`; p < q; p++) {
1274	if (cmpf(BUNtail(bi, p - `1`), BUNtail(bi, p)) < `0`) {
1275	b->tnorevsorted = p;
1276	ALGODEBUG fprintf(stderr, "#BATordered_rev: fixed norevsorted(" BUNFMT ") for %s#" BUNFMT " (" LLFMT " usec)\n", p, BATgetId(b), BATcount(b), GDKusec() - t0);
1277	goto doreturn;
1278	}
1279	}
1280	b->trevsorted = true;
1281	ALGODEBUG fprintf(stderr, "#BATordered_rev: fixed revsorted for %s#" BUNFMT " (" LLFMT " usec)\n", BATgetId(b), BATcount(b), GDKusec() - t0);
1282	}
1283	doreturn:
1284	MT_lock_unset(&b->batIdxLock);
1285	return b->trevsorted;
1286	}
1287
1288	/ figure out which sort function is to be called*
1289	* stable sort can produce an error (not enough memory available),
1290	* "quick" sort does not produce errors */
1291	static gdk_return
1292	do_sort(void *restrict h, void *restrict t, const void *restrict base,
1293	size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
1294	bool stable)
1295	{
1296	if (n <= `1`) / trivially sorted /
1297	return GDK_SUCCEED;
1298	if (stable) {
1299	if (reverse)
1300	return GDKssort_rev(h, t, base, n, hs, ts, tpe);
1301	else
1302	return GDKssort(h, t, base, n, hs, ts, tpe);
1303	} else {
1304	GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
1305	}
1306	return GDK_SUCCEED;
1307	}
1308
1309	/ Sort the bat b according to both o and g. The stable and reverse*
1310	* parameters indicate whether the sort should be stable or descending
1311	* respectively. The parameter b is required, o and g are optional
1312	* (i.e., they may be NULL).
1313	*
1314	* A sorted copy is returned through the sorted parameter, the new
1315	* ordering is returned through the order parameter, group information
1316	* is returned through the groups parameter. All three output
1317	* parameters may be NULL. If they're all NULL, this function does
1318	* nothing.
1319	*
1320	* If o is specified, it is used to first rearrange b according to the
1321	* order specified in o, after which b is sorted taking g into
1322	* account.
1323	*
1324	* If g is specified, it indicates groups which should be individually
1325	* ordered. Each row of consecutive equal values in g indicates a
1326	* group which is sorted according to stable and reverse. g is used
1327	* after the order in b was rearranged according to o.
1328	*
1329	* The outputs order and groups can be used in subsequent calls to
1330	* this function. This can be used if multiple BATs need to be sorted
1331	* together. The BATs should then be sorted in order of significance,
1332	* and each following call should use the original unordered BAT plus
1333	* the order and groups bat from the previous call. In this case, the
1334	* sorted BATs are not of much use, so the sorted output parameter
1335	* does not need to be specified.
1336	* Apart from error checking and maintaining reference counts, sorting
1337	* three columns (col1, col2, col3) could look like this with the
1338	* sorted results in (col1s, col2s, col3s):
1339	* BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
1340	* BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
1341	* BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false);
1342	* Note that the "reverse" parameter can be different for each call.
1343	*/
1344	gdk_return
1345	BATsort(BAT sorted, BAT order, BAT **groups,
1346	BAT b, BAT o, BAT *g, bool reverse, bool nilslast, bool stable)
1347	{
1348	BAT bn = NULL, on = NULL, gn = NULL, pb = NULL;
1349	oid *restrict grps, *restrict ords, prev;
1350	BUN p, q, r;
1351	lng t0 = `0`;
1352	bool mkorderidx, orderidxlock = false;
1353
1354	ALGODEBUG t0 = GDKusec();
1355
1356	/ we haven't implemented NILs as largest value for stable*
1357	* sort, so NILs come first for ascending and last for
1358	* descending */
1359	assert(!stable \|\| reverse == nilslast);
1360
1361	if (b == NULL) {
1362	GDKerror("BATsort: b must exist\n");
1363	return GDK_FAIL;
1364	}
1365	if (stable && reverse != nilslast) {
1366	GDKerror("BATsort: stable sort cannot have "
1367	"reverse != nilslast\n");
1368	return GDK_FAIL;
1369	}
1370	if (!ATOMlinear(b->ttype)) {
1371	GDKerror("BATsort: type %s cannot be sorted\n",
1372	ATOMname(b->ttype));
1373	return GDK_FAIL;
1374	}
1375	if (b->ttype == TYPE_void) {
1376	if (!b->tsorted) {
1377	b->tsorted = true;
1378	b->batDirtydesc = true;
1379	}
1380	if (b->trevsorted != is_oid_nil(b->tseqbase) \|\| b->batCount <= `1`) {
1381	b->trevsorted = !b->trevsorted;
1382	b->batDirtydesc = true;
1383	}
1384	if (b->tkey != !is_oid_nil(b->tseqbase)) {
1385	b->tkey = !b->tkey;
1386	b->batDirtydesc = true;
1387	}
1388	} else if (b->batCount <= `1`) {
1389	if (!b->tsorted \|\| !b->trevsorted) {
1390	b->tsorted = b->trevsorted = true;
1391	b->batDirtydesc = true;
1392	}
1393	}
1394	if (o != NULL &&
1395	(ATOMtype(o->ttype) != TYPE_oid \|\| / oid tail /
1396	BATcount(o) != BATcount(b) \|\| / same size as b /
1397	(o->ttype == TYPE_void && / no nil tail /
1398	BATcount(o) != `0` &&
1399	is_oid_nil(o->tseqbase)))) {
1400	GDKerror("BATsort: o must have type oid and same size as b\n");
1401	return GDK_FAIL;
1402	}
1403	if (g != NULL &&
1404	(ATOMtype(g->ttype) != TYPE_oid \|\| / oid tail /
1405	!g->tsorted \|\| / sorted /
1406	BATcount(o) != BATcount(b) \|\| / same size as b /
1407	(g->ttype == TYPE_void && / no nil tail /
1408	BATcount(g) != `0` &&
1409	is_oid_nil(g->tseqbase)))) {
1410	GDKerror("BATsort: g must have type oid, sorted on the tail, "
1411	"and same size as b\n");
1412	return GDK_FAIL;
1413	}
1414	if (sorted == NULL && order == NULL) {
1415	/ no place to put result, so we're done quickly /
1416	GDKerror("BATsort: no place to put the result.\n");
1417	return GDK_FAIL;
1418	}
1419	if (g == NULL && !stable) {
1420	/ pre-ordering doesn't make sense if we're not*
1421	* subsorting and the sort is not stable */
1422	o = NULL;
1423	}
1424	if (b->tnonil) {
1425	/ if there are no nils, placement of nils doesn't*
1426	* matter, so set nilslast such that ordered bits can
1427	* be used */
1428	nilslast = reverse;
1429	}
1430	if (BATcount(b) <= `1` \|\|
1431	(reverse == nilslast &&
1432	(reverse ? BATtrevordered(b) : BATtordered(b)) &&
1433	o == NULL && g == NULL &&
1434	(groups == NULL \|\| BATtkey(b) \|\|
1435	(reverse ? BATtordered(b) : BATtrevordered(b))))) {
1436	/ trivially (sub)sorted, and either we don't need to*
1437	* return group information, or we can trivially
1438	* deduce the groups */
1439	if (sorted) {
1440	bn = COLcopy(b, b->ttype, false, TRANSIENT);
1441	if (bn == NULL)
1442	goto error;
1443	*sorted = bn;
1444	}
1445	if (order) {
1446	on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
1447	if (on == NULL)
1448	goto error;
1449	*order = on;
1450	}
1451	if (groups) {
1452	if (BATtkey(b)) {
1453	/ singleton groups /
1454	gn = BATdense(`0`, `0`, BATcount(b));
1455	if (gn == NULL)
1456	goto error;
1457	} else {
1458	/ single group /
1459	const oid *o = `0`;
1460	assert(BATcount(b) == `1` \|\|
1461	(BATtordered(b) && BATtrevordered(b)));
1462	gn = BATconstant(`0`, TYPE_oid, &o, BATcount(b), TRANSIENT);
1463	if (gn == NULL)
1464	goto error;
1465	}
1466	*groups = gn;
1467	}
1468	ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o="
1469	ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
1470	",reverse=%d,nilslast=%d,stable=%d) = ("
1471	ALGOOPTBATFMT "," ALGOOPTBATFMT ","
1472	ALGOOPTBATFMT ") -- trivial (" LLFMT
1473	" usec)\n",
1474	ALGOBATPAR(b), ALGOOPTBATPAR(o),
1475	ALGOOPTBATPAR(g), reverse, nilslast, stable,
1476	ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
1477	ALGOOPTBATPAR(on), GDKusec() - t0);
1478	return GDK_SUCCEED;
1479	}
1480	if (VIEWtparent(b)) {
1481	pb = BBPdescriptor(VIEWtparent(b));
1482	if (b->theap.base != pb->theap.base \|\|
1483	BATcount(b) != BATcount(pb) \|\|
1484	b->hseqbase != pb->hseqbase \|\|
1485	BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)
1486	pb = NULL;
1487	} else {
1488	pb = b;
1489	}
1490	/ when we will create an order index if it doesn't already exist /
1491	mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order \|\| !pb->batTransient));
1492	if (g == NULL && !reverse && !nilslast &&
1493	pb != NULL && !BATcheckorderidx(pb)) {
1494	MT_lock_set(&pb->batIdxLock);
1495	if (pb->torderidx == NULL) {
1496	/ no index created while waiting for lock /
1497	if (mkorderidx) / keep lock when going to create /
1498	orderidxlock = true;
1499	} else {
1500	/ no need to create an index: it already exists /
1501	mkorderidx = false;
1502	}
1503	if (!orderidxlock)
1504	MT_lock_unset(&pb->batIdxLock);
1505	} else {
1506	mkorderidx = false;
1507	}
1508	if (g == NULL && o == NULL && !reverse && !nilslast &&
1509	pb != NULL && pb->torderidx != NULL &&
1510	/ if we want a stable sort, the order index must be*
1511	* stable, if we don't want stable, we don't care */
1512	(!stable \|\| ((oid *) pb->torderidx->base)[`2`])) {
1513	/ there is an order index that we can use /
1514	on = COLnew(pb->hseqbase, TYPE_oid, BATcount(pb), TRANSIENT);
1515	if (on == NULL)
1516	goto error;
1517	memcpy(Tloc(on, `0`), (oid ) pb->torderidx->base + ORDERIDXOFF, BATcount(pb) sizeof(oid));
1518	BATsetcount(on, BATcount(b));
1519	on->tkey = true;
1520	on->tnil = false;
1521	on->tnonil = true;
1522	on->tsorted = on->trevsorted = false;
1523	on->tseqbase = oid_nil;
1524	if (sorted \|\| groups) {
1525	bn = BATproject(on, b);
1526	if (bn == NULL)
1527	goto error;
1528	bn->tsorted = true;
1529	if (groups) {
1530	if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
1531	goto error;
1532	if (sorted &&
1533	(*groups)->tkey &&
1534	g == NULL) {
1535	/ if new groups bat is key*
1536	* and since there is no input
1537	* groups bat, we know the
1538	* result bat is key */
1539	bn->tkey = true;
1540	}
1541	}
1542	if (sorted)
1543	*sorted = bn;
1544	else {
1545	BBPunfix(bn->batCacheid);
1546	bn = NULL;
1547	}
1548	}
1549	if (order)
1550	*order = on;
1551	else {
1552	BBPunfix(on->batCacheid);
1553	on = NULL;
1554	}
1555	ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o="
1556	ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
1557	",reverse=%d,nilslast=%d,stable=%d) = ("
1558	ALGOOPTBATFMT "," ALGOOPTBATFMT ","
1559	ALGOOPTBATFMT ") -- orderidx (" LLFMT
1560	" usec)\n",
1561	ALGOBATPAR(b), ALGOOPTBATPAR(o),
1562	ALGOOPTBATPAR(g), reverse, nilslast, stable,
1563	ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
1564	ALGOOPTBATPAR(on), GDKusec() - t0);
1565	return GDK_SUCCEED;
1566	}
1567	if (o) {
1568	bn = BATproject(o, b);
1569	if (bn == NULL)
1570	goto error;
1571	if (bn->ttype == TYPE_void \|\| isVIEW(bn)) {
1572	BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
1573	BBPunfix(bn->batCacheid);
1574	bn = b2;
1575	}
1576	pb = NULL;
1577	} else {
1578	bn = COLcopy(b, b->ttype, true, TRANSIENT);
1579	}
1580	if (bn == NULL)
1581	goto error;
1582	if (order) {
1583	/ prepare order bat /
1584	if (o) {
1585	/ make copy of input so that we can refine it;*
1586	* copy can be read-only if we take the shortcut
1587	* below in the case g is "key" */
1588	on = COLcopy(o, TYPE_oid,
1589	g == NULL \|\|
1590	!(g->tkey \|\| g->ttype == TYPE_void),
1591	TRANSIENT);
1592	if (on == NULL)
1593	goto error;
1594	BAThseqbase(on, b->hseqbase);
1595	} else {
1596	/ create new order /
1597	on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
1598	if (on == NULL)
1599	goto error;
1600	ords = (oid *) Tloc(on, `0`);
1601	for (p = `0`, q = BATcount(bn); p < q; p++)
1602	ords[p] = p + b->hseqbase;
1603	BATsetcount(on, BATcount(bn));
1604	on->tkey = true;
1605	on->tnil = false;
1606	on->tnonil = true;
1607	}
1608	/ COLcopy above can create TYPE_void /
1609	if (on->ttype != TYPE_void) {
1610	on->tsorted = on->trevsorted = false; / it won't be sorted /
1611	on->tseqbase = oid_nil; / and hence not dense /
1612	on->tnosorted = on->tnorevsorted = `0`;
1613	}
1614	*order = on;
1615	ords = (oid *) Tloc(on, `0`);
1616	} else {
1617	ords = NULL;
1618	}
1619	if (g) {
1620	if (g->tkey \|\| g->ttype == TYPE_void) {
1621	/ if g is "key", all groups are size 1, so no*
1622	* subsorting needed */
1623	if (sorted) {
1624	*sorted = bn;
1625	} else {
1626	BBPunfix(bn->batCacheid);
1627	bn = NULL;
1628	}
1629	if (order) {
1630	*order = on;
1631	if (o) {
1632	/ we can inherit sortedness*
1633	* after all */
1634	on->tsorted = o->tsorted;
1635	on->trevsorted = o->trevsorted;
1636	if (o->tnosorted)
1637	on->tnosorted = o->tnosorted;
1638	if (o->tnorevsorted)
1639	on->tnorevsorted = o->tnorevsorted;
1640	} else {
1641	/ we didn't rearrange, so*
1642	* still sorted */
1643	on->tsorted = true;
1644	on->trevsorted = false;
1645	}
1646	if (BATcount(on) <= `1`) {
1647	on->tsorted = true;
1648	on->trevsorted = true;
1649	}
1650	}
1651	if (groups) {
1652	gn = COLcopy(g, g->ttype, false, TRANSIENT);
1653	if (gn == NULL)
1654	goto error;
1655	*groups = gn;
1656	}
1657	ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT
1658	",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
1659	",reverse=%d,nilslast=%d,stable=%d"
1660	") = (" ALGOOPTBATFMT ","
1661	ALGOOPTBATFMT "," ALGOOPTBATFMT
1662	") -- key group (" LLFMT " usec)\n",
1663	ALGOBATPAR(b), ALGOOPTBATPAR(o),
1664	ALGOBATPAR(g), reverse, nilslast,
1665	stable, ALGOOPTBATPAR(bn),
1666	ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
1667	GDKusec() - t0);
1668	return GDK_SUCCEED;
1669	}
1670	assert(g->ttype == TYPE_oid);
1671	grps = (oid *) Tloc(g, `0`);
1672	prev = grps[`0`];
1673	if (BATmaterialize(bn) != GDK_SUCCEED)
1674	goto error;
1675	for (r = `0`, p = `1`, q = BATcount(g); p < q; p++) {
1676	if (grps[p] != prev) {
1677	/ sub sort [r,p) /
1678	if (do_sort(Tloc(bn, r),
1679	ords ? ords + r : NULL,
1680	bn->tvheap ? bn->tvheap->base : NULL,
1681	p - r, Tsize(bn), ords ? sizeof(oid) : `0`,
1682	bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
1683	goto error;
1684	r = p;
1685	prev = grps[p];
1686	}
1687	}
1688	/ sub sort [r,q) /
1689	if (do_sort(Tloc(bn, r),
1690	ords ? ords + r : NULL,
1691	bn->tvheap ? bn->tvheap->base : NULL,
1692	p - r, Tsize(bn), ords ? sizeof(oid) : `0`,
1693	bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
1694	goto error;
1695	/ if single group (r==0) the result is (rev)sorted,*
1696	* otherwise (maybe) not */
1697	bn->tsorted = r == `0` && !reverse && !nilslast;
1698	bn->trevsorted = r == `0` && reverse && nilslast;
1699	} else {
1700	Heap *m = NULL;
1701	/ only invest in creating an order index if the BAT*
1702	* is persistent */
1703	if (mkorderidx) {
1704	assert(orderidxlock);
1705	if ((m = createOIDXheap(pb, stable)) != NULL &&
1706	ords == NULL) {
1707	ords = (oid *) m->base + ORDERIDXOFF;
1708	if (o && o->ttype != TYPE_void)
1709	memcpy(ords, Tloc(o, `0`), BATcount(o) * sizeof(oid));
1710	else if (o)
1711	for (p = `0`, q = BATcount(o); p < q; p++)
1712	ords[p] = p + o->tseqbase;
1713	else
1714	for (p = `0`, q = BATcount(b); p < q; p++)
1715	ords[p] = p + b->hseqbase;
1716	}
1717	}
1718	if ((reverse != nilslast \|\|
1719	(reverse ? !bn->trevsorted : !bn->tsorted)) &&
1720	(BATmaterialize(bn) != GDK_SUCCEED \|\|
1721	do_sort(Tloc(bn, `0`),
1722	ords,
1723	bn->tvheap ? bn->tvheap->base : NULL,
1724	BATcount(bn), Tsize(bn), ords ? sizeof(oid) : `0`,
1725	bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
1726	if (m != NULL) {
1727	HEAPfree(m, true);
1728	GDKfree(m);
1729	}
1730	if (orderidxlock)
1731	MT_lock_unset(&pb->batIdxLock);
1732	goto error;
1733	}
1734	bn->tsorted = !reverse && !nilslast;
1735	bn->trevsorted = reverse && nilslast;
1736	if (m != NULL) {
1737	assert(orderidxlock);
1738	if (pb->torderidx == NULL) {
1739	pb->batDirtydesc = true;
1740	if (ords != (oid *) m->base + ORDERIDXOFF) {
1741	memcpy((oid *) m->base + ORDERIDXOFF,
1742	ords,
1743	BATcount(pb) * sizeof(oid));
1744	}
1745	pb->torderidx = m;
1746	persistOIDX(pb);
1747	} else {
1748	HEAPfree(m, true);
1749	GDKfree(m);
1750	}
1751	}
1752	}
1753	if (orderidxlock)
1754	MT_lock_unset(&pb->batIdxLock);
1755	bn->theap.dirty = true;
1756	bn->tnosorted = `0`;
1757	bn->tnorevsorted = `0`;
1758	bn->tnokey[`0`] = bn->tnokey[`1`] = `0`;
1759	if (groups) {
1760	if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
1761	goto error;
1762	if ((*groups)->tkey &&
1763	(g == NULL \|\| (g->tsorted && g->trevsorted))) {
1764	/ if new groups bat is key and the input*
1765	* group bat has a single value (both sorted
1766	* and revsorted), we know the result bat is
1767	* key */
1768	bn->tkey = true;
1769	}
1770	}
1771
1772	if (sorted)
1773	*sorted = bn;
1774	else {
1775	BBPunfix(bn->batCacheid);
1776	bn = NULL;
1777	}
1778
1779	ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
1780	",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
1781	"stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
1782	ALGOOPTBATFMT ") -- %ssort (" LLFMT " usec)\n",
1783	ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
1784	reverse, nilslast, stable, ALGOOPTBATPAR(bn),
1785	ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
1786	g ? "grouped " : "", GDKusec() - t0);
1787	return GDK_SUCCEED;
1788
1789	error:
1790	if (bn)
1791	BBPunfix(bn->batCacheid);
1792	BBPreclaim(on);
1793	if (sorted)
1794	*sorted = NULL;
1795	if (order)
1796	*order = NULL;
1797	if (groups)
1798	*groups = NULL;
1799	return GDK_FAIL;
1800	}
1801
1802	/ return a new BAT of length n with seqbase hseq, and the constant v*
1803	* in the tail */
1804	BAT *
1805	BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
1806	{
1807	BAT *bn;
1808	void *restrict p;
1809	BUN i;
1810	lng t0 = `0`;
1811
1812	ALGODEBUG t0 = GDKusec();
1813
1814	if (v == NULL)
1815	return NULL;
1816	bn = COLnew(hseq, tailtype, n, role);
1817	if (bn != NULL && n > `0`) {
1818	p = Tloc(bn, `0`);
1819	switch (ATOMstorage(tailtype)) {
1820	case TYPE_void:
1821	v = &oid_nil;
1822	BATtseqbase(bn, oid_nil);
1823	break;
1824	case TYPE_bte:
1825	for (i = `0`; i < n; i++)
1826	((bte ) p)[i] = (bte *) v;
1827	break;
1828	case TYPE_sht:
1829	for (i = `0`; i < n; i++)
1830	((sht ) p)[i] = (sht *) v;
1831	break;
1832	case TYPE_int:
1833	case TYPE_flt:
1834	assert(sizeof(int) == sizeof(flt));
1835	for (i = `0`; i < n; i++)
1836	((int ) p)[i] = (int *) v;
1837	break;
1838	case TYPE_lng:
1839	case TYPE_dbl:
1840	assert(sizeof(lng) == sizeof(dbl));
1841	for (i = `0`; i < n; i++)
1842	((lng ) p)[i] = (lng *) v;
1843	break;
1844	#ifdef HAVE_HGE
1845	case TYPE_hge:
1846	for (i = `0`; i < n; i++)
1847	((hge ) p)[i] = (hge *) v;
1848	break;
1849	#endif
1850	default:
1851	for (i = `0`, n += i; i < n; i++)
1852	tfastins_nocheck(bn, i, v, Tsize(bn));
1853	break;
1854	}
1855	bn->theap.dirty = true;
1856	bn->tnil = n >= `1` && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == `0`;
1857	BATsetcount(bn, n);
1858	bn->tsorted = true;
1859	bn->trevsorted = true;
1860	bn->tnonil = !bn->tnil;
1861	bn->tkey = BATcount(bn) <= `1`;
1862	}
1863	ALGODEBUG fprintf(stderr, "#%s: %s()=" ALGOOPTBATFMT
1864	" (" LLFMT "usec)\n",
1865	MT_thread_getname(), __func__,
1866	ALGOOPTBATPAR(bn), GDKusec() - t0);
1867	return bn;
1868
1869	bunins_failed:
1870	BBPreclaim(bn);
1871	return NULL;
1872	}
1873
1874	/*
1875	* BAT Aggregates
1876	*
1877	* We retain the size() and card() aggregate results in the column
1878	* descriptor. We would like to have such functionality in an
1879	* extensible way for many aggregates, for DD (1) we do not want to
1880	* change the binary BAT format on disk and (2) aggr and size are the
1881	* most relevant aggregates.
1882	*
1883	* It is all hacked into the aggr[3] records; three adjacent integers
1884	* that were left over in the column record. We refer to these as if
1885	* it where an int aggr[3] array. The below routines set and retrieve
1886	* the aggregate values from the tail of the BAT, as many
1887	* aggregate-manipulating BAT functions work on tail.
1888	*
1889	* The rules are as follows: aggr[0] contains the alignment ID of the
1890	* column (if set i.e. nonzero). Hence, if this value is nonzero and
1891	* equal to b->talign, the precomputed aggregate values in
1892	* aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
1893	* of them may be set at the time. This is encoded by the value
1894	* int_nil, which cannot occur in these two aggregates.
1895	*
1896	* This was now extended to record the property whether we know there
1897	* is a nil value present by mis-using the highest bits of both
1898	* GDK_AGGR_SIZE and GDK_AGGR_CARD.
1899	*/
1900
1901	void
1902	PROPdestroy(BAT *b)
1903	{
1904	PROPrec *p = b->tprops;
1905	PROPrec *n;
1906
1907	b->tprops = NULL;
1908	while (p) {
1909	n = p->next;
1910	VALclear(&p->v);
1911	GDKfree(p);
1912	p = n;
1913	}
1914	}
1915
1916	PROPrec *
1917	BATgetprop_nolock(BAT b, enum* prop_t idx)
1918	{
1919	PROPrec *p;
1920
1921	p = b->tprops;
1922	while (p && p->id != idx)
1923	p = p->next;
1924	return p;
1925	}
1926
1927	static void
1928	BATrmprop_nolock(BAT b, enum* prop_t idx)
1929	{
1930	PROPrec prop = b->tprops, prev = NULL;
1931
1932	while (prop) {
1933	if (prop->id == idx) {
1934	if (prev)
1935	prev->next = prop->next;
1936	else
1937	b->tprops = prop->next;
1938	VALclear(&prop->v);
1939	GDKfree(prop);
1940	return;
1941	}
1942	prev = prop;
1943	prop = prop->next;
1944	}
1945	}
1946
1947	void
1948	BATsetprop_nolock(BAT b, enum* prop_t idx, int type, const void *v)
1949	{
1950	PROPrec *p;
1951
1952	p = b->tprops;
1953	while (p && p->id != idx)
1954	p = p->next;
1955	if (p == NULL) {
1956	if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
1957	/ properties are hints, so if we can't create*
1958	* one we ignore the error */
1959	GDKclrerr();
1960	return;
1961	}
1962	p->id = idx;
1963	p->next = b->tprops;
1964	p->v.vtype = `0`;
1965	b->tprops = p;
1966	} else {
1967	VALclear(&p->v);
1968	}
1969	if (VALinit(&p->v, type, v) == NULL) {
1970	/ failed to initialize, so remove property /
1971	BATrmprop_nolock(b, idx);
1972	GDKclrerr();
1973	}
1974	b->batDirtydesc = true;
1975	}
1976
1977	PROPrec *
1978	BATgetprop(BAT b, enum* prop_t idx)
1979	{
1980	PROPrec *p;
1981
1982	MT_lock_set(&b->batIdxLock);
1983	p = BATgetprop_nolock(b, idx);
1984	MT_lock_unset(&b->batIdxLock);
1985	return p;
1986	}
1987
1988	void
1989	BATsetprop(BAT b, enum* prop_t idx, int type, const void *v)
1990	{
1991	MT_lock_set(&b->batIdxLock);
1992	BATsetprop_nolock(b, idx, type, v);
1993	MT_lock_unset(&b->batIdxLock);
1994	}
1995
1996	void
1997	BATrmprop(BAT b, enum* prop_t idx)
1998	{
1999	MT_lock_set(&b->batIdxLock);
2000	BATrmprop_nolock(b, idx);
2001	MT_lock_unset(&b->batIdxLock);
2002	}
2003
2004
2005	/*
2006	* The BATcount_no_nil function counts all BUN in a BAT that have a
2007	* non-nil tail value.
2008	*/
2009	BUN
2010	BATcount_no_nil(BAT *b)
2011	{
2012	BUN cnt = `0`;
2013	BUN i, n;
2014	const void *restrict p, *restrict nil;
2015	const char *restrict base;
2016	int t;
2017	int (cmp)(const* void , const* void *);
2018
2019	BATcheck(b, "BATcnt", `0`);
2020	n = BATcount(b);
2021	if (b->tnonil)
2022	return n;
2023	p = Tloc(b, `0`);
2024	t = ATOMbasetype(b->ttype);
2025	switch (t) {
2026	case TYPE_void:
2027	cnt = n * BATtdense(b);
2028	break;
2029	case TYPE_bte:
2030	for (i = `0`; i < n; i++)
2031	cnt += !is_bte_nil(((const bte *) p)[i]);
2032	break;
2033	case TYPE_sht:
2034	for (i = `0`; i < n; i++)
2035	cnt += !is_sht_nil(((const sht *) p)[i]);
2036	break;
2037	case TYPE_int:
2038	for (i = `0`; i < n; i++)
2039	cnt += !is_int_nil(((const int *) p)[i]);
2040	break;
2041	case TYPE_lng:
2042	for (i = `0`; i < n; i++)
2043	cnt += !is_lng_nil(((const lng *) p)[i]);
2044	break;
2045	#ifdef HAVE_HGE
2046	case TYPE_hge:
2047	for (i = `0`; i < n; i++)
2048	cnt += !is_hge_nil(((const hge *) p)[i]);
2049	break;
2050	#endif
2051	case TYPE_flt:
2052	for (i = `0`; i < n; i++)
2053	cnt += !is_flt_nil(((const flt *) p)[i]);
2054	break;
2055	case TYPE_dbl:
2056	for (i = `0`; i < n; i++)
2057	cnt += !is_dbl_nil(((const dbl *) p)[i]);
2058	break;
2059	case TYPE_str:
2060	base = b->tvheap->base;
2061	switch (b->twidth) {
2062	case `1`:
2063	for (i = `0`; i < n; i++)
2064	cnt += base[(var_t) ((const unsigned char *) p)[i] + GDK_VAROFFSET] != `'\200'`;
2065	break;
2066	case `2`:
2067	for (i = `0`; i < n; i++)
2068	cnt += base[(var_t) ((const unsigned short *) p)[i] + GDK_VAROFFSET] != `'\200'`;
2069	break;
2070	#if SIZEOF_VAR_T != SIZEOF_INT
2071	case `4`:
2072	for (i = `0`; i < n; i++)
2073	cnt += base[(var_t) ((const unsigned int *) p)[i]] != `'\200'`;
2074	break;
2075	#endif
2076	default:
2077	for (i = `0`; i < n; i++)
2078	cnt += base[((const var_t *) p)[i]] != `'\200'`;
2079	break;
2080	}
2081	break;
2082	default:
2083	nil = ATOMnilptr(t);
2084	cmp = ATOMcompare(t);
2085	if (b->tvarsized) {
2086	base = b->tvheap->base;
2087	for (i = `0`; i < n; i++)
2088	cnt += (cmp)(nil, base + ((const* var_t *) p)[i]) != `0`;
2089	} else {
2090	for (i = `0`, n += i; i < n; i++)
2091	cnt += (*cmp)(Tloc(b, i), nil) != `0`;
2092	}
2093	break;
2094	}
2095	if (cnt == BATcount(b)) {
2096	/ we learned something /
2097	b->tnonil = true;
2098	assert(!b->tnil);
2099	b->tnil = false;
2100	}
2101	return cnt;
2102	}
2103

Browse the source code of MonetDB/gdk/gdk_batop.c