1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | #include "monetdb_config.h" |
10 | #include "gdk.h" |
11 | #include "gdk_private.h" |
12 | #include "gdk_calc_private.h" |
13 | |
14 | /* BATfirstn select the smallest n elements from the input bat b (if |
15 | * asc(ending) is set, else the largest n elements). Conceptually, b |
16 | * is sorted in ascending or descending order (depending on the asc |
17 | * argument) and then the OIDs of the first n elements are returned. |
18 | * If there are NILs in the BAT, their relative ordering is set by |
19 | * using the nilslast argument: if set, NILs come last (largest value |
20 | * when ascending, smallest value when descending), so if there are |
21 | * enough non-NIL values, no NILs will be returned. If unset (false), |
22 | * NILs come first and will be returned. |
23 | * |
24 | * In addition to the input BAT b, there can be a standard candidate |
25 | * list s. If s is specified (non-NULL), only elements in b that are |
26 | * referred to in s are considered. |
27 | * |
28 | * If the third input bat g is non-NULL, then s must also be non-NULL |
29 | * and must be aligned with g. G then specifies groups to which the |
30 | * elements referred to in s belong. Conceptually, the group values |
31 | * are sorted in ascending order together with the elements in b that |
32 | * are referred to in s (in ascending or descending order depending on |
33 | * asc), and the first n elements are then returned. |
34 | * |
35 | * If the output argument gids is NULL, only n elements are returned. |
36 | * If the output argument gids is non-NULL, more than n elements may |
37 | * be returned. If there are duplicate values (if g is given, the |
38 | * group value counts in determining duplication), all duplicates are |
39 | * returned. |
40 | * |
41 | * If distinct is set, the result contains n complete groups of values |
42 | * instead of just n values (or slightly more than n if gids is set |
43 | * since then the "last" group is returned completely). |
44 | * |
45 | * Note that BATfirstn can be called in cascading fashion to calculate |
46 | * the first n values of a table of multiple columns: |
47 | * BATfirstn(&s1, &g1, b1, NULL, NULL, n, asc, nilslast, distinct); |
48 | * BATfirstn(&s2, &g2, b2, s1, g1, n, asc, nilslast, distinct); |
49 | * BATfirstn(&s3, NULL, b3, s2, g2, n, asc, nilslast, distinct); |
50 | * If the input BATs b1, b2, b3 are large enough, s3 will contain the |
51 | * OIDs of the smallest (largest) n elements in the table consisting |
52 | * of the columns b1, b2, b3 when ordered in ascending order with b1 |
53 | * the major key. |
54 | */ |
55 | |
56 | /* We use a binary heap for the implementation of the simplest form of |
57 | * first-N. During processing, the oids list forms a heap with the |
58 | * root at position 0 and the children of a node at position i at |
59 | * positions 2*i+1 and 2*i+2. The parent node is always |
60 | * smaller/larger (depending on the value of asc) than its children |
61 | * (recursively). The heapify macro creates the heap from the input |
62 | * in-place. We start off with a heap containing the first N elements |
63 | * of the input, and then go over the rest of the input, replacing the |
64 | * root of the heap with a new value if appropriate (if the new value |
65 | * is among the first-N seen so far). The siftdown macro then |
66 | * restores the heap property. */ |
67 | #define siftdown(OPER, START, SWAP) \ |
68 | do { \ |
69 | pos = (START); \ |
70 | childpos = (pos << 1) + 1; \ |
71 | while (childpos < n) { \ |
72 | /* find most extreme child */ \ |
73 | if (childpos + 1 < n && \ |
74 | !(OPER(childpos + 1, childpos))) \ |
75 | childpos++; \ |
76 | /* compare parent with most extreme child */ \ |
77 | if (!OPER(pos, childpos)) { \ |
78 | /* already correctly ordered */ \ |
79 | break; \ |
80 | } \ |
81 | /* exchange parent with child and sift child */ \ |
82 | /* further */ \ |
83 | SWAP(pos, childpos); \ |
84 | pos = childpos; \ |
85 | childpos = (pos << 1) + 1; \ |
86 | } \ |
87 | } while (0) |
88 | |
89 | #define heapify(OPER, SWAP) \ |
90 | do { \ |
91 | for (i = n / 2; i > 0; i--) \ |
92 | siftdown(OPER, i - 1, SWAP); \ |
93 | } while (0) |
94 | |
95 | /* we inherit LT and GT from gdk_calc_private.h */ |
96 | |
97 | #define nLTbte(a, b) (!is_bte_nil(b) && (is_bte_nil(a) || (a) < (b))) |
98 | #define nLTsht(a, b) (!is_sht_nil(b) && (is_sht_nil(a) || (a) < (b))) |
99 | #define nLTint(a, b) (!is_int_nil(b) && (is_int_nil(a) || (a) < (b))) |
100 | #define nLTlng(a, b) (!is_lng_nil(b) && (is_lng_nil(a) || (a) < (b))) |
101 | #define nLThge(a, b) (!is_hge_nil(b) && (is_hge_nil(a) || (a) < (b))) |
102 | |
103 | #define nGTbte(a, b) (!is_bte_nil(b) && (is_bte_nil(a) || (a) > (b))) |
104 | #define nGTsht(a, b) (!is_sht_nil(b) && (is_sht_nil(a) || (a) > (b))) |
105 | #define nGTint(a, b) (!is_int_nil(b) && (is_int_nil(a) || (a) > (b))) |
106 | #define nGTlng(a, b) (!is_lng_nil(b) && (is_lng_nil(a) || (a) > (b))) |
107 | #define nGThge(a, b) (!is_hge_nil(b) && (is_hge_nil(a) || (a) > (b))) |
108 | |
109 | #define LTany(p1, p2) (cmp(BUNtail(bi, oids[p1] - b->hseqbase), \ |
110 | BUNtail(bi, oids[p2] - b->hseqbase)) < 0) |
111 | #define GTany(p1, p2) (cmp(BUNtail(bi, oids[p1] - b->hseqbase), \ |
112 | BUNtail(bi, oids[p2] - b->hseqbase)) > 0) |
113 | |
114 | #define nLTany(p1, p2) (cmp(BUNtail(bi, oids[p1] - b->hseqbase), nil) != 0 \ |
115 | && (cmp(BUNtail(bi, oids[p2] - b->hseqbase), nil) == 0 \ |
116 | || cmp(BUNtail(bi, oids[p1] - b->hseqbase), \ |
117 | BUNtail(bi, oids[p2] - b->hseqbase)) < 0)) |
118 | #define nGTany(p1, p2) (cmp(BUNtail(bi, oids[p2] - b->hseqbase), nil) != 0 \ |
119 | && (cmp(BUNtail(bi, oids[p1] - b->hseqbase), nil) == 0 \ |
120 | || cmp(BUNtail(bi, oids[p1] - b->hseqbase), \ |
121 | BUNtail(bi, oids[p2] - b->hseqbase)) > 0)) |
122 | |
123 | #define LTflt(a, b) (!is_flt_nil(b) && (is_flt_nil(a) || (a) < (b))) |
124 | #define LTdbl(a, b) (!is_dbl_nil(b) && (is_dbl_nil(a) || (a) < (b))) |
125 | #define GTflt(a, b) (!is_flt_nil(a) && (is_flt_nil(b) || (a) > (b))) |
126 | #define GTdbl(a, b) (!is_dbl_nil(a) && (is_dbl_nil(b) || (a) > (b))) |
127 | |
128 | #define nLTflt(a, b) (!is_flt_nil(a) && (is_flt_nil(b) || (a) < (b))) |
129 | #define nLTdbl(a, b) (!is_dbl_nil(a) && (is_dbl_nil(b) || (a) < (b))) |
130 | #define nGTflt(a, b) (!is_flt_nil(b) && (is_flt_nil(a) || (a) > (b))) |
131 | #define nGTdbl(a, b) (!is_dbl_nil(b) && (is_dbl_nil(a) || (a) > (b))) |
132 | |
133 | #define LTfltfix(p1, p2) LTflt(vals[oids[p1] - b->hseqbase], \ |
134 | vals[oids[p2] - b->hseqbase]) |
135 | #define GTfltfix(p1, p2) GTflt(vals[oids[p1] - b->hseqbase], \ |
136 | vals[oids[p2] - b->hseqbase]) |
137 | #define LTdblfix(p1, p2) LTdbl(vals[oids[p1] - b->hseqbase], \ |
138 | vals[oids[p2] - b->hseqbase]) |
139 | #define GTdblfix(p1, p2) GTdbl(vals[oids[p1] - b->hseqbase], \ |
140 | vals[oids[p2] - b->hseqbase]) |
141 | #define LTfix(p1, p2) LT(vals[oids[p1] - b->hseqbase], \ |
142 | vals[oids[p2] - b->hseqbase]) |
143 | #define GTfix(p1, p2) GT(vals[oids[p1] - b->hseqbase], \ |
144 | vals[oids[p2] - b->hseqbase]) |
145 | |
146 | #define nLTfltfix(p1, p2) nLTflt(vals[oids[p1] - b->hseqbase], \ |
147 | vals[oids[p2] - b->hseqbase]) |
148 | #define nGTfltfix(p1, p2) nGTflt(vals[oids[p1] - b->hseqbase], \ |
149 | vals[oids[p2] - b->hseqbase]) |
150 | #define nLTdblfix(p1, p2) nLTdbl(vals[oids[p1] - b->hseqbase], \ |
151 | vals[oids[p2] - b->hseqbase]) |
152 | #define nGTdblfix(p1, p2) nGTdbl(vals[oids[p1] - b->hseqbase], \ |
153 | vals[oids[p2] - b->hseqbase]) |
154 | #define nLTbtefix(p1, p2) nLTbte(vals[oids[p1] - b->hseqbase], \ |
155 | vals[oids[p2] - b->hseqbase]) |
156 | #define nGTbtefix(p1, p2) nGTbte(vals[oids[p1] - b->hseqbase], \ |
157 | vals[oids[p2] - b->hseqbase]) |
158 | #define nLTshtfix(p1, p2) nLTsht(vals[oids[p1] - b->hseqbase], \ |
159 | vals[oids[p2] - b->hseqbase]) |
160 | #define nGTshtfix(p1, p2) nGTsht(vals[oids[p1] - b->hseqbase], \ |
161 | vals[oids[p2] - b->hseqbase]) |
162 | #define nLTintfix(p1, p2) nLTint(vals[oids[p1] - b->hseqbase], \ |
163 | vals[oids[p2] - b->hseqbase]) |
164 | #define nGTintfix(p1, p2) nGTint(vals[oids[p1] - b->hseqbase], \ |
165 | vals[oids[p2] - b->hseqbase]) |
166 | #define nLTlngfix(p1, p2) nLTlng(vals[oids[p1] - b->hseqbase], \ |
167 | vals[oids[p2] - b->hseqbase]) |
168 | #define nGTlngfix(p1, p2) nGTlng(vals[oids[p1] - b->hseqbase], \ |
169 | vals[oids[p2] - b->hseqbase]) |
170 | #define nLThgefix(p1, p2) nLThge(vals[oids[p1] - b->hseqbase], \ |
171 | vals[oids[p2] - b->hseqbase]) |
172 | #define nGThgefix(p1, p2) nGThge(vals[oids[p1] - b->hseqbase], \ |
173 | vals[oids[p2] - b->hseqbase]) |
174 | |
175 | #define SWAP1(p1, p2) \ |
176 | do { \ |
177 | item = oids[p1]; \ |
178 | oids[p1] = oids[p2]; \ |
179 | oids[p2] = item; \ |
180 | } while (0) |
181 | |
182 | #define shuffle_unique(TYPE, OP) \ |
183 | do { \ |
184 | const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ |
185 | heapify(OP##fix, SWAP1); \ |
186 | while (cnt > 0) { \ |
187 | cnt--; \ |
188 | i = canditer_next(&ci); \ |
189 | if (OP(vals[i - b->hseqbase], \ |
190 | vals[oids[0] - b->hseqbase])) { \ |
191 | oids[0] = i; \ |
192 | siftdown(OP##fix, 0, SWAP1); \ |
193 | } \ |
194 | } \ |
195 | } while (0) |
196 | |
197 | /* This version of BATfirstn returns a list of N oids (where N is the |
198 | * smallest among BATcount(b), BATcount(s), and n). The oids returned |
199 | * refer to the N smallest/largest (depending on asc) tail values of b |
200 | * (taking the optional candidate list s into account). If there are |
201 | * multiple equal values to take us past N, we return a subset of those. |
202 | * |
203 | * If lastp is non-NULL, it is filled in with the oid of the "last" |
204 | * value, i.e. the value of which there may be multiple occurrences |
205 | * that are not all included in the first N. |
206 | */ |
207 | static BAT * |
208 | BATfirstn_unique(BAT *b, BAT *s, BUN n, bool asc, bool nilslast, oid *lastp) |
209 | { |
210 | BAT *bn; |
211 | BATiter bi = bat_iterator(b); |
212 | oid *restrict oids; |
213 | BUN i, cnt; |
214 | struct canditer ci; |
215 | int tpe = b->ttype; |
216 | int (*cmp)(const void *, const void *); |
217 | const void *nil; |
218 | /* variables used in heapify/siftdown macros */ |
219 | oid item; |
220 | BUN pos, childpos; |
221 | |
222 | cnt = canditer_init(&ci, b, s); |
223 | |
224 | if (n >= cnt) { |
225 | /* trivial: return all candidates */ |
226 | if (lastp) |
227 | *lastp = 0; |
228 | return canditer_slice(&ci, 0, cnt); |
229 | } |
230 | |
231 | if (BATtvoid(b)) { |
232 | /* nilslast doesn't make a difference: either all are |
233 | * nil, or none are */ |
234 | if (asc || is_oid_nil(b->tseqbase)) { |
235 | /* return the first part of the candidate list |
236 | * or of the BAT itself */ |
237 | bn = canditer_slice(&ci, 0, n); |
238 | if (bn && lastp) |
239 | *lastp = BUNtoid(bn, n - 1); |
240 | return bn; |
241 | } |
242 | /* return the last part of the candidate list or of |
243 | * the BAT itself */ |
244 | bn = canditer_slice(&ci, cnt - n, cnt); |
245 | if (bn && lastp) |
246 | *lastp = BUNtoid(bn, 0); |
247 | return bn; |
248 | } |
249 | /* note, we want to do both calls */ |
250 | if (BATordered(b) | BATordered_rev(b)) { |
251 | /* trivial: b is sorted so we just need to return the |
252 | * initial or final part of it (or of the candidate |
253 | * list); however, if nilslast == asc, then the nil |
254 | * values (if any) are in the wrong place, so we need |
255 | * to do a little more work */ |
256 | |
257 | /* after we create the to-be-returned BAT, we set pos |
258 | * to the BUN in the new BAT whose value we should |
259 | * return through *lastp */ |
260 | if (nilslast == asc && !b->tnonil) { |
261 | pos = SORTfndlast(b, ATOMnilptr(tpe)); |
262 | pos = canditer_search(&ci, b->hseqbase + pos, true); |
263 | /* 0 <= pos <= cnt |
264 | * 0 < n < cnt |
265 | */ |
266 | if (b->tsorted) { |
267 | /* [0..pos) -- nil |
268 | * [pos..cnt) -- non-nil <<< |
269 | */ |
270 | if (asc) { /* i.e. nilslast */ |
271 | /* prefer non-nil and |
272 | * smallest */ |
273 | if (cnt - pos < n) { |
274 | bn = canditer_slice(&ci, cnt - n, cnt); |
275 | pos = 0; |
276 | } else { |
277 | bn = canditer_slice(&ci, pos, pos + n); |
278 | pos = n - 1; |
279 | } |
280 | } else { /* i.e. !asc, !nilslast */ |
281 | /* prefer nil and largest */ |
282 | if (pos < n) { |
283 | bn = canditer_slice2(&ci, 0, pos, cnt - (n - pos), cnt); |
284 | /* pos = pos; */ |
285 | } else { |
286 | bn = canditer_slice(&ci, 0, n); |
287 | pos = 0; |
288 | } |
289 | } |
290 | } else { /* i.e. trevsorted */ |
291 | /* [0..pos) -- non-nil >>> |
292 | * [pos..cnt) -- nil |
293 | */ |
294 | if (asc) { /* i.e. nilslast */ |
295 | /* prefer non-nil and |
296 | * smallest */ |
297 | if (pos < n) { |
298 | bn = canditer_slice(&ci, 0, n); |
299 | /* pos = pos; */ |
300 | } else { |
301 | bn = canditer_slice(&ci, pos - n, pos); |
302 | pos = 0; |
303 | } |
304 | } else { /* i.e. !asc, !nilslast */ |
305 | /* prefer nil and largest */ |
306 | if (cnt - pos < n) { |
307 | bn = canditer_slice2(&ci, 0, n - (cnt - pos), pos, cnt); |
308 | pos = n - (cnt - pos) - 1; |
309 | } else { |
310 | bn = canditer_slice(&ci, pos, pos + n); |
311 | pos = 0; |
312 | } |
313 | } |
314 | } |
315 | } else { |
316 | /* either there are no nils, or they are in |
317 | * the appropriate position already, so we can |
318 | * just slice */ |
319 | if (asc ? b->tsorted : b->trevsorted) { |
320 | /* return copy of first part of |
321 | * candidate list */ |
322 | bn = canditer_slice(&ci, 0, n); |
323 | pos = n - 1; |
324 | } else { |
325 | /* return copy of last part of |
326 | * candidate list */ |
327 | bn = canditer_slice(&ci, cnt - n, cnt); |
328 | pos = 0; |
329 | } |
330 | } |
331 | if (bn && lastp) |
332 | *lastp = BUNtoid(bn, pos); |
333 | return bn; |
334 | } |
335 | |
336 | bn = COLnew(0, TYPE_oid, n, TRANSIENT); |
337 | if (bn == NULL) |
338 | return NULL; |
339 | BATsetcount(bn, n); |
340 | oids = (oid *) Tloc(bn, 0); |
341 | cmp = ATOMcompare(tpe); |
342 | nil = ATOMnilptr(tpe); |
343 | /* if base type has same comparison function as type itself, we |
344 | * can use the base type */ |
345 | tpe = ATOMbasetype(tpe); /* takes care of oid */ |
346 | /* if the input happens to be almost sorted in ascending order |
347 | * (likely a common use case), it is more efficient to start |
348 | * off with the first n elements when doing a firstn-ascending |
349 | * and to start off with the last n elements when doing a |
350 | * firstn-descending so that most values that we look at after |
351 | * this will be skipped. */ |
352 | if (asc) { |
353 | for (i = 0; i < n; i++) |
354 | oids[i] = canditer_next(&ci); |
355 | } else { |
356 | item = canditer_idx(&ci, cnt - n); |
357 | ci.next = cnt - n; |
358 | ci.add = item - ci.seq - (cnt - n); |
359 | for (i = n; i > 0; i--) |
360 | oids[i - 1] = canditer_next(&ci); |
361 | canditer_reset(&ci); |
362 | } |
363 | cnt -= n; |
364 | |
365 | if (asc) { |
366 | if (nilslast && !b->tnonil) { |
367 | switch (tpe) { |
368 | case TYPE_bte: |
369 | shuffle_unique(bte, nLTbte); |
370 | break; |
371 | case TYPE_sht: |
372 | shuffle_unique(sht, nLTsht); |
373 | break; |
374 | case TYPE_int: |
375 | shuffle_unique(int, nLTint); |
376 | break; |
377 | case TYPE_lng: |
378 | shuffle_unique(lng, nLTlng); |
379 | break; |
380 | #ifdef HAVE_HGE |
381 | case TYPE_hge: |
382 | shuffle_unique(hge, nLThge); |
383 | break; |
384 | #endif |
385 | case TYPE_flt: |
386 | shuffle_unique(flt, nLTflt); |
387 | break; |
388 | case TYPE_dbl: |
389 | shuffle_unique(dbl, nLTdbl); |
390 | break; |
391 | default: |
392 | heapify(nLTany, SWAP1); |
393 | while (cnt > 0) { |
394 | cnt--; |
395 | i = canditer_next(&ci); |
396 | if (cmp(BUNtail(bi, i - b->hseqbase), nil) != 0 |
397 | && (cmp(BUNtail(bi, oids[0] - b->hseqbase), nil) == 0 |
398 | || cmp(BUNtail(bi, i - b->hseqbase), |
399 | BUNtail(bi, oids[0] - b->hseqbase)) < 0)) { |
400 | oids[0] = i; |
401 | siftdown(nLTany, 0, SWAP1); |
402 | } |
403 | } |
404 | break; |
405 | } |
406 | } else { |
407 | switch (tpe) { |
408 | case TYPE_bte: |
409 | shuffle_unique(bte, LT); |
410 | break; |
411 | case TYPE_sht: |
412 | shuffle_unique(sht, LT); |
413 | break; |
414 | case TYPE_int: |
415 | shuffle_unique(int, LT); |
416 | break; |
417 | case TYPE_lng: |
418 | shuffle_unique(lng, LT); |
419 | break; |
420 | #ifdef HAVE_HGE |
421 | case TYPE_hge: |
422 | shuffle_unique(hge, LT); |
423 | break; |
424 | #endif |
425 | case TYPE_flt: |
426 | shuffle_unique(flt, LTflt); |
427 | break; |
428 | case TYPE_dbl: |
429 | shuffle_unique(dbl, LTdbl); |
430 | break; |
431 | default: |
432 | heapify(LTany, SWAP1); |
433 | while (cnt > 0) { |
434 | cnt--; |
435 | i = canditer_next(&ci); |
436 | if (cmp(BUNtail(bi, i - b->hseqbase), |
437 | BUNtail(bi, oids[0] - b->hseqbase)) < 0) { |
438 | oids[0] = i; |
439 | siftdown(LTany, 0, SWAP1); |
440 | } |
441 | } |
442 | break; |
443 | } |
444 | } |
445 | } else { |
446 | if (nilslast || b->tnonil) { |
447 | switch (tpe) { |
448 | case TYPE_bte: |
449 | shuffle_unique(bte, GT); |
450 | break; |
451 | case TYPE_sht: |
452 | shuffle_unique(sht, GT); |
453 | break; |
454 | case TYPE_int: |
455 | shuffle_unique(int, GT); |
456 | break; |
457 | case TYPE_lng: |
458 | shuffle_unique(lng, GT); |
459 | break; |
460 | #ifdef HAVE_HGE |
461 | case TYPE_hge: |
462 | shuffle_unique(hge, GT); |
463 | break; |
464 | #endif |
465 | case TYPE_flt: |
466 | shuffle_unique(flt, GTflt); |
467 | break; |
468 | case TYPE_dbl: |
469 | shuffle_unique(dbl, GTdbl); |
470 | break; |
471 | default: |
472 | heapify(GTany, SWAP1); |
473 | while (cnt > 0) { |
474 | cnt--; |
475 | i = canditer_next(&ci); |
476 | if (cmp(BUNtail(bi, i - b->hseqbase), |
477 | BUNtail(bi, oids[0] - b->hseqbase)) > 0) { |
478 | oids[0] = i; |
479 | siftdown(GTany, 0, SWAP1); |
480 | } |
481 | } |
482 | break; |
483 | } |
484 | } else { |
485 | switch (tpe) { |
486 | case TYPE_bte: |
487 | shuffle_unique(bte, nGTbte); |
488 | break; |
489 | case TYPE_sht: |
490 | shuffle_unique(sht, nGTsht); |
491 | break; |
492 | case TYPE_int: |
493 | shuffle_unique(int, nGTint); |
494 | break; |
495 | case TYPE_lng: |
496 | shuffle_unique(lng, nGTlng); |
497 | break; |
498 | #ifdef HAVE_HGE |
499 | case TYPE_hge: |
500 | shuffle_unique(hge, nGThge); |
501 | break; |
502 | #endif |
503 | case TYPE_flt: |
504 | shuffle_unique(flt, nGTflt); |
505 | break; |
506 | case TYPE_dbl: |
507 | shuffle_unique(dbl, nGTdbl); |
508 | break; |
509 | default: |
510 | heapify(nGTany, SWAP1); |
511 | while (cnt > 0) { |
512 | cnt--; |
513 | i = canditer_next(&ci); |
514 | if (cmp(BUNtail(bi, oids[0] - b->hseqbase), nil) != 0 |
515 | && (cmp(BUNtail(bi, i - b->hseqbase), nil) == 0 |
516 | || cmp(BUNtail(bi, i - b->hseqbase), |
517 | BUNtail(bi, oids[0] - b->hseqbase)) > 0)) { |
518 | oids[0] = i; |
519 | siftdown(nGTany, 0, SWAP1); |
520 | } |
521 | } |
522 | break; |
523 | } |
524 | } |
525 | } |
526 | if (lastp) |
527 | *lastp = oids[0]; /* store id of largest value */ |
528 | /* output must be sorted since it's a candidate list */ |
529 | GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid, false, false); |
530 | bn->tsorted = true; |
531 | bn->trevsorted = n <= 1; |
532 | bn->tkey = true; |
533 | bn->tseqbase = n <= 1 ? oids[0] : oid_nil; |
534 | bn->tnil = false; |
535 | bn->tnonil = true; |
536 | return virtualize(bn); |
537 | } |
538 | |
539 | #define LTfixgrp(p1, p2) \ |
540 | (goids[p1] < goids[p2] || \ |
541 | (goids[p1] == goids[p2] && \ |
542 | LTfix(p1, p2))) |
543 | #define nLTbtefixgrp(p1, p2) \ |
544 | (goids[p1] < goids[p2] || \ |
545 | (goids[p1] == goids[p2] && \ |
546 | nLTbtefix(p1, p2))) |
547 | #define nLTshtfixgrp(p1, p2) \ |
548 | (goids[p1] < goids[p2] || \ |
549 | (goids[p1] == goids[p2] && \ |
550 | nLTshtfix(p1, p2))) |
551 | #define nLTintfixgrp(p1, p2) \ |
552 | (goids[p1] < goids[p2] || \ |
553 | (goids[p1] == goids[p2] && \ |
554 | nLTintfix(p1, p2))) |
555 | #define nLTlngfixgrp(p1, p2) \ |
556 | (goids[p1] < goids[p2] || \ |
557 | (goids[p1] == goids[p2] && \ |
558 | nLTlngfix(p1, p2))) |
559 | #define nLThgefixgrp(p1, p2) \ |
560 | (goids[p1] < goids[p2] || \ |
561 | (goids[p1] == goids[p2] && \ |
562 | nLThgefix(p1, p2))) |
563 | #define LTfltfixgrp(p1, p2) \ |
564 | (goids[p1] < goids[p2] || \ |
565 | (goids[p1] == goids[p2] && \ |
566 | LTfltfix(p1, p2))) |
567 | #define LTdblfixgrp(p1, p2) \ |
568 | (goids[p1] < goids[p2] || \ |
569 | (goids[p1] == goids[p2] && \ |
570 | LTdblfix(p1, p2))) |
571 | #define nLTfltfixgrp(p1, p2) \ |
572 | (goids[p1] < goids[p2] || \ |
573 | (goids[p1] == goids[p2] && \ |
574 | nLTfltfix(p1, p2))) |
575 | #define nLTdblfixgrp(p1, p2) \ |
576 | (goids[p1] < goids[p2] || \ |
577 | (goids[p1] == goids[p2] && \ |
578 | nLTdblfix(p1, p2))) |
579 | #define GTfixgrp(p1, p2) \ |
580 | (goids[p1] < goids[p2] || \ |
581 | (goids[p1] == goids[p2] && \ |
582 | GTfix(p1, p2))) |
583 | #define nGTbtefixgrp(p1, p2) \ |
584 | (goids[p1] < goids[p2] || \ |
585 | (goids[p1] == goids[p2] && \ |
586 | nGTbtefix(p1, p2))) |
587 | #define nGTshtfixgrp(p1, p2) \ |
588 | (goids[p1] < goids[p2] || \ |
589 | (goids[p1] == goids[p2] && \ |
590 | nGTshtfix(p1, p2))) |
591 | #define nGTintfixgrp(p1, p2) \ |
592 | (goids[p1] < goids[p2] || \ |
593 | (goids[p1] == goids[p2] && \ |
594 | nGTintfix(p1, p2))) |
595 | #define nGTlngfixgrp(p1, p2) \ |
596 | (goids[p1] < goids[p2] || \ |
597 | (goids[p1] == goids[p2] && \ |
598 | nGTlngfix(p1, p2))) |
599 | #define nGThgefixgrp(p1, p2) \ |
600 | (goids[p1] < goids[p2] || \ |
601 | (goids[p1] == goids[p2] && \ |
602 | nGThgefix(p1, p2))) |
603 | #define GTfltfixgrp(p1, p2) \ |
604 | (goids[p1] < goids[p2] || \ |
605 | (goids[p1] == goids[p2] && \ |
606 | GTfltfix(p1, p2))) |
607 | #define GTdblfixgrp(p1, p2) \ |
608 | (goids[p1] < goids[p2] || \ |
609 | (goids[p1] == goids[p2] && \ |
610 | GTdblfix(p1, p2))) |
611 | #define nGTfltfixgrp(p1, p2) \ |
612 | (goids[p1] < goids[p2] || \ |
613 | (goids[p1] == goids[p2] && \ |
614 | nGTfltfix(p1, p2))) |
615 | #define nGTdblfixgrp(p1, p2) \ |
616 | (goids[p1] < goids[p2] || \ |
617 | (goids[p1] == goids[p2] && \ |
618 | nGTdblfix(p1, p2))) |
619 | #define LTvoidgrp(p1, p2) \ |
620 | (goids[p1] < goids[p2] || \ |
621 | (goids[p1] == goids[p2] && oids[p1] < oids[p2])) |
622 | #define GTvoidgrp(p1, p2) \ |
623 | (goids[p1] < goids[p2] || \ |
624 | (goids[p1] == goids[p2] && oids[p1] > oids[p2])) |
625 | #define LTanygrp(p1, p2) \ |
626 | (goids[p1] < goids[p2] || \ |
627 | (goids[p1] == goids[p2] && \ |
628 | LTany(p1, p2))) |
629 | #define GTanygrp(p1, p2) \ |
630 | (goids[p1] < goids[p2] || \ |
631 | (goids[p1] == goids[p2] && \ |
632 | GTany(p1, p2))) |
633 | #define nLTanygrp(p1, p2) \ |
634 | (goids[p1] < goids[p2] || \ |
635 | (goids[p1] == goids[p2] && \ |
636 | nLTany(p1, p2))) |
637 | #define nGTanygrp(p1, p2) \ |
638 | (goids[p1] < goids[p2] || \ |
639 | (goids[p1] == goids[p2] && \ |
640 | nGTany(p1, p2))) |
641 | #define SWAP2(p1, p2) \ |
642 | do { \ |
643 | item = oids[p1]; \ |
644 | oids[p1] = oids[p2]; \ |
645 | oids[p2] = item; \ |
646 | item = goids[p1]; \ |
647 | goids[p1] = goids[p2]; \ |
648 | goids[p2] = item; \ |
649 | } while (0) |
650 | |
651 | #define shuffle_unique_with_groups(TYPE, OP) \ |
652 | do { \ |
653 | const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ |
654 | heapify(OP##fixgrp, SWAP2); \ |
655 | while (cnt > 0) { \ |
656 | cnt--; \ |
657 | i = canditer_next(&ci); \ |
658 | if (gv[j] < goids[0] || \ |
659 | (gv[j] == goids[0] && \ |
660 | OP(vals[i - b->hseqbase], \ |
661 | vals[oids[0] - b->hseqbase]))) { \ |
662 | oids[0] = i; \ |
663 | goids[0] = gv[j]; \ |
664 | siftdown(OP##fixgrp, 0, SWAP2); \ |
665 | } \ |
666 | j++; \ |
667 | } \ |
668 | } while (0) |
669 | |
670 | /* This version of BATfirstn is like the one above, except that it |
671 | * also looks at groups. The values of the group IDs are important: |
672 | * we return only the smallest N (i.e., not dependent on asc which |
673 | * refers only to the values in the BAT b). |
674 | * |
675 | * If lastp is non-NULL, it is filled in with the oid of the "last" |
676 | * value, i.e. the value of which there may be multiple occurrences |
677 | * that are not all included in the first N. If lastgp is non-NULL, |
678 | * it is filled with the group ID (not the oid of the group ID) for |
679 | * that same value. |
680 | */ |
681 | static BAT * |
682 | BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, bool asc, bool nilslast, oid *lastp, oid *lastgp) |
683 | { |
684 | BAT *bn; |
685 | BATiter bi = bat_iterator(b); |
686 | oid *restrict oids, *restrict goids; |
687 | const oid *restrict gv; |
688 | BUN i, j, cnt; |
689 | struct canditer ci; |
690 | int tpe = b->ttype; |
691 | int (*cmp)(const void *, const void *); |
692 | const void *nil; |
693 | /* variables used in heapify/siftdown macros */ |
694 | oid item; |
695 | BUN pos, childpos; |
696 | |
697 | cnt = canditer_init(&ci, b, s); |
698 | |
699 | if (n > cnt) |
700 | n = cnt; |
701 | |
702 | if (n == 0) { |
703 | /* candidate list might refer only to values outside |
704 | * of the bat and hence be effectively empty */ |
705 | if (lastp) |
706 | *lastp = 0; |
707 | if (lastgp) |
708 | *lastgp = 0; |
709 | return BATdense(0, 0, 0); |
710 | } |
711 | |
712 | if (BATtdense(g)) { |
713 | /* trivial: g determines ordering, return reference to |
714 | * initial part of b (or slice of s) */ |
715 | if (lastgp) |
716 | *lastgp = g->tseqbase + n - 1; |
717 | bn = canditer_slice(&ci, 0, n); |
718 | if (bn && lastp) |
719 | *lastp = BUNtoid(bn, n - 1); |
720 | return bn; |
721 | } |
722 | |
723 | bn = COLnew(0, TYPE_oid, n, TRANSIENT); |
724 | if (bn == NULL) |
725 | return NULL; |
726 | BATsetcount(bn, n); |
727 | oids = (oid *) Tloc(bn, 0); |
728 | gv = (const oid *) Tloc(g, 0); |
729 | goids = GDKmalloc(n * sizeof(oid)); |
730 | if (goids == NULL) { |
731 | BBPreclaim(bn); |
732 | return NULL; |
733 | } |
734 | |
735 | cmp = ATOMcompare(tpe); |
736 | nil = ATOMnilptr(tpe); |
737 | /* if base type has same comparison function as type itself, we |
738 | * can use the base type */ |
739 | tpe = ATOMbasetype(tpe); /* takes care of oid */ |
740 | j = 0; |
741 | for (i = 0; i < n; i++) { |
742 | oids[i] = canditer_next(&ci); |
743 | goids[i] = gv[j++]; |
744 | } |
745 | cnt -= n; |
746 | |
747 | if (BATtvoid(b)) { |
748 | /* nilslast doesn't make a difference (all nil, or no nil) */ |
749 | if (asc) { |
750 | heapify(LTvoidgrp, SWAP2); |
751 | while (cnt > 0) { |
752 | cnt--; |
753 | i = canditer_next(&ci); |
754 | if (gv[j] < goids[0] |
755 | /* || (gv[j] == goids[0] |
756 | && i < oids[0]) -- always false */) { |
757 | oids[0] = i; |
758 | goids[0] = gv[j]; |
759 | siftdown(LTvoidgrp, 0, SWAP2); |
760 | } |
761 | j++; |
762 | } |
763 | } else { |
764 | heapify(GTvoidgrp, SWAP2); |
765 | while (cnt > 0) { |
766 | cnt--; |
767 | i = canditer_next(&ci); |
768 | if (gv[j] < goids[0] |
769 | || (gv[j] == goids[0] |
770 | /* && i > oids[0] -- always true */)) { |
771 | oids[0] = i; |
772 | goids[0] = gv[j]; |
773 | siftdown(GTvoidgrp, 0, SWAP2); |
774 | } |
775 | j++; |
776 | } |
777 | } |
778 | } else if (asc) { |
779 | if (nilslast && !b->tnonil) { |
780 | switch (tpe) { |
781 | case TYPE_bte: |
782 | shuffle_unique_with_groups(bte, nLTbte); |
783 | break; |
784 | case TYPE_sht: |
785 | shuffle_unique_with_groups(sht, nLTsht); |
786 | break; |
787 | case TYPE_int: |
788 | shuffle_unique_with_groups(int, nLTint); |
789 | break; |
790 | case TYPE_lng: |
791 | shuffle_unique_with_groups(lng, nLTlng); |
792 | break; |
793 | #ifdef HAVE_HGE |
794 | case TYPE_hge: |
795 | shuffle_unique_with_groups(hge, nLThge); |
796 | break; |
797 | #endif |
798 | case TYPE_flt: |
799 | shuffle_unique_with_groups(flt, nLTflt); |
800 | break; |
801 | case TYPE_dbl: |
802 | shuffle_unique_with_groups(dbl, nLTdbl); |
803 | break; |
804 | default: |
805 | heapify(nLTanygrp, SWAP2); |
806 | while (cnt > 0) { |
807 | cnt--; |
808 | i = canditer_next(&ci); |
809 | if (gv[j] < goids[0] |
810 | || (gv[j] == goids[0] |
811 | && cmp(BUNtail(bi, i - b->hseqbase), nil) != 0 |
812 | && (cmp(BUNtail(bi, oids[0] - b->hseqbase), nil) == 0 |
813 | || cmp(BUNtail(bi, i - b->hseqbase), |
814 | BUNtail(bi, oids[0] - b->hseqbase)) < 0))) { |
815 | oids[0] = i; |
816 | goids[0] = gv[j]; |
817 | siftdown(nLTanygrp, 0, SWAP2); |
818 | } |
819 | j++; |
820 | } |
821 | break; |
822 | } |
823 | } else { |
824 | switch (tpe) { |
825 | case TYPE_bte: |
826 | shuffle_unique_with_groups(bte, LT); |
827 | break; |
828 | case TYPE_sht: |
829 | shuffle_unique_with_groups(sht, LT); |
830 | break; |
831 | case TYPE_int: |
832 | shuffle_unique_with_groups(int, LT); |
833 | break; |
834 | case TYPE_lng: |
835 | shuffle_unique_with_groups(lng, LT); |
836 | break; |
837 | #ifdef HAVE_HGE |
838 | case TYPE_hge: |
839 | shuffle_unique_with_groups(hge, LT); |
840 | break; |
841 | #endif |
842 | case TYPE_flt: |
843 | shuffle_unique_with_groups(flt, LTflt); |
844 | break; |
845 | case TYPE_dbl: |
846 | shuffle_unique_with_groups(dbl, LTdbl); |
847 | break; |
848 | default: |
849 | heapify(LTanygrp, SWAP2); |
850 | while (cnt > 0) { |
851 | cnt--; |
852 | i = canditer_next(&ci); |
853 | if (gv[j] < goids[0] || |
854 | (gv[j] == goids[0] && |
855 | cmp(BUNtail(bi, i - b->hseqbase), |
856 | BUNtail(bi, oids[0] - b->hseqbase)) < 0)) { |
857 | oids[0] = i; |
858 | goids[0] = gv[j]; |
859 | siftdown(LTanygrp, 0, SWAP2); |
860 | } |
861 | j++; |
862 | } |
863 | break; |
864 | } |
865 | } |
866 | } else if (nilslast || b->tnonil) { |
867 | switch (tpe) { |
868 | case TYPE_bte: |
869 | shuffle_unique_with_groups(bte, GT); |
870 | break; |
871 | case TYPE_sht: |
872 | shuffle_unique_with_groups(sht, GT); |
873 | break; |
874 | case TYPE_int: |
875 | shuffle_unique_with_groups(int, GT); |
876 | break; |
877 | case TYPE_lng: |
878 | shuffle_unique_with_groups(lng, GT); |
879 | break; |
880 | #ifdef HAVE_HGE |
881 | case TYPE_hge: |
882 | shuffle_unique_with_groups(hge, GT); |
883 | break; |
884 | #endif |
885 | case TYPE_flt: |
886 | shuffle_unique_with_groups(flt, GTflt); |
887 | break; |
888 | case TYPE_dbl: |
889 | shuffle_unique_with_groups(dbl, GTdbl); |
890 | break; |
891 | default: |
892 | heapify(GTanygrp, SWAP2); |
893 | while (cnt > 0) { |
894 | cnt--; |
895 | i = canditer_next(&ci); |
896 | if (gv[j] < goids[0] || |
897 | (gv[j] == goids[0] && |
898 | cmp(BUNtail(bi, i - b->hseqbase), |
899 | BUNtail(bi, oids[0] - b->hseqbase)) > 0)) { |
900 | oids[0] = i; |
901 | goids[0] = gv[j]; |
902 | siftdown(GTanygrp, 0, SWAP2); |
903 | } |
904 | j++; |
905 | } |
906 | break; |
907 | } |
908 | } else { |
909 | switch (tpe) { |
910 | case TYPE_bte: |
911 | shuffle_unique_with_groups(bte, nGTbte); |
912 | break; |
913 | case TYPE_sht: |
914 | shuffle_unique_with_groups(sht, nGTsht); |
915 | break; |
916 | case TYPE_int: |
917 | shuffle_unique_with_groups(int, nGTint); |
918 | break; |
919 | case TYPE_lng: |
920 | shuffle_unique_with_groups(lng, nGTlng); |
921 | break; |
922 | #ifdef HAVE_HGE |
923 | case TYPE_hge: |
924 | shuffle_unique_with_groups(hge, nGThge); |
925 | break; |
926 | #endif |
927 | case TYPE_flt: |
928 | shuffle_unique_with_groups(flt, nGTflt); |
929 | break; |
930 | case TYPE_dbl: |
931 | shuffle_unique_with_groups(dbl, nGTdbl); |
932 | break; |
933 | default: |
934 | heapify(nGTanygrp, SWAP2); |
935 | while (cnt > 0) { |
936 | cnt--; |
937 | i = canditer_next(&ci); |
938 | if (gv[j] < goids[0] |
939 | || (gv[j] == goids[0] |
940 | && cmp(BUNtail(bi, oids[0] - b->hseqbase), nil) != 0 |
941 | && (cmp(BUNtail(bi, i - b->hseqbase), nil) == 0 |
942 | || cmp(BUNtail(bi, i - b->hseqbase), |
943 | BUNtail(bi, oids[0] - b->hseqbase)) > 0))) { |
944 | oids[0] = i; |
945 | goids[0] = gv[j]; |
946 | siftdown(nGTanygrp, 0, SWAP2); |
947 | } |
948 | j++; |
949 | } |
950 | break; |
951 | } |
952 | } |
953 | if (lastp) |
954 | *lastp = oids[0]; |
955 | if (lastgp) |
956 | *lastgp = goids[0]; |
957 | GDKfree(goids); |
958 | /* output must be sorted since it's a candidate list */ |
959 | GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid, false, false); |
960 | bn->tsorted = true; |
961 | bn->trevsorted = n <= 1; |
962 | bn->tkey = true; |
963 | bn->tseqbase = n <= 1 ? oids[0] : oid_nil; |
964 | bn->tnil = false; |
965 | bn->tnonil = true; |
966 | return bn; |
967 | } |
968 | |
969 | static gdk_return |
970 | BATfirstn_grouped(BAT **topn, BAT **gids, BAT *b, BAT *s, BUN n, bool asc, bool nilslast, bool distinct) |
971 | { |
972 | BAT *bn, *gn, *su = NULL; |
973 | oid last; |
974 | gdk_return rc; |
975 | |
976 | if (distinct && !b->tkey) { |
977 | su = s; |
978 | s = BATunique(b, s); |
979 | if (s == NULL) |
980 | return GDK_FAIL; |
981 | } |
982 | bn = BATfirstn_unique(b, s, n, asc, nilslast, &last); |
983 | if (bn == NULL) |
984 | return GDK_FAIL; |
985 | if (BATcount(bn) == 0) { |
986 | if (gids) { |
987 | gn = BATdense(0, 0, 0); |
988 | if (gn == NULL) { |
989 | BBPunfix(bn->batCacheid); |
990 | return GDK_FAIL; |
991 | } |
992 | *gids = gn; |
993 | } |
994 | *topn = bn; |
995 | return GDK_SUCCEED; |
996 | } |
997 | if (!b->tkey) { |
998 | if (distinct) { |
999 | BAT *bn1; |
1000 | |
1001 | bn1 = bn; |
1002 | BBPunfix(s->batCacheid); |
1003 | bn = BATintersect(b, b, su, bn1, true, BUN_NONE); |
1004 | BBPunfix(bn1->batCacheid); |
1005 | if (bn == NULL) |
1006 | return GDK_FAIL; |
1007 | } else { |
1008 | BATiter bi = bat_iterator(b); |
1009 | BAT *bn1, *bn2; |
1010 | |
1011 | bn1 = bn; |
1012 | bn2 = BATselect(b, s, BUNtail(bi, last - b->hseqbase), NULL, true, false, false); |
1013 | if (bn2 == NULL) { |
1014 | BBPunfix(bn1->batCacheid); |
1015 | return GDK_FAIL; |
1016 | } |
1017 | bn = BATmergecand(bn1, bn2); |
1018 | BBPunfix(bn1->batCacheid); |
1019 | BBPunfix(bn2->batCacheid); |
1020 | if (bn == NULL) |
1021 | return GDK_FAIL; |
1022 | } |
1023 | } |
1024 | if (gids) { |
1025 | BAT *bn1, *bn2, *bn3, *bn4; |
1026 | bn1 = BATproject(bn, b); |
1027 | if (bn1 == NULL) { |
1028 | BBPunfix(bn->batCacheid); |
1029 | return GDK_FAIL; |
1030 | } |
1031 | rc = BATsort(NULL, &bn2, &bn3, bn1, NULL, NULL, !asc, !asc, false); |
1032 | BBPunfix(bn1->batCacheid); |
1033 | if (rc != GDK_SUCCEED) { |
1034 | BBPunfix(bn->batCacheid); |
1035 | return GDK_FAIL; |
1036 | } |
1037 | rc = BATsort(NULL, &bn4, NULL, bn2, NULL, NULL, false, false, false); |
1038 | BBPunfix(bn2->batCacheid); |
1039 | if (rc != GDK_SUCCEED) { |
1040 | BBPunfix(bn->batCacheid); |
1041 | BBPunfix(bn3->batCacheid); |
1042 | return GDK_FAIL; |
1043 | } |
1044 | gn = BATproject(bn4, bn3); |
1045 | BBPunfix(bn3->batCacheid); |
1046 | BBPunfix(bn4->batCacheid); |
1047 | if (gn == NULL) { |
1048 | BBPunfix(bn->batCacheid); |
1049 | return GDK_FAIL; |
1050 | } |
1051 | *gids = gn; |
1052 | assert(BATcount(gn) == BATcount(bn)); |
1053 | } |
1054 | *topn = bn; |
1055 | return GDK_SUCCEED; |
1056 | } |
1057 | |
1058 | static gdk_return |
1059 | BATfirstn_grouped_with_groups(BAT **topn, BAT **gids, BAT *b, BAT *s, BAT *g, BUN n, bool asc, bool nilslast, bool distinct) |
1060 | { |
1061 | BAT *bn, *gn; |
1062 | oid last, lastg; |
1063 | gdk_return rc; |
1064 | |
1065 | if (distinct) { |
1066 | BAT *bn1, *bn2, *bn3, *bn4, *bn5, *bn6, *bn7, *bn8; |
1067 | if (BATgroup(&bn1, &bn2, NULL, b, s, g, NULL, NULL) != GDK_SUCCEED) |
1068 | return GDK_FAIL; |
1069 | bn3 = BATproject(bn2, b); |
1070 | if (bn3 == NULL) { |
1071 | BBPunfix(bn1->batCacheid); |
1072 | BBPunfix(bn2->batCacheid); |
1073 | return GDK_FAIL; |
1074 | } |
1075 | bn4 = BATintersect(s, bn2, NULL, NULL, false, BUN_NONE); |
1076 | BBPunfix(bn2->batCacheid); |
1077 | if (bn4 == NULL) { |
1078 | BBPunfix(bn1->batCacheid); |
1079 | return GDK_FAIL; |
1080 | } |
1081 | bn5 = BATproject(bn4, g); |
1082 | BBPunfix(bn4->batCacheid); |
1083 | if (bn5 == NULL) { |
1084 | BBPunfix(bn1->batCacheid); |
1085 | return GDK_FAIL; |
1086 | } |
1087 | bn6 = BATfirstn_unique_with_groups(bn3, NULL, bn5, n, asc, nilslast, NULL, NULL); |
1088 | BBPunfix(bn3->batCacheid); |
1089 | BBPunfix(bn5->batCacheid); |
1090 | if (bn6 == NULL) { |
1091 | BBPunfix(bn1->batCacheid); |
1092 | return GDK_FAIL; |
1093 | } |
1094 | rc = BATleftjoin(&bn8, &bn7, bn1, bn6, NULL, NULL, false, BUN_NONE); |
1095 | BBPunfix(bn6->batCacheid); |
1096 | if (rc != GDK_SUCCEED) |
1097 | return GDK_FAIL; |
1098 | BBPunfix(bn7->batCacheid); |
1099 | bn = BATproject(bn8, s); |
1100 | BBPunfix(bn8->batCacheid); |
1101 | if (bn == NULL) |
1102 | return GDK_FAIL; |
1103 | } else { |
1104 | bn = BATfirstn_unique_with_groups(b, s, g, n, asc, nilslast, &last, &lastg); |
1105 | if (bn == NULL) |
1106 | return GDK_FAIL; |
1107 | } |
1108 | if (BATcount(bn) == 0) { |
1109 | if (gids) { |
1110 | gn = BATdense(0, 0, 0); |
1111 | if (gn == NULL) { |
1112 | BBPunfix(bn->batCacheid); |
1113 | return GDK_FAIL; |
1114 | } |
1115 | *gids = gn; |
1116 | } |
1117 | *topn = bn; |
1118 | return GDK_SUCCEED; |
1119 | } |
1120 | if (!distinct && !b->tkey) { |
1121 | BAT *bn1, *bn2, *bn3, *bn4; |
1122 | BATiter bi = bat_iterator(b); |
1123 | |
1124 | bn1 = bn; |
1125 | bn2 = BATselect(g, NULL, &lastg, NULL, true, false, false); |
1126 | if (bn2 == NULL) { |
1127 | BBPunfix(bn1->batCacheid); |
1128 | return GDK_FAIL; |
1129 | } |
1130 | bn3 = BATproject(bn2, s); |
1131 | BBPunfix(bn2->batCacheid); |
1132 | if (bn3 == NULL) { |
1133 | BBPunfix(bn1->batCacheid); |
1134 | return GDK_FAIL; |
1135 | } |
1136 | bn4 = BATselect(b, bn3, BUNtail(bi, last - b->hseqbase), NULL, true, false, false); |
1137 | BBPunfix(bn3->batCacheid); |
1138 | if (bn4 == NULL) { |
1139 | BBPunfix(bn1->batCacheid); |
1140 | return GDK_FAIL; |
1141 | } |
1142 | bn = BATmergecand(bn1, bn4); |
1143 | BBPunfix(bn1->batCacheid); |
1144 | BBPunfix(bn4->batCacheid); |
1145 | if (bn == NULL) |
1146 | return GDK_FAIL; |
1147 | } |
1148 | if (gids) { |
1149 | BAT *bn1, *bn2, *bn3, *bn4, *bn5, *bn6, *bn7, *bn8; |
1150 | |
1151 | if ((bn1 = BATintersect(s, bn, NULL, NULL, false, BUN_NONE)) == NULL) { |
1152 | BBPunfix(bn->batCacheid); |
1153 | return GDK_FAIL; |
1154 | } |
1155 | bn2 = BATproject(bn1, g); |
1156 | BBPunfix(bn1->batCacheid); |
1157 | if (bn2 == NULL) { |
1158 | BBPunfix(bn->batCacheid); |
1159 | return GDK_FAIL; |
1160 | } |
1161 | bn3 = BATproject(bn, b); |
1162 | if (bn3 == NULL) { |
1163 | BBPunfix(bn2->batCacheid); |
1164 | return GDK_FAIL; |
1165 | } |
1166 | rc = BATsort(NULL, &bn4, &bn5, bn2, NULL, NULL, false, false, false); |
1167 | BBPunfix(bn2->batCacheid); |
1168 | if (rc != GDK_SUCCEED) { |
1169 | BBPunfix(bn->batCacheid); |
1170 | BBPunfix(bn3->batCacheid); |
1171 | return GDK_FAIL; |
1172 | } |
1173 | rc = BATsort(NULL, &bn6, &bn7, bn3, bn4, bn5, !asc, !asc, false); |
1174 | BBPunfix(bn3->batCacheid); |
1175 | BBPunfix(bn4->batCacheid); |
1176 | BBPunfix(bn5->batCacheid); |
1177 | if (rc != GDK_SUCCEED) { |
1178 | BBPunfix(bn->batCacheid); |
1179 | return GDK_FAIL; |
1180 | } |
1181 | rc = BATsort(NULL, &bn8, NULL, bn6, NULL, NULL, false, false, false); |
1182 | BBPunfix(bn6->batCacheid); |
1183 | if (rc != GDK_SUCCEED) { |
1184 | BBPunfix(bn->batCacheid); |
1185 | BBPunfix(bn7->batCacheid); |
1186 | return GDK_FAIL; |
1187 | } |
1188 | gn = BATproject(bn8, bn7); |
1189 | BBPunfix(bn7->batCacheid); |
1190 | BBPunfix(bn8->batCacheid); |
1191 | if (gn == NULL) { |
1192 | BBPunfix(bn->batCacheid); |
1193 | return GDK_FAIL; |
1194 | } |
1195 | *gids = gn; |
1196 | assert(BATcount(gn) == BATcount(bn)); |
1197 | } |
1198 | *topn = bn; |
1199 | return GDK_SUCCEED; |
1200 | } |
1201 | |
1202 | gdk_return |
1203 | BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *s, BAT *g, BUN n, bool asc, bool nilslast, bool distinct) |
1204 | { |
1205 | assert(topn != NULL); |
1206 | if (b == NULL) { |
1207 | *topn = NULL; |
1208 | return GDK_SUCCEED; |
1209 | } |
1210 | |
1211 | /* if g specified, then so must s */ |
1212 | assert(g == NULL || s != NULL); |
1213 | /* g and s must be aligned (same size, same hseqbase) */ |
1214 | assert(g == NULL || BATcount(s) == BATcount(g)); |
1215 | assert(g == NULL || BATcount(g) == 0 || s->hseqbase == g->hseqbase); |
1216 | |
1217 | if (n == 0 || BATcount(b) == 0 || (s != NULL && BATcount(s) == 0)) { |
1218 | /* trivial: empty result */ |
1219 | *topn = BATdense(0, 0, 0); |
1220 | if (*topn == NULL) |
1221 | return GDK_FAIL; |
1222 | if (gids) { |
1223 | *gids = BATdense(0, 0, 0); |
1224 | if (*gids == NULL) { |
1225 | BBPreclaim(*topn); |
1226 | return GDK_FAIL; |
1227 | } |
1228 | } |
1229 | return GDK_SUCCEED; |
1230 | } |
1231 | |
1232 | if (g == NULL) { |
1233 | if (gids == NULL && !distinct) { |
1234 | *topn = BATfirstn_unique(b, s, n, asc, nilslast, NULL); |
1235 | return *topn ? GDK_SUCCEED : GDK_FAIL; |
1236 | } |
1237 | return BATfirstn_grouped(topn, gids, b, s, n, asc, nilslast, distinct); |
1238 | } |
1239 | if (gids == NULL && !distinct) { |
1240 | *topn = BATfirstn_unique_with_groups(b, s, g, n, asc, nilslast, NULL, NULL); |
1241 | return *topn ? GDK_SUCCEED : GDK_FAIL; |
1242 | } |
1243 | return BATfirstn_grouped_with_groups(topn, gids, b, s, g, n, asc, nilslast, distinct); |
1244 | } |
1245 | |