1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9#include "monetdb_config.h"
10#include "gdk.h"
11#include "gdk_private.h"
12#include "gdk_cand.h"
13
14/* how much to extend the extent and histo bats when we run out of space */
15#define GROUPBATINCR 8192
16
17/* BATgroup returns three bats that indicate the grouping of the input
18 * bat.
19 *
20 * Grouping means that all equal values are in the same group, and
21 * differing values are in different groups. If specified, the input
22 * bat g gives a pre-existing grouping which is then subdivided. If a
23 * candidate list s is specified, groups (both the pre-existing
24 * grouping in g and the output grouping) are aligned with the
25 * candidate list, else they are aligned with b.
26 *
27 * The outputs are as follows.
28 *
29 * The groups bat is aligned with the candidate list s, or the input
30 * bat b if there is no candidate list, and the tail has group id's
31 * (type oid).
32 *
33 * The extents and histo bats are indexed by group id. The tail of
34 * extents is the head oid from b of a representative of the group.
35 * The tail of histo is of type lng and contains the number of
36 * elements from b that are member of the group. The extents BAT can
37 * be used as a candidate list (sorted and unique).
38 *
39 * The extents and histo bats are optionally created. The groups bat
40 * is always created. In other words, the groups argument may not be
41 * NULL, but the extents and histo arguments may be NULL.
42 *
43 * There are six different implementations of the grouping code.
44 *
45 * If it can be trivially determined that all groups are singletons,
46 * we can produce the outputs trivially.
47 *
48 * If all values in b are known to be equal (both sorted and reverse
49 * sorted), we produce a single group or copy the input group.
50 *
51 * If the input bats b and g are sorted (either direction) or g is not
52 * specified and b is sorted, or if the subsorted flag is set (only
53 * used by BATsort), we only need to compare consecutive values.
54 *
55 * If the input bat b is sorted, but g is not, we can compare
56 * consecutive values in b and need to scan sections of g for equal
57 * groups.
58 *
59 * If a hash table already exists on b, we can make use of it.
60 *
61 * Otherwise we build a partial hash table on the fly.
62 *
63 * A decision should be made on the order in which grouping occurs.
64 * Let |b| have << different values than |g| then the linked lists
65 * gets extremely long, leading to a n^2 algorithm.
66 * At the MAL level, the multigroup function would perform the dynamic
67 * optimization.
68 */
69
70#define GRPnotfound() \
71 do { \
72 /* no equal found: start new group */ \
73 if (ngrp == maxgrps) { \
74 /* we need to extend extents and histo bats, */ \
75 /* do it at most once */ \
76 maxgrps = BATcount(b); \
77 if (extents) { \
78 BATsetcount(en, ngrp); \
79 if (BATextend(en, maxgrps) != GDK_SUCCEED) \
80 goto error; \
81 exts = (oid *) Tloc(en, 0); \
82 } \
83 if (histo) { \
84 BATsetcount(hn, ngrp); \
85 if (BATextend(hn, maxgrps) != GDK_SUCCEED) \
86 goto error; \
87 cnts = (lng *) Tloc(hn, 0); \
88 } \
89 } \
90 if (extents) \
91 exts[ngrp] = hseqb + p - lo; \
92 if (histo) \
93 cnts[ngrp] = 1; \
94 ngrps[r] = ngrp++; \
95 } while (0)
96
97
98#define GRP_compare_consecutive_values(INIT_0,INIT_1,DIFFER,KEEP) \
99 do { \
100 INIT_0; \
101 if (ci.tpe == cand_dense) { \
102 if (grps) { \
103 for (r = 0; r < cnt; r++) { \
104 p = canditer_next_dense(&ci) - hseqb; \
105 INIT_1; \
106 if (ngrp == 0 || grps[r] != prev || DIFFER) { \
107 GRPnotfound(); \
108 } else { \
109 ngrps[r] = ngrp - 1; \
110 if (histo) \
111 cnts[ngrp - 1]++; \
112 } \
113 KEEP; \
114 prev = grps[r]; \
115 } \
116 } else { \
117 for (r = 0; r < cnt; r++) { \
118 p = canditer_next_dense(&ci) - hseqb; \
119 INIT_1; \
120 if (ngrp == 0 || DIFFER) { \
121 GRPnotfound(); \
122 } else { \
123 ngrps[r] = ngrp - 1; \
124 if (histo) \
125 cnts[ngrp - 1]++; \
126 } \
127 KEEP; \
128 } \
129 } \
130 } else { \
131 if (grps) { \
132 for (r = 0; r < cnt; r++) { \
133 p = canditer_next(&ci) - hseqb; \
134 INIT_1; \
135 if (ngrp == 0 || grps[r] != prev || DIFFER) { \
136 GRPnotfound(); \
137 } else { \
138 ngrps[r] = ngrp - 1; \
139 if (histo) \
140 cnts[ngrp - 1]++; \
141 } \
142 KEEP; \
143 prev = grps[r]; \
144 } \
145 } else { \
146 for (r = 0; r < cnt; r++) { \
147 p = canditer_next(&ci) - hseqb; \
148 INIT_1; \
149 if (ngrp == 0 || DIFFER) { \
150 GRPnotfound(); \
151 } else { \
152 ngrps[r] = ngrp - 1; \
153 if (histo) \
154 cnts[ngrp - 1]++; \
155 } \
156 KEEP; \
157 } \
158 } \
159 } \
160 } while(0)
161
162#define flt_neq(a, b) (is_flt_nil(a) ? !is_flt_nil(b) : is_flt_nil(b) || (a) != (b))
163#define dbl_neq(a, b) (is_dbl_nil(a) ? !is_dbl_nil(b) : is_dbl_nil(b) || (a) != (b))
164#define bte_neq(a, b) ((a) != (b))
165#define sht_neq(a, b) ((a) != (b))
166#define int_neq(a, b) ((a) != (b))
167#define lng_neq(a, b) ((a) != (b))
168#define hge_neq(a, b) ((a) != (b))
169
170#define GRP_compare_consecutive_values_tpe(TYPE) \
171 GRP_compare_consecutive_values( \
172 /* INIT_0 */ const TYPE *w = (TYPE *) Tloc(b, 0); \
173 TYPE pw = 0 , \
174 /* INIT_1 */ , \
175 /* DIFFER */ TYPE##_neq(w[p], pw) , \
176 /* KEEP */ pw = w[p] \
177 )
178
179#define GRP_compare_consecutive_values_any() \
180 GRP_compare_consecutive_values( \
181 /* INIT_0 */ pv = NULL , \
182 /* INIT_1 */ v = BUNtail(bi, p) , \
183 /* DIFFER */ cmp(v, pv) != 0 , \
184 /* KEEP */ pv = v \
185 )
186
187
188#define GRP_subscan_old_groups(INIT_0,INIT_1,EQUAL,KEEP) \
189 do { \
190 INIT_0; \
191 pgrp[grps[0]] = 0; \
192 j = 0; \
193 if (ci.tpe == cand_dense) { \
194 for (r = 0; r < cnt; r++) { \
195 p = canditer_next_dense(&ci) - hseqb; \
196 INIT_1; \
197 if (ngrp != 0 && EQUAL) { \
198 /* range [j, r) is all same value */ \
199 /* i is position where we saw r's */ \
200 /* old group last */ \
201 i = pgrp[grps[r]]; \
202 /* p is new position where we saw this \
203 * group */ \
204 pgrp[grps[r]] = r; \
205 if (j <= i && i < r) { \
206 /* i is position of equal */ \
207 /* value in same old group */ \
208 /* as r, so r gets same new */ \
209 /* group as i */ \
210 oid grp = ngrps[i]; \
211 ngrps[r] = grp; \
212 if (histo) \
213 cnts[grp]++; \
214 if (gn->tsorted && \
215 grp != ngrp - 1) \
216 gn->tsorted = false; \
217 /* we found the value/group */ \
218 /* combination, go to next */ \
219 /* value */ \
220 continue; \
221 } \
222 } else { \
223 /* value differs from previous value */ \
224 /* (or is the first) */ \
225 j = r; \
226 KEEP; \
227 pgrp[grps[r]] = r; \
228 } \
229 /* start a new group */ \
230 GRPnotfound(); \
231 } \
232 } else { \
233 for (r = 0; r < cnt; r++) { \
234 p = canditer_next(&ci) - hseqb; \
235 INIT_1; \
236 if (ngrp != 0 && EQUAL) { \
237 /* range [j, r) is all same value */ \
238 /* i is position where we saw r's */ \
239 /* old group last */ \
240 i = pgrp[grps[r]]; \
241 /* p is new position where we saw this \
242 * group */ \
243 pgrp[grps[r]] = r; \
244 if (j <= i && i < r) { \
245 /* i is position of equal */ \
246 /* value in same old group */ \
247 /* as r, so r gets same new */ \
248 /* group as i */ \
249 oid grp = ngrps[i]; \
250 ngrps[r] = grp; \
251 if (histo) \
252 cnts[grp]++; \
253 if (gn->tsorted && \
254 grp != ngrp - 1) \
255 gn->tsorted = false; \
256 /* we found the value/group */ \
257 /* combination, go to next */ \
258 /* value */ \
259 continue; \
260 } \
261 } else { \
262 /* value differs from previous value */ \
263 /* (or is the first) */ \
264 j = r; \
265 KEEP; \
266 pgrp[grps[r]] = r; \
267 } \
268 /* start a new group */ \
269 GRPnotfound(); \
270 } \
271 } \
272 } while(0)
273
274#define flt_equ(a, b) (is_flt_nil(a) ? is_flt_nil(b) : !is_flt_nil(b) && (a) == (b))
275#define dbl_equ(a, b) (is_dbl_nil(a) ? is_dbl_nil(b) : !is_dbl_nil(b) && (a) == (b))
276#define bte_equ(a, b) ((a) == (b))
277#define sht_equ(a, b) ((a) == (b))
278#define int_equ(a, b) ((a) == (b))
279#define lng_equ(a, b) ((a) == (b))
280#define hge_equ(a, b) ((a) == (b))
281
282#define GRP_subscan_old_groups_tpe(TYPE) \
283 GRP_subscan_old_groups( \
284 /* INIT_0 */ const TYPE *w = (TYPE *) Tloc(b, 0); \
285 TYPE pw = 0 , \
286 /* INIT_1 */ , \
287 /* EQUAL */ TYPE##_equ(w[p], pw) , \
288 /* KEEP */ pw = w[p] \
289 )
290
291#define GRP_subscan_old_groups_any() \
292 GRP_subscan_old_groups( \
293 /* INIT_0 */ pv = NULL , \
294 /* INIT_1 */ v = BUNtail(bi, p) , \
295 /* EQUAL */ cmp(v, pv) == 0 , \
296 /* KEEP */ pv = v \
297 )
298
299/* If a hash table exists on b we use it.
300 *
301 * The algorithm is simple. We go through b and for each value we
302 * follow the hash chain starting at the next element after that value
303 * to find one that is equal to the value we're currently looking at.
304 * If we found such a value, we add the value to the same group. If
305 * we reach the end of the chain, we create a new group.
306 *
307 * If b (the original, that is) is a view on another BAT, and this
308 * other BAT has a hash, we use that. The lo and hi values are the
309 * bounds of the parent BAT that we're considering.
310 *
311 * Note this algorithm depends critically on the fact that our hash
312 * chains go from higher to lower BUNs.
313 */
314#define GRP_use_existing_hash_table(INIT_0,INIT_1,EQUAL) \
315 do { \
316 INIT_0; \
317 assert(grps == NULL); \
318 if (ci.tpe == cand_dense) { \
319 for (r = 0; r < cnt; r++) { \
320 oid o = canditer_next_dense(&ci); \
321 p = o - hseqb + lo; \
322 INIT_1; \
323 /* this loop is similar, but not */ \
324 /* equal, to HASHloop: the difference */ \
325 /* is that we only consider BUNs */ \
326 /* smaller than the one we're looking */ \
327 /* up (p) */ \
328 for (hb = HASHgetlink(hs, p); \
329 hb != HASHnil(hs) && hb >= lo; \
330 hb = HASHgetlink(hs, hb)) { \
331 oid grp; \
332 assert(hb < p); \
333 q = canditer_search_dense(&ci, hb + hseqb - lo, false); \
334 if (q == BUN_NONE) \
335 continue; \
336 if (EQUAL) { \
337 grp = ngrps[q]; \
338 ngrps[r] = grp; \
339 if (histo) \
340 cnts[grp]++; \
341 if (gn->tsorted && \
342 grp != ngrp - 1) \
343 gn->tsorted = false; \
344 break; \
345 } \
346 } \
347 if (hb == HASHnil(hs) || hb < lo) { \
348 GRPnotfound(); \
349 } \
350 } \
351 } else { \
352 for (r = 0; r < cnt; r++) { \
353 oid o = canditer_next(&ci); \
354 p = o - hseqb + lo; \
355 INIT_1; \
356 /* this loop is similar, but not */ \
357 /* equal, to HASHloop: the difference */ \
358 /* is that we only consider BUNs */ \
359 /* smaller than the one we're looking */ \
360 /* up (p) */ \
361 for (hb = HASHgetlink(hs, p); \
362 hb != HASHnil(hs) && hb >= lo; \
363 hb = HASHgetlink(hs, hb)) { \
364 oid grp; \
365 assert(hb < p); \
366 q = canditer_search(&ci, hb + hseqb - lo, false); \
367 if (q == BUN_NONE) \
368 continue; \
369 if (EQUAL) { \
370 grp = ngrps[q]; \
371 ngrps[r] = grp; \
372 if (histo) \
373 cnts[grp]++; \
374 if (gn->tsorted && \
375 grp != ngrp - 1) \
376 gn->tsorted = false; \
377 break; \
378 } \
379 } \
380 if (hb == HASHnil(hs) || hb < lo) { \
381 GRPnotfound(); \
382 } \
383 } \
384 } \
385 } while(0)
386
387#define GRP_use_existing_hash_table_tpe(TYPE) \
388 GRP_use_existing_hash_table( \
389 /* INIT_0 */ const TYPE *w = (TYPE *) Tloc(b, 0), \
390 /* INIT_1 */ , \
391 /* EQUAL */ TYPE##_equ(w[p], w[hb]) \
392 )
393
394#define GRP_use_existing_hash_table_any() \
395 GRP_use_existing_hash_table( \
396 /* INIT_0 */ , \
397 /* INIT_1 */ v = BUNtail(bi, p) , \
398 /* EQUAL */ cmp(v, BUNtail(bi, hb)) == 0 \
399 )
400
401/* reverse the bits of an OID value */
402static inline oid
403rev(oid x)
404{
405#if SIZEOF_OID == 8
406 x = ((x & 0x5555555555555555) << 1) | ((x >> 1) & 0x5555555555555555);
407 x = ((x & 0x3333333333333333) << 2) | ((x >> 2) & 0x3333333333333333);
408 x = ((x & 0x0F0F0F0F0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0F);
409 x = ((x & 0x00FF00FF00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF00FF00FF);
410 x = ((x & 0x0000FFFF0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF0000FFFF);
411 x = ((x & 0x00000000FFFFFFFF) << 32) | ((x >> 32) & 0x00000000FFFFFFFF);
412#else
413 x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555);
414 x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333);
415 x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F);
416 x = ((x & 0x00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF);
417 x = ((x & 0x0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF);
418#endif
419 return x;
420}
421
422/* population count: count number of 1 bits in a value */
423#ifdef __GNUC__
424#if SIZEOF_OID == SIZEOF_INT
425#define pop(x) __builtin_popcount(x)
426#else
427#define pop(x) __builtin_popcountl(x)
428#endif
429#else
430static inline int
431pop(oid x)
432{
433 /* divide and conquer implementation */
434#if SIZEOF_OID == 8
435 x = (x & 0x5555555555555555) + ((x >> 1) & 0x5555555555555555);
436 x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
437 x = (x & 0x0F0F0F0F0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F0F0F0F0F);
438 x = (x & 0x00FF00FF00FF00FF) + ((x >> 8) & 0x00FF00FF00FF00FF);
439 x = (x & 0x0000FFFF0000FFFF) + ((x >> 16) & 0x0000FFFF0000FFFF);
440 x = (x & 0x00000000FFFFFFFF) + ((x >> 32) & 0x00000000FFFFFFFF);
441#else
442 x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
443 x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
444 x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);
445 x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF);
446 x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF);
447#endif
448 return (int) x;
449}
450#endif
451
452#define GRP_create_partial_hash_table_core(INIT_1,HASH,EQUAL,ASSERT,GRPTST) \
453 do { \
454 if (ci.tpe == cand_dense) { \
455 for (r = 0; r < cnt; r++) { \
456 p = canditer_next_dense(&ci) - hseqb; \
457 INIT_1; \
458 prb = HASH; \
459 for (hb = HASHget(hs, prb); \
460 hb != HASHnil(hs); \
461 hb = HASHgetlink(hs, hb)) { \
462 ASSERT; \
463 q = canditer_search_dense(&ci, hb + hseqb, false); \
464 if (q == BUN_NONE) \
465 continue; \
466 GRPTST(q, r); \
467 if (EQUAL) { \
468 grp = ngrps[q]; \
469 ngrps[r] = grp; \
470 if (histo) \
471 cnts[grp]++; \
472 if (gn->tsorted && \
473 grp != ngrp - 1) \
474 gn->tsorted = false; \
475 break; \
476 } \
477 } \
478 if (hb == HASHnil(hs)) { \
479 GRPnotfound(); \
480 /* enter new group into hash table */ \
481 HASHputlink(hs, p, HASHget(hs, prb)); \
482 HASHput(hs, prb, p); \
483 } \
484 } \
485 } else { \
486 for (r = 0; r < cnt; r++) { \
487 p = canditer_next(&ci) - hseqb; \
488 INIT_1; \
489 prb = HASH; \
490 for (hb = HASHget(hs, prb); \
491 hb != HASHnil(hs); \
492 hb = HASHgetlink(hs, hb)) { \
493 ASSERT; \
494 q = canditer_search(&ci, hb + hseqb, false); \
495 if (q == BUN_NONE) \
496 continue; \
497 GRPTST(q, r); \
498 if (EQUAL) { \
499 grp = ngrps[q]; \
500 ngrps[r] = grp; \
501 if (histo) \
502 cnts[grp]++; \
503 if (gn->tsorted && \
504 grp != ngrp - 1) \
505 gn->tsorted = false; \
506 break; \
507 } \
508 } \
509 if (hb == HASHnil(hs)) { \
510 GRPnotfound(); \
511 /* enter new group into hash table */ \
512 HASHputlink(hs, p, HASHget(hs, prb)); \
513 HASHput(hs, prb, p); \
514 } \
515 } \
516 } \
517 } while (0)
518#define GCGRPTST(i, j) if (grps[i] != grps[j]) { hb = HASHnil(hs); break; }
519#define GRPTST(i, j) if (grps[i] != grps[j]) continue
520#define NOGRPTST(i, j) (void) 0
521#define GRP_create_partial_hash_table(INIT_0,INIT_1,HASH,EQUAL) \
522 do { \
523 INIT_0; \
524 if (grps) { \
525 if (gc) { \
526 GRP_create_partial_hash_table_core(INIT_1,HASH,EQUAL,assert(HASHgetlink(hs, hb) == HASHnil(hs) || HASHgetlink(hs, hb) < hb),GCGRPTST); \
527 } else { \
528 GRP_create_partial_hash_table_core(INIT_1,HASH ^ (rev(grps[r]) >> bits),EQUAL,(void)0,GRPTST); \
529 } \
530 } else { \
531 GRP_create_partial_hash_table_core(INIT_1,HASH,EQUAL,(void)0,NOGRPTST); \
532 } \
533 } while (0)
534
535#define GRP_create_partial_hash_table_tpe(TYPE) \
536 GRP_create_partial_hash_table( \
537 /* INIT_0 */ const TYPE *w = (TYPE *) Tloc(b, 0), \
538 /* INIT_1 */ , \
539 /* HASH */ hash_##TYPE(hs, &w[p]) , \
540 /* EQUAL */ TYPE##_equ(w[p], w[hb]) \
541 )
542
543#define GRP_create_partial_hash_table_any() \
544 GRP_create_partial_hash_table( \
545 /* INIT_0 */ , \
546 /* INIT_1 */ v = BUNtail(bi, p) , \
547 /* HASH */ hash_any(hs, v) , \
548 /* EQUAL */ cmp(v, BUNtail(bi, hb)) == 0 \
549 )
550
551
552gdk_return
553BATgroup_internal(BAT **groups, BAT **extents, BAT **histo,
554 BAT *b, BAT *s, BAT *g, BAT *e, BAT *h, bool subsorted)
555{
556 BAT *gn = NULL, *en = NULL, *hn = NULL;
557 int t;
558 int (*cmp)(const void *, const void *);
559 const oid *grps = NULL;
560 oid *restrict ngrps, ngrp, prev = 0, hseqb = 0;
561 oid *restrict exts = NULL;
562 lng *restrict cnts = NULL;
563 BUN p, q, r;
564 const void *v, *pv;
565 BATiter bi;
566 Hash *hs = NULL;
567 BUN hb;
568 BUN maxgrps;
569 bat parent;
570 BUN cnt;
571 BUN lo = 0;
572 struct canditer ci;
573 oid maxgrp = oid_nil; /* maximum value of g BAT (if subgrouping) */
574 PROPrec *prop;
575
576 if (b == NULL) {
577 GDKerror("BATgroup: b must exist\n");
578 return GDK_FAIL;
579 }
580 assert(s == NULL || BATttype(s) == TYPE_oid);
581 cnt = canditer_init(&ci, b, s);
582
583 /* g is NULL or [oid(dense),oid] and same size as b or s */
584 assert(g == NULL || BATttype(g) == TYPE_oid || BATcount(g) == 0);
585 assert(g == NULL || BATcount(g) == cnt);
586 assert(g == NULL || BATcount(b) == 0 || (s ? g->hseqbase == s->hseqbase : g->hseqbase == b->hseqbase));
587 /* e is NULL or [oid(dense),oid] */
588 assert(e == NULL || BATttype(e) == TYPE_oid);
589 /* h is NULL or [oid(dense),lng] */
590 assert(h == NULL || h->ttype == TYPE_lng);
591 /* e and h are aligned */
592 assert(e == NULL || h == NULL || BATcount(e) == BATcount(h));
593 assert(e == NULL || h == NULL || e->hseqbase == h->hseqbase);
594 /* we want our output to go somewhere */
595 assert(groups != NULL);
596
597 if (cnt == 0) {
598 hseqb = 0;
599 } else if (s) {
600 hseqb = s->hseqbase + ci.offset;
601 } else {
602 hseqb = b->hseqbase;
603 }
604 if (b->tkey || cnt <= 1 || (g && (g->tkey || BATtdense(g)))) {
605 /* grouping is trivial: 1 element per group */
606 ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
607 "s=%s#" BUNFMT ","
608 "g=%s#" BUNFMT ","
609 "e=%s#" BUNFMT ","
610 "h=%s#" BUNFMT ",subsorted=%d): "
611 "trivial case: 1 element per group\n",
612 BATgetId(b), BATcount(b), ATOMname(b->ttype),
613 s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
614 g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
615 e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
616 h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
617 subsorted);
618 gn = BATdense(hseqb, 0, BATcount(b));
619 if (gn == NULL)
620 goto error;
621 *groups = gn;
622 if (extents) {
623 en = canditer_slice(&ci, 0, cnt);
624 if (en == NULL)
625 goto error;
626 *extents = en;
627 }
628 if (histo) {
629 hn = BATconstant(0, TYPE_lng, &(lng){1}, cnt, TRANSIENT);
630 if (hn == NULL)
631 goto error;
632 *histo = hn;
633 }
634 return GDK_SUCCEED;
635 }
636 assert(!BATtdense(b));
637 if (g) {
638 if (BATtdense(g))
639 maxgrp = g->tseqbase + BATcount(g);
640 else if (BATtordered(g))
641 maxgrp = * (oid *) Tloc(g, BATcount(g) - 1);
642 else {
643 prop = BATgetprop(g, GDK_MAX_VALUE);
644 if (prop)
645 maxgrp = prop->v.val.oval;
646 else if (BATordered(g) && BATordered_rev(g))
647 maxgrp = 0;
648 }
649 if (maxgrp == 0)
650 g = NULL; /* single group */
651 else
652 grps = (const oid *) Tloc(g, 0);
653 }
654 if (BATordered(b) && BATordered_rev(b)) {
655 /* all values are equal */
656 if (g == NULL || (BATordered(g) && BATordered_rev(g))) {
657 /* there's only a single group: 0 */
658 ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
659 "s=%s#" BUNFMT ","
660 "g=%s#" BUNFMT ","
661 "e=%s#" BUNFMT ","
662 "h=%s#" BUNFMT ",subsorted=%d): "
663 "trivial case: single output group\n",
664 BATgetId(b), BATcount(b), ATOMname(b->ttype),
665 s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
666 g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
667 e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
668 h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
669 subsorted);
670 gn = BATconstant(hseqb, TYPE_oid, &(oid){0}, cnt, TRANSIENT);
671 if (gn == NULL)
672 goto error;
673 *groups = gn;
674 if (extents) {
675 en = BATdense(0, canditer_next(&ci), 1);
676 if (en == NULL)
677 goto error;
678 *extents = en;
679 }
680 if (histo) {
681 hn = BATconstant(0, TYPE_lng, &(lng){(lng)cnt}, 1, TRANSIENT);
682 if (hn == NULL)
683 goto error;
684 *histo = hn;
685 }
686 return GDK_SUCCEED;
687 }
688 if ((extents == NULL || e != NULL) &&
689 (histo == NULL || h != NULL) &&
690 cnt == BATcount(b)) {
691 /* inherit given grouping; note that if
692 * extents/histo is to be returned, we need
693 * e/h available in order to copy them,
694 * otherwise we will need to calculate them
695 * which we will do using the "normal" case */
696 ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
697 "s=%s#" BUNFMT ","
698 "g=%s#" BUNFMT ","
699 "e=%s#" BUNFMT ","
700 "h=%s#" BUNFMT ",subsorted=%d): "
701 "trivial case: copy input groups\n",
702 BATgetId(b), BATcount(b), ATOMname(b->ttype),
703 s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
704 g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
705 e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
706 h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
707 subsorted);
708 gn = COLcopy(g, g->ttype, false, TRANSIENT);
709 if (gn == NULL)
710 goto error;
711 if (!is_oid_nil(maxgrp)) {
712 prop = BATgetprop(g, GDK_MAX_VALUE);
713 if (prop)
714 BATsetprop(gn, GDK_MAX_VALUE, TYPE_oid, &maxgrp);
715 }
716
717 *groups = gn;
718 if (extents) {
719 en = COLcopy(e, e->ttype, false, TRANSIENT);
720 if (en == NULL)
721 goto error;
722 *extents = en;
723 }
724 if (histo) {
725 hn = COLcopy(h, h->ttype, false, TRANSIENT);
726 if (hn == NULL)
727 goto error;
728 *histo = hn;
729 }
730 return GDK_SUCCEED;
731 }
732 }
733 assert(g == NULL || !BATtdense(g)); /* i.e. g->ttype == TYPE_oid */
734 bi = bat_iterator(b);
735 cmp = ATOMcompare(b->ttype);
736 gn = COLnew(hseqb, TYPE_oid, cnt, TRANSIENT);
737 if (gn == NULL)
738 goto error;
739 ngrps = (oid *) Tloc(gn, 0);
740 maxgrps = cnt / 10;
741 if (!is_oid_nil(maxgrp) && maxgrps < maxgrp)
742 maxgrps += maxgrp;
743 if (e && maxgrps < BATcount(e))
744 maxgrps += BATcount(e);
745 if (h && maxgrps < BATcount(h))
746 maxgrps += BATcount(h);
747 if (maxgrps < GROUPBATINCR)
748 maxgrps = GROUPBATINCR;
749 if (b->twidth <= 2)
750 maxgrps = (BUN) 1 << (8 << (b->twidth == 2));
751 if (extents) {
752 en = COLnew(0, TYPE_oid, maxgrps, TRANSIENT);
753 if (en == NULL)
754 goto error;
755 exts = (oid *) Tloc(en, 0);
756 }
757 if (histo) {
758 hn = COLnew(0, TYPE_lng, maxgrps, TRANSIENT);
759 if (hn == NULL)
760 goto error;
761 cnts = (lng *) Tloc(hn, 0);
762 }
763 ngrp = 0;
764 BATsetcount(gn, cnt);
765
766 hseqb = b->hseqbase; /* abbreviation */
767
768 /* figure out if we can use the storage type also for
769 * comparing values */
770 t = ATOMbasetype(b->ttype);
771 /* for strings we can use the offset instead of the actual
772 * string values if we know that the strings in the string
773 * heap are unique */
774 if (t == TYPE_str && GDK_ELIMDOUBLES(b->tvheap)) {
775 switch (b->twidth) {
776 case 1:
777 t = TYPE_bte;
778 break;
779 case 2:
780 t = TYPE_sht;
781 break;
782 case 4:
783 t = TYPE_int;
784 break;
785#if SIZEOF_VAR_T == 8
786 case 8:
787 t = TYPE_lng;
788 break;
789#endif
790 default:
791 assert(0);
792 }
793 }
794
795 if (subsorted ||
796 ((BATordered(b) || BATordered_rev(b)) &&
797 (g == NULL || BATordered(g) || BATordered_rev(g)))) {
798 /* we only need to compare each entry with the previous */
799 ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
800 "s=%s#" BUNFMT ","
801 "g=%s#" BUNFMT ","
802 "e=%s#" BUNFMT ","
803 "h=%s#" BUNFMT ",subsorted=%d): "
804 "compare consecutive values\n",
805 BATgetId(b), BATcount(b), ATOMname(b->ttype),
806 s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
807 g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
808 e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
809 h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
810 subsorted);
811
812 switch (t) {
813 case TYPE_bte:
814 GRP_compare_consecutive_values_tpe(bte);
815 break;
816 case TYPE_sht:
817 GRP_compare_consecutive_values_tpe(sht);
818 break;
819 case TYPE_int:
820 GRP_compare_consecutive_values_tpe(int);
821 break;
822 case TYPE_lng:
823 GRP_compare_consecutive_values_tpe(lng);
824 break;
825#ifdef HAVE_HGE
826 case TYPE_hge:
827 GRP_compare_consecutive_values_tpe(hge);
828 break;
829#endif
830 case TYPE_flt:
831 GRP_compare_consecutive_values_tpe(flt);
832 break;
833 case TYPE_dbl:
834 GRP_compare_consecutive_values_tpe(dbl);
835 break;
836 default:
837 GRP_compare_consecutive_values_any();
838 break;
839 }
840
841 gn->tsorted = true;
842 *groups = gn;
843 } else if (BATordered(b) || BATordered_rev(b)) {
844 BUN i, j;
845 BUN *pgrp;
846
847 assert(g); /* if g == NULL or if there is a single */
848 assert(grps); /* group, we used the code above */
849 /* for each value, we need to scan all previous equal
850 * values (a consecutive, possibly empty, range) to
851 * see if we can find one in the same old group
852 *
853 * we do this by maintaining for each old group the
854 * last time we saw that group, so if the last time we
855 * saw the old group of the current value is within
856 * this range, we can reuse the new group */
857 ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
858 "s=%s#" BUNFMT ","
859 "g=%s#" BUNFMT ","
860 "e=%s#" BUNFMT ","
861 "h=%s#" BUNFMT ",subsorted=%d): "
862 "subscan old groups\n",
863 BATgetId(b), BATcount(b), ATOMname(b->ttype),
864 s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
865 g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
866 e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
867 h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
868 subsorted);
869 /* determine how many old groups there are */
870 if (e) {
871 j = BATcount(e) + (BUN) e->hseqbase;
872 } else if (h) {
873 j = BATcount(h) + (BUN) h->hseqbase;
874 } else {
875 oid m = 0;
876 for (i = 0, j= BATcount(g); i < j; i++)
877 m = MAX(m , grps[i]);
878 j = (BUN) m + 1;
879 }
880 /* array to maintain last time we saw each old group */
881 pgrp = GDKmalloc(sizeof(BUN) * j);
882 if (pgrp == NULL)
883 goto error;
884 /* initialize to impossible position */
885 memset(pgrp, ~0, sizeof(BUN) * j);
886
887 gn->tsorted = true; /* be optimistic */
888
889 switch (t) {
890 case TYPE_bte:
891 GRP_subscan_old_groups_tpe(bte);
892 break;
893 case TYPE_sht:
894 GRP_subscan_old_groups_tpe(sht);
895 break;
896 case TYPE_int:
897 GRP_subscan_old_groups_tpe(int);
898 break;
899 case TYPE_lng:
900 GRP_subscan_old_groups_tpe(lng);
901 break;
902#ifdef HAVE_HGE
903 case TYPE_hge:
904 GRP_subscan_old_groups_tpe(hge);
905 break;
906#endif
907 case TYPE_flt:
908 GRP_subscan_old_groups_tpe(flt);
909 break;
910 case TYPE_dbl:
911 GRP_subscan_old_groups_tpe(dbl);
912 break;
913 default:
914 GRP_subscan_old_groups_any();
915 break;
916 }
917
918 GDKfree(pgrp);
919 } else if (g == NULL && t == TYPE_bte) {
920 /* byte-sized values, use 256 entry array to keep
921 * track of doled out group ids; note that we can't
922 * possibly have more than 256 groups, so the group id
923 * fits in an unsigned char */
924 unsigned char *restrict bgrps = GDKmalloc(256);
925 const unsigned char *restrict w = (const unsigned char *) Tloc(b, 0);
926 unsigned char v;
927
928 if (bgrps == NULL)
929 goto error;
930 memset(bgrps, 0xFF, 256);
931 if (histo)
932 memset(cnts, 0, maxgrps * sizeof(lng));
933 ngrp = 0;
934 gn->tsorted = true;
935 for (r = 0; r < cnt; r++) {
936 oid o = canditer_next(&ci);
937 p = o - b->hseqbase;
938 if ((v = bgrps[w[p]]) == 0xFF && ngrp < 256) {
939 bgrps[w[p]] = v = (unsigned char) ngrp++;
940 if (extents)
941 exts[v] = o;
942 }
943 ngrps[r] = v;
944 if (r > 0 && v < ngrps[r - 1])
945 gn->tsorted = false;
946 if (histo)
947 cnts[v]++;
948 }
949 GDKfree(bgrps);
950 } else if (g == NULL && t == TYPE_sht) {
951 /* short-sized values, use 65536 entry array to keep
952 * track of doled out group ids; note that we can't
953 * possibly have more than 65536 groups, so the group
954 * id fits in an unsigned short */
955 unsigned short *restrict sgrps = GDKmalloc(65536 * sizeof(short));
956 const unsigned short *restrict w = (const unsigned short *) Tloc(b, 0);
957 unsigned short v;
958
959 if (sgrps == NULL)
960 goto error;
961 memset(sgrps, 0xFF, 65536 * sizeof(short));
962 if (histo)
963 memset(cnts, 0, maxgrps * sizeof(lng));
964 ngrp = 0;
965 gn->tsorted = true;
966 for (r = 0; r < cnt; r++) {
967 oid o = canditer_next(&ci);
968 p = o - b->hseqbase;
969 if ((v = sgrps[w[p]]) == 0xFFFF && ngrp < 65536) {
970 sgrps[w[p]] = v = (unsigned short) ngrp++;
971 if (extents)
972 exts[v] = o;
973 }
974 ngrps[r] = v;
975 if (r > 0 && v < ngrps[r - 1])
976 gn->tsorted = false;
977 if (histo)
978 cnts[v]++;
979 }
980 GDKfree(sgrps);
981 } else if (g == NULL &&
982 (BATcheckhash(b) ||
983 (!b->batTransient &&
984 BAThash(b) == GDK_SUCCEED) ||
985 ((parent = VIEWtparent(b)) != 0 &&
986 BATcheckhash(BBPdescriptor(parent))))) {
987 /* we already have a hash table on b, or b is
988 * persistent and we could create a hash table, or b
989 * is a view on a bat that already has a hash table;
990 * but don't do this if we're checking for subgroups
991 * since we may have to go through long lists of
992 * duplicates in the hash table to find an old
993 * group */
994 ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
995 "s=%s#" BUNFMT ","
996 "g=%s#" BUNFMT ","
997 "e=%s#" BUNFMT ","
998 "h=%s#" BUNFMT ",subsorted=%d): "
999 "use existing hash table\n",
1000 BATgetId(b), BATcount(b), ATOMname(b->ttype),
1001 s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
1002 g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
1003 e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
1004 h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
1005 subsorted);
1006 if (b->thash == NULL && (parent = VIEWtparent(b)) != 0) {
1007 /* b is a view on another bat (b2 for now).
1008 * calculate the bounds [lo, lo+BATcount(b))
1009 * in the parent that b uses */
1010 BAT *b2 = BBPdescriptor(parent);
1011 lo = (BUN) ((b->theap.base - b2->theap.base) >> b->tshift);
1012 b = b2;
1013 bi = bat_iterator(b);
1014 }
1015 hs = b->thash;
1016 gn->tsorted = true; /* be optimistic */
1017
1018 switch (t) {
1019 case TYPE_bte:
1020 GRP_use_existing_hash_table_tpe(bte);
1021 break;
1022 case TYPE_sht:
1023 GRP_use_existing_hash_table_tpe(sht);
1024 break;
1025 case TYPE_int:
1026 GRP_use_existing_hash_table_tpe(int);
1027 break;
1028 case TYPE_lng:
1029 GRP_use_existing_hash_table_tpe(lng);
1030 break;
1031#ifdef HAVE_HGE
1032 case TYPE_hge:
1033 GRP_use_existing_hash_table_tpe(hge);
1034 break;
1035#endif
1036 case TYPE_flt:
1037 GRP_use_existing_hash_table_tpe(flt);
1038 break;
1039 case TYPE_dbl:
1040 GRP_use_existing_hash_table_tpe(dbl);
1041 break;
1042 default:
1043 GRP_use_existing_hash_table_any();
1044 break;
1045 }
1046 } else {
1047 bool gc = g != NULL && (BATordered(g) || BATordered_rev(g));
1048 const char *nme;
1049 BUN prb;
1050 int bits, len;
1051 BUN mask;
1052 oid grp;
1053
1054 GDKclrerr(); /* not interested in BAThash errors */
1055
1056 /* not sorted, and no pre-existing hash table: we'll
1057 * build an incomplete hash table on the fly--also see
1058 * BATassertProps for similar code; we also exploit if
1059 * g is clustered */
1060 ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
1061 "s=%s#" BUNFMT ","
1062 "g=%s#" BUNFMT ","
1063 "e=%s#" BUNFMT ","
1064 "h=%s#" BUNFMT ",subsorted=%d): "
1065 "create partial hash table%s\n",
1066 BATgetId(b), BATcount(b), ATOMname(b->ttype),
1067 s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
1068 g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
1069 e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
1070 h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
1071 subsorted, gc ? " (g clustered)" : "");
1072 nme = GDKinmemory() ? ":inmemory" : BBP_physical(b->batCacheid);
1073 mask = MAX(HASHmask(cnt), 1 << 16);
1074 /* mask is a power of two, so pop(mask - 1) tells us
1075 * which power of two */
1076 bits = 8 * SIZEOF_OID - pop(mask - 1);
1077 if ((hs = GDKzalloc(sizeof(Hash))) == NULL ||
1078 (hs->heap.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0) {
1079 GDKfree(hs);
1080 hs = NULL;
1081 GDKerror("BATgroup: cannot allocate hash table\n");
1082 goto error;
1083 }
1084 len = snprintf(hs->heap.filename, sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid());
1085 if (len < 0 || len >= (int) sizeof(hs->heap.filename) ||
1086 HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE) != GDK_SUCCEED) {
1087 GDKfree(hs);
1088 hs = NULL;
1089 GDKerror("BATgroup: cannot allocate hash table\n");
1090 goto error;
1091 }
1092 gn->tsorted = true; /* be optimistic */
1093
1094 switch (t) {
1095 case TYPE_bte:
1096 if (grps && !is_oid_nil(maxgrp)
1097#if SIZEOF_OID == SIZEOF_LNG
1098 && maxgrp < ((oid) 1 << (SIZEOF_LNG * 8 - 8))
1099#endif
1100 ) {
1101 ulng v;
1102 const bte *w = (bte *) Tloc(b, 0);
1103 GRP_create_partial_hash_table_core(
1104 (void) 0,
1105 (v = ((ulng)grps[r]<<8)|(unsigned char)w[p], hash_lng(hs, &v)),
1106 w[p] == w[hb] && grps[r] == grps[q],
1107 (void) 0,
1108 NOGRPTST);
1109 } else
1110 GRP_create_partial_hash_table_tpe(bte);
1111 break;
1112 case TYPE_sht:
1113 if (grps && !is_oid_nil(maxgrp)
1114#if SIZEOF_OID == SIZEOF_LNG
1115 && maxgrp < ((oid) 1 << (SIZEOF_LNG * 8 - 16))
1116#endif
1117 ) {
1118 ulng v;
1119 const sht *w = (sht *) Tloc(b, 0);
1120 GRP_create_partial_hash_table_core(
1121 (void) 0,
1122 (v = ((ulng)grps[r]<<16)|(unsigned short)w[p], hash_lng(hs, &v)),
1123 w[p] == w[hb] && grps[r] == grps[q],
1124 (void) 0,
1125 NOGRPTST);
1126 } else
1127 GRP_create_partial_hash_table_tpe(sht);
1128 break;
1129 case TYPE_int:
1130 if (grps && !is_oid_nil(maxgrp)
1131#if SIZEOF_OID == SIZEOF_LNG
1132 && maxgrp < ((oid) 1 << (SIZEOF_LNG * 8 - 32))
1133#endif
1134 ) {
1135 ulng v;
1136 const int *w = (int *) Tloc(b, 0);
1137 GRP_create_partial_hash_table_core(
1138 (void) 0,
1139 (v = ((ulng)grps[r]<<32)|(unsigned int)w[p], hash_lng(hs, &v)),
1140 w[p] == w[hb] && grps[r] == grps[q],
1141 (void) 0,
1142 NOGRPTST);
1143 } else
1144 GRP_create_partial_hash_table_tpe(int);
1145 break;
1146 case TYPE_lng:
1147#ifdef HAVE_HGE
1148 if (grps) {
1149 uhge v;
1150 const lng *w = (lng *) Tloc(b, 0);
1151 GRP_create_partial_hash_table_core(
1152 (void) 0,
1153 (v = ((uhge)grps[r]<<64)|(ulng)w[p], hash_hge(hs, &v)),
1154 w[p] == w[hb] && grps[r] == grps[q],
1155 (void) 0,
1156 NOGRPTST);
1157 } else
1158#endif
1159 GRP_create_partial_hash_table_tpe(lng);
1160 break;
1161#ifdef HAVE_HGE
1162 case TYPE_hge:
1163 GRP_create_partial_hash_table_tpe(hge);
1164 break;
1165#endif
1166 case TYPE_flt:
1167 GRP_create_partial_hash_table_tpe(flt);
1168 break;
1169 case TYPE_dbl:
1170 GRP_create_partial_hash_table_tpe(dbl);
1171 break;
1172 default:
1173 GRP_create_partial_hash_table_any();
1174 }
1175
1176 HEAPfree(&hs->heap, true);
1177 GDKfree(hs);
1178 }
1179 if (extents) {
1180 BATsetcount(en, (BUN) ngrp);
1181 en->tkey = true;
1182 en->tsorted = true;
1183 en->trevsorted = ngrp == 1;
1184 en->tnonil = true;
1185 en->tnil = false;
1186 *extents = virtualize(en);
1187 }
1188 if (histo) {
1189 BATsetcount(hn, (BUN) ngrp);
1190 if (ngrp == cnt || ngrp == 1) {
1191 hn->tkey = ngrp == 1;
1192 hn->tsorted = true;
1193 hn->trevsorted = true;
1194 } else {
1195 hn->tkey = false;
1196 hn->tsorted = false;
1197 hn->trevsorted = false;
1198 }
1199 hn->tnonil = true;
1200 hn->tnil = false;
1201 *histo = hn;
1202 }
1203 gn->tkey = ngrp == BATcount(gn);
1204 gn->trevsorted = ngrp == 1 || BATcount(gn) <= 1;
1205 gn->tnonil = true;
1206 gn->tnil = false;
1207 ngrp--; /* max value is one less than number of values */
1208 BATsetprop(gn, GDK_MAX_VALUE, TYPE_oid, &ngrp);
1209 *groups = gn;
1210 return GDK_SUCCEED;
1211 error:
1212 if (hs != NULL && hs != b->thash) {
1213 HEAPfree(&hs->heap, true);
1214 GDKfree(hs);
1215 }
1216 if (gn)
1217 BBPunfix(gn->batCacheid);
1218 if (en)
1219 BBPunfix(en->batCacheid);
1220 if (hn)
1221 BBPunfix(hn->batCacheid);
1222 return GDK_FAIL;
1223}
1224
1225gdk_return
1226BATgroup(BAT **groups, BAT **extents, BAT **histo,
1227 BAT *b, BAT *s, BAT *g, BAT *e, BAT *h)
1228{
1229 return BATgroup_internal(groups, extents, histo, b, s, g, e, h, false);
1230}
1231