| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | /* |
| 10 | * author M.L.Kersten |
| 11 | * BAT Iterators |
| 12 | * Many low level algorithms rely on an iterator to break a |
| 13 | * collection into smaller pieces. Each piece is subsequently processed |
| 14 | * by a block. |
| 15 | * |
| 16 | * For very large BATs it may make sense to break it into chunks |
| 17 | * and process them separately to solve a query. An iterator pair is |
| 18 | * provided to chop a BAT into fixed size elements. |
| 19 | * Each chunk is made available as a BATview. |
| 20 | * It provides read-only access to an underlying BAT. Adjusting the bounds |
| 21 | * is cheap, once the BATview descriptor has been constructed. |
| 22 | * |
| 23 | * The smallest granularity is a single BUN, which can be used |
| 24 | * to realize an iterator over the individual BAT elements. |
| 25 | * For larger sized chunks, the operators return a BATview. |
| 26 | * |
| 27 | * All iterators require storage space to administer the |
| 28 | * location of the next element. The BAT iterator module uses a simple |
| 29 | * lng variable, which also acts as a cursor for barrier statements. |
| 30 | * |
| 31 | * The larger chunks produced are currently static, i.e. |
| 32 | * their size is a parameter of the call. Dynamic chunk sizes |
| 33 | * are interesting for time-series query processing. (See another module) |
| 34 | * |
| 35 | */ |
| 36 | |
| 37 | #include "monetdb_config.h" |
| 38 | #include "iterator.h" |
| 39 | |
| 40 | /* |
| 41 | * We start with the large chunk iterator. |
| 42 | * The definition of the control statements require the same |
| 43 | * control variables, which means that the BATview is accessible |
| 44 | * to determine how far to advance when the next chunk is retrieved. |
| 45 | * The number of elements in the chunk is limited by the granule |
| 46 | * size. |
| 47 | */ |
| 48 | str |
| 49 | ITRnewChunk(lng *res, bat *vid, bat *bid, lng *granule) |
| 50 | { |
| 51 | BAT *b, *view; |
| 52 | BUN cnt; |
| 53 | |
| 54 | if ((b = BATdescriptor(*bid)) == NULL) { |
| 55 | throw(MAL, "chop.newChunk" , INTERNAL_BAT_ACCESS); |
| 56 | } |
| 57 | cnt = BATcount(b); |
| 58 | view = VIEWcreate(b->hseqbase, b); |
| 59 | if (view == NULL) { |
| 60 | BBPunfix(b->batCacheid); |
| 61 | throw(MAL, "chop.newChunk" , GDK_EXCEPTION); |
| 62 | } |
| 63 | |
| 64 | /* printf("set bat chunk bound to " LLFMT " 0 - " BUNFMT "\n", |
| 65 | *granule, MIN(cnt,(BUN) *granule)); */ |
| 66 | VIEWbounds(b, view, 0, MIN(cnt, (BUN) * granule)); |
| 67 | *vid = view->batCacheid; |
| 68 | BBPkeepref(view->batCacheid); |
| 69 | BBPunfix(b->batCacheid); |
| 70 | *res = 0; |
| 71 | return MAL_SUCCEED; |
| 72 | } |
| 73 | |
| 74 | /* |
| 75 | * The nextChunk version advances the reader, |
| 76 | * which also means that the view descriptor is already available. |
| 77 | * The granule size may differ in each call. |
| 78 | */ |
| 79 | str |
| 80 | ITRnextChunk(lng *res, bat *vid, bat *bid, lng *granule) |
| 81 | { |
| 82 | BAT *b, *view; |
| 83 | BUN i; |
| 84 | |
| 85 | if ((b = BATdescriptor(*bid)) == NULL) { |
| 86 | throw(MAL, "iterator.nextChunk" , INTERNAL_BAT_ACCESS); |
| 87 | } |
| 88 | if ((view = BATdescriptor(*vid)) == NULL) { |
| 89 | BBPunfix(b->batCacheid); |
| 90 | throw(MAL, "iterator.nextChunk" , INTERNAL_BAT_ACCESS); |
| 91 | } |
| 92 | i = (BUN) (*res + BATcount(view)); |
| 93 | if (i >= BUNlast(b)) { |
| 94 | *res = lng_nil; |
| 95 | *vid = 0; |
| 96 | BBPunfix(view->batCacheid); |
| 97 | BBPunfix(b->batCacheid); |
| 98 | return MAL_SUCCEED; |
| 99 | } |
| 100 | /* printf("set bat chunk bound to " BUNFMT " - " BUNFMT " \n", |
| 101 | i, i+(BUN) *granule-1); */ |
| 102 | VIEWbounds(b, view, i, i + (BUN) * granule); |
| 103 | BAThseqbase(view, is_oid_nil(b->hseqbase) ? oid_nil : b->hseqbase + i); |
| 104 | BBPkeepref(*vid = view->batCacheid); |
| 105 | BBPunfix(b->batCacheid); |
| 106 | *res = i; |
| 107 | return MAL_SUCCEED; |
| 108 | } |
| 109 | |
| 110 | /* |
| 111 | * @- |
| 112 | * The BUN- and BAT-stream manipulate a long handle, i.e. |
| 113 | * the destination variable. It assumes it has been set to |
| 114 | * zero as part of runtime stack initialization. Subsequently, |
| 115 | * it fetches a bun and returns the increment to the control |
| 116 | * variable. If it returns zero the control variable has been reset |
| 117 | * to zero and end of stream has been reached. |
| 118 | */ |
| 119 | str |
| 120 | ITRbunIterator(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
| 121 | { |
| 122 | BATiter bi; |
| 123 | BAT *b; |
| 124 | oid *head; |
| 125 | bat *bid; |
| 126 | ValPtr tail; |
| 127 | |
| 128 | (void) cntxt; |
| 129 | (void) mb; |
| 130 | head = getArgReference_oid(stk, pci, 0); |
| 131 | tail = &stk->stk[pci->argv[1]]; |
| 132 | bid = getArgReference_bat(stk, pci, 2); |
| 133 | |
| 134 | if ((b = BATdescriptor(*bid)) == NULL) { |
| 135 | throw(MAL, "iterator.nextChunk" , INTERNAL_BAT_ACCESS); |
| 136 | } |
| 137 | |
| 138 | if (BATcount(b) == 0) { |
| 139 | *head = oid_nil; |
| 140 | BBPunfix(b->batCacheid); |
| 141 | return MAL_SUCCEED; |
| 142 | } |
| 143 | *head = 0; |
| 144 | |
| 145 | bi = bat_iterator(b); |
| 146 | if (VALinit(tail, b->ttype, BUNtail(bi, *head)) == NULL) { |
| 147 | BBPunfix(b->batCacheid); |
| 148 | throw(MAL, "iterator.nextChunk" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
| 149 | } |
| 150 | BBPunfix(b->batCacheid); |
| 151 | return MAL_SUCCEED; |
| 152 | } |
| 153 | |
| 154 | str |
| 155 | ITRbunNext(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
| 156 | { |
| 157 | BATiter bi; |
| 158 | BAT *b; |
| 159 | oid *head; |
| 160 | bat *bid; |
| 161 | ValPtr tail; |
| 162 | |
| 163 | (void) cntxt; |
| 164 | (void) mb; |
| 165 | head = getArgReference_oid(stk, pci, 0); |
| 166 | tail = &stk->stk[pci->argv[1]]; |
| 167 | bid = getArgReference_bat(stk, pci, 2); |
| 168 | |
| 169 | if ((b = BATdescriptor(*bid)) == NULL) { |
| 170 | throw(MAL, "iterator.nextChunk" , INTERNAL_BAT_ACCESS); |
| 171 | } |
| 172 | |
| 173 | *head = *head + 1; |
| 174 | if (*head >= BUNlast(b)) { |
| 175 | *head = oid_nil; |
| 176 | BBPunfix(b->batCacheid); |
| 177 | return MAL_SUCCEED; |
| 178 | } |
| 179 | bi = bat_iterator(b); |
| 180 | if (VALinit(tail, b->ttype, BUNtail(bi, *head)) == NULL) { |
| 181 | BBPunfix(b->batCacheid); |
| 182 | throw(MAL, "iterator.nextChunk" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
| 183 | } |
| 184 | BBPunfix(b->batCacheid); |
| 185 | return MAL_SUCCEED; |
| 186 | } |
| 187 | |
| 188 | str ITRnext_oid(oid *i, oid *step, oid *last){ |
| 189 | oid v = *i; |
| 190 | v = v + *step; |
| 191 | *i = v; |
| 192 | if ( *last <= v ) |
| 193 | *i = oid_nil; |
| 194 | return MAL_SUCCEED; |
| 195 | } |
| 196 | str ITRnext_lng(lng *i, lng *step, lng *last){ |
| 197 | lng v = *i; |
| 198 | v = v + *step; |
| 199 | *i = v; |
| 200 | if ( *last <= v ) |
| 201 | *i = lng_nil; |
| 202 | return MAL_SUCCEED; |
| 203 | } |
| 204 | #ifdef HAVE_HGE |
| 205 | str ITRnext_hge(hge *i, hge *step, hge *last){ |
| 206 | hge v = *i; |
| 207 | v = v + *step; |
| 208 | *i = v; |
| 209 | if ( *last <= v ) |
| 210 | *i = hge_nil; |
| 211 | return MAL_SUCCEED; |
| 212 | } |
| 213 | #endif |
| 214 | str ITRnext_int(int *i, int *step, int *last){ |
| 215 | int v = *i; |
| 216 | v = v + *step; |
| 217 | *i = v; |
| 218 | if ( *last <= v ) |
| 219 | *i = int_nil; |
| 220 | return MAL_SUCCEED; |
| 221 | } |
| 222 | str ITRnext_sht(sht *i, sht *step, sht *last){ |
| 223 | sht v = *i; |
| 224 | v = v + *step; |
| 225 | *i = v; |
| 226 | if ( *last <= v ) |
| 227 | *i = int_nil; |
| 228 | return MAL_SUCCEED; |
| 229 | } |
| 230 | str ITRnext_flt(flt *i, flt *step, flt *last){ |
| 231 | flt v = *i; |
| 232 | v = v + *step; |
| 233 | *i = v; |
| 234 | if ( *last <= v ) |
| 235 | *i = flt_nil; |
| 236 | return MAL_SUCCEED; |
| 237 | } |
| 238 | str ITRnext_dbl(dbl *i, dbl *step, dbl *last){ |
| 239 | dbl v = *i; |
| 240 | v = v + *step; |
| 241 | *i = v; |
| 242 | if ( *last <= v ) |
| 243 | *i = dbl_nil; |
| 244 | return MAL_SUCCEED; |
| 245 | } |
| 246 | |