1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * author M.L.Kersten |
11 | * BAT Iterators |
12 | * Many low level algorithms rely on an iterator to break a |
13 | * collection into smaller pieces. Each piece is subsequently processed |
14 | * by a block. |
15 | * |
16 | * For very large BATs it may make sense to break it into chunks |
17 | * and process them separately to solve a query. An iterator pair is |
18 | * provided to chop a BAT into fixed size elements. |
19 | * Each chunk is made available as a BATview. |
20 | * It provides read-only access to an underlying BAT. Adjusting the bounds |
21 | * is cheap, once the BATview descriptor has been constructed. |
22 | * |
23 | * The smallest granularity is a single BUN, which can be used |
24 | * to realize an iterator over the individual BAT elements. |
25 | * For larger sized chunks, the operators return a BATview. |
26 | * |
27 | * All iterators require storage space to administer the |
28 | * location of the next element. The BAT iterator module uses a simple |
29 | * lng variable, which also acts as a cursor for barrier statements. |
30 | * |
31 | * The larger chunks produced are currently static, i.e. |
32 | * their size is a parameter of the call. Dynamic chunk sizes |
33 | * are interesting for time-series query processing. (See another module) |
34 | * |
35 | */ |
36 | |
37 | #include "monetdb_config.h" |
38 | #include "iterator.h" |
39 | |
40 | /* |
41 | * We start with the large chunk iterator. |
42 | * The definition of the control statements require the same |
43 | * control variables, which means that the BATview is accessible |
44 | * to determine how far to advance when the next chunk is retrieved. |
45 | * The number of elements in the chunk is limited by the granule |
46 | * size. |
47 | */ |
48 | str |
49 | ITRnewChunk(lng *res, bat *vid, bat *bid, lng *granule) |
50 | { |
51 | BAT *b, *view; |
52 | BUN cnt; |
53 | |
54 | if ((b = BATdescriptor(*bid)) == NULL) { |
55 | throw(MAL, "chop.newChunk" , INTERNAL_BAT_ACCESS); |
56 | } |
57 | cnt = BATcount(b); |
58 | view = VIEWcreate(b->hseqbase, b); |
59 | if (view == NULL) { |
60 | BBPunfix(b->batCacheid); |
61 | throw(MAL, "chop.newChunk" , GDK_EXCEPTION); |
62 | } |
63 | |
64 | /* printf("set bat chunk bound to " LLFMT " 0 - " BUNFMT "\n", |
65 | *granule, MIN(cnt,(BUN) *granule)); */ |
66 | VIEWbounds(b, view, 0, MIN(cnt, (BUN) * granule)); |
67 | *vid = view->batCacheid; |
68 | BBPkeepref(view->batCacheid); |
69 | BBPunfix(b->batCacheid); |
70 | *res = 0; |
71 | return MAL_SUCCEED; |
72 | } |
73 | |
74 | /* |
75 | * The nextChunk version advances the reader, |
76 | * which also means that the view descriptor is already available. |
77 | * The granule size may differ in each call. |
78 | */ |
79 | str |
80 | ITRnextChunk(lng *res, bat *vid, bat *bid, lng *granule) |
81 | { |
82 | BAT *b, *view; |
83 | BUN i; |
84 | |
85 | if ((b = BATdescriptor(*bid)) == NULL) { |
86 | throw(MAL, "iterator.nextChunk" , INTERNAL_BAT_ACCESS); |
87 | } |
88 | if ((view = BATdescriptor(*vid)) == NULL) { |
89 | BBPunfix(b->batCacheid); |
90 | throw(MAL, "iterator.nextChunk" , INTERNAL_BAT_ACCESS); |
91 | } |
92 | i = (BUN) (*res + BATcount(view)); |
93 | if (i >= BUNlast(b)) { |
94 | *res = lng_nil; |
95 | *vid = 0; |
96 | BBPunfix(view->batCacheid); |
97 | BBPunfix(b->batCacheid); |
98 | return MAL_SUCCEED; |
99 | } |
100 | /* printf("set bat chunk bound to " BUNFMT " - " BUNFMT " \n", |
101 | i, i+(BUN) *granule-1); */ |
102 | VIEWbounds(b, view, i, i + (BUN) * granule); |
103 | BAThseqbase(view, is_oid_nil(b->hseqbase) ? oid_nil : b->hseqbase + i); |
104 | BBPkeepref(*vid = view->batCacheid); |
105 | BBPunfix(b->batCacheid); |
106 | *res = i; |
107 | return MAL_SUCCEED; |
108 | } |
109 | |
110 | /* |
111 | * @- |
112 | * The BUN- and BAT-stream manipulate a long handle, i.e. |
113 | * the destination variable. It assumes it has been set to |
114 | * zero as part of runtime stack initialization. Subsequently, |
115 | * it fetches a bun and returns the increment to the control |
116 | * variable. If it returns zero the control variable has been reset |
117 | * to zero and end of stream has been reached. |
118 | */ |
119 | str |
120 | ITRbunIterator(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
121 | { |
122 | BATiter bi; |
123 | BAT *b; |
124 | oid *head; |
125 | bat *bid; |
126 | ValPtr tail; |
127 | |
128 | (void) cntxt; |
129 | (void) mb; |
130 | head = getArgReference_oid(stk, pci, 0); |
131 | tail = &stk->stk[pci->argv[1]]; |
132 | bid = getArgReference_bat(stk, pci, 2); |
133 | |
134 | if ((b = BATdescriptor(*bid)) == NULL) { |
135 | throw(MAL, "iterator.nextChunk" , INTERNAL_BAT_ACCESS); |
136 | } |
137 | |
138 | if (BATcount(b) == 0) { |
139 | *head = oid_nil; |
140 | BBPunfix(b->batCacheid); |
141 | return MAL_SUCCEED; |
142 | } |
143 | *head = 0; |
144 | |
145 | bi = bat_iterator(b); |
146 | if (VALinit(tail, b->ttype, BUNtail(bi, *head)) == NULL) { |
147 | BBPunfix(b->batCacheid); |
148 | throw(MAL, "iterator.nextChunk" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
149 | } |
150 | BBPunfix(b->batCacheid); |
151 | return MAL_SUCCEED; |
152 | } |
153 | |
154 | str |
155 | ITRbunNext(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) |
156 | { |
157 | BATiter bi; |
158 | BAT *b; |
159 | oid *head; |
160 | bat *bid; |
161 | ValPtr tail; |
162 | |
163 | (void) cntxt; |
164 | (void) mb; |
165 | head = getArgReference_oid(stk, pci, 0); |
166 | tail = &stk->stk[pci->argv[1]]; |
167 | bid = getArgReference_bat(stk, pci, 2); |
168 | |
169 | if ((b = BATdescriptor(*bid)) == NULL) { |
170 | throw(MAL, "iterator.nextChunk" , INTERNAL_BAT_ACCESS); |
171 | } |
172 | |
173 | *head = *head + 1; |
174 | if (*head >= BUNlast(b)) { |
175 | *head = oid_nil; |
176 | BBPunfix(b->batCacheid); |
177 | return MAL_SUCCEED; |
178 | } |
179 | bi = bat_iterator(b); |
180 | if (VALinit(tail, b->ttype, BUNtail(bi, *head)) == NULL) { |
181 | BBPunfix(b->batCacheid); |
182 | throw(MAL, "iterator.nextChunk" , SQLSTATE(HY001) MAL_MALLOC_FAIL); |
183 | } |
184 | BBPunfix(b->batCacheid); |
185 | return MAL_SUCCEED; |
186 | } |
187 | |
188 | str ITRnext_oid(oid *i, oid *step, oid *last){ |
189 | oid v = *i; |
190 | v = v + *step; |
191 | *i = v; |
192 | if ( *last <= v ) |
193 | *i = oid_nil; |
194 | return MAL_SUCCEED; |
195 | } |
196 | str ITRnext_lng(lng *i, lng *step, lng *last){ |
197 | lng v = *i; |
198 | v = v + *step; |
199 | *i = v; |
200 | if ( *last <= v ) |
201 | *i = lng_nil; |
202 | return MAL_SUCCEED; |
203 | } |
204 | #ifdef HAVE_HGE |
205 | str ITRnext_hge(hge *i, hge *step, hge *last){ |
206 | hge v = *i; |
207 | v = v + *step; |
208 | *i = v; |
209 | if ( *last <= v ) |
210 | *i = hge_nil; |
211 | return MAL_SUCCEED; |
212 | } |
213 | #endif |
214 | str ITRnext_int(int *i, int *step, int *last){ |
215 | int v = *i; |
216 | v = v + *step; |
217 | *i = v; |
218 | if ( *last <= v ) |
219 | *i = int_nil; |
220 | return MAL_SUCCEED; |
221 | } |
222 | str ITRnext_sht(sht *i, sht *step, sht *last){ |
223 | sht v = *i; |
224 | v = v + *step; |
225 | *i = v; |
226 | if ( *last <= v ) |
227 | *i = int_nil; |
228 | return MAL_SUCCEED; |
229 | } |
230 | str ITRnext_flt(flt *i, flt *step, flt *last){ |
231 | flt v = *i; |
232 | v = v + *step; |
233 | *i = v; |
234 | if ( *last <= v ) |
235 | *i = flt_nil; |
236 | return MAL_SUCCEED; |
237 | } |
238 | str ITRnext_dbl(dbl *i, dbl *step, dbl *last){ |
239 | dbl v = *i; |
240 | v = v + *step; |
241 | *i = v; |
242 | if ( *last <= v ) |
243 | *i = dbl_nil; |
244 | return MAL_SUCCEED; |
245 | } |
246 | |