1 | /** |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, software |
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | * See the License for the specific language governing permissions and |
16 | * limitations under the License. |
17 | */ |
18 | |
#include "orc/Vector.hh"

#include "Adaptor.hh"
#include "orc/Exceptions.hh"

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>
27 | |
28 | namespace orc { |
29 | |
30 | ColumnVectorBatch::ColumnVectorBatch(uint64_t cap, |
31 | MemoryPool& pool |
32 | ): capacity(cap), |
33 | numElements(0), |
34 | notNull(pool, cap), |
35 | hasNulls(false), |
36 | memoryPool(pool) { |
37 | std::memset(notNull.data(), 1, capacity); |
38 | } |
39 | |
40 | ColumnVectorBatch::~ColumnVectorBatch() { |
41 | // PASS |
42 | } |
43 | |
44 | void ColumnVectorBatch::resize(uint64_t cap) { |
45 | if (capacity < cap) { |
46 | capacity = cap; |
47 | notNull.resize(cap); |
48 | } |
49 | } |
50 | |
51 | uint64_t ColumnVectorBatch::getMemoryUsage() { |
52 | return static_cast<uint64_t>(notNull.capacity() * sizeof(char)); |
53 | } |
54 | |
55 | bool ColumnVectorBatch::hasVariableLength() { |
56 | return false; |
57 | } |
58 | |
59 | LongVectorBatch::LongVectorBatch(uint64_t capacity, MemoryPool& pool |
60 | ): ColumnVectorBatch(capacity, pool), |
61 | data(pool, capacity) { |
62 | // PASS |
63 | } |
64 | |
65 | LongVectorBatch::~LongVectorBatch() { |
66 | // PASS |
67 | } |
68 | |
69 | std::string LongVectorBatch::toString() const { |
70 | std::ostringstream buffer; |
71 | buffer << "Long vector <" << numElements << " of " << capacity << ">" ; |
72 | return buffer.str(); |
73 | } |
74 | |
75 | void LongVectorBatch::resize(uint64_t cap) { |
76 | if (capacity < cap) { |
77 | ColumnVectorBatch::resize(cap); |
78 | data.resize(cap); |
79 | } |
80 | } |
81 | |
82 | uint64_t LongVectorBatch::getMemoryUsage() { |
83 | return ColumnVectorBatch::getMemoryUsage() + |
84 | static_cast<uint64_t>(data.capacity() * sizeof(int64_t)); |
85 | } |
86 | |
87 | DoubleVectorBatch::DoubleVectorBatch(uint64_t capacity, MemoryPool& pool |
88 | ): ColumnVectorBatch(capacity, pool), |
89 | data(pool, capacity) { |
90 | // PASS |
91 | } |
92 | |
93 | DoubleVectorBatch::~DoubleVectorBatch() { |
94 | // PASS |
95 | } |
96 | |
97 | std::string DoubleVectorBatch::toString() const { |
98 | std::ostringstream buffer; |
99 | buffer << "Double vector <" << numElements << " of " << capacity << ">" ; |
100 | return buffer.str(); |
101 | } |
102 | |
103 | void DoubleVectorBatch::resize(uint64_t cap) { |
104 | if (capacity < cap) { |
105 | ColumnVectorBatch::resize(cap); |
106 | data.resize(cap); |
107 | } |
108 | } |
109 | |
110 | uint64_t DoubleVectorBatch::getMemoryUsage() { |
111 | return ColumnVectorBatch::getMemoryUsage() |
112 | + static_cast<uint64_t>(data.capacity() * sizeof(double)); |
113 | } |
114 | |
115 | StringVectorBatch::StringVectorBatch(uint64_t capacity, MemoryPool& pool |
116 | ): ColumnVectorBatch(capacity, pool), |
117 | data(pool, capacity), |
118 | length(pool, capacity) { |
119 | // PASS |
120 | } |
121 | |
122 | StringVectorBatch::~StringVectorBatch() { |
123 | // PASS |
124 | } |
125 | |
126 | std::string StringVectorBatch::toString() const { |
127 | std::ostringstream buffer; |
128 | buffer << "Byte vector <" << numElements << " of " << capacity << ">" ; |
129 | return buffer.str(); |
130 | } |
131 | |
132 | void StringVectorBatch::resize(uint64_t cap) { |
133 | if (capacity < cap) { |
134 | ColumnVectorBatch::resize(cap); |
135 | data.resize(cap); |
136 | length.resize(cap); |
137 | } |
138 | } |
139 | |
140 | uint64_t StringVectorBatch::getMemoryUsage() { |
141 | return ColumnVectorBatch::getMemoryUsage() |
142 | + static_cast<uint64_t>(data.capacity() * sizeof(char*) |
143 | + length.capacity() * sizeof(int64_t)); |
144 | } |
145 | |
146 | StructVectorBatch::StructVectorBatch(uint64_t cap, MemoryPool& pool |
147 | ): ColumnVectorBatch(cap, pool) { |
148 | // PASS |
149 | } |
150 | |
151 | StructVectorBatch::~StructVectorBatch() { |
152 | for (uint64_t i=0; i<this->fields.size(); i++) { |
153 | delete this->fields[i]; |
154 | } |
155 | } |
156 | |
157 | std::string StructVectorBatch::toString() const { |
158 | std::ostringstream buffer; |
159 | buffer << "Struct vector <" << numElements << " of " << capacity |
160 | << "; " ; |
161 | for(std::vector<ColumnVectorBatch*>::const_iterator ptr=fields.begin(); |
162 | ptr != fields.end(); ++ptr) { |
163 | buffer << (*ptr)->toString() << "; " ; |
164 | } |
165 | buffer << ">" ; |
166 | return buffer.str(); |
167 | } |
168 | |
169 | void StructVectorBatch::resize(uint64_t cap) { |
170 | ColumnVectorBatch::resize(cap); |
171 | } |
172 | |
173 | uint64_t StructVectorBatch::getMemoryUsage() { |
174 | uint64_t memory = ColumnVectorBatch::getMemoryUsage(); |
175 | for (unsigned int i=0; i < fields.size(); i++) { |
176 | memory += fields[i]->getMemoryUsage(); |
177 | } |
178 | return memory; |
179 | } |
180 | |
181 | bool StructVectorBatch::hasVariableLength() { |
182 | for (unsigned int i=0; i < fields.size(); i++) { |
183 | if (fields[i]->hasVariableLength()) { |
184 | return true; |
185 | } |
186 | } |
187 | return false; |
188 | } |
189 | |
190 | ListVectorBatch::ListVectorBatch(uint64_t cap, MemoryPool& pool |
191 | ): ColumnVectorBatch(cap, pool), |
192 | offsets(pool, cap+1) { |
193 | // PASS |
194 | } |
195 | |
196 | ListVectorBatch::~ListVectorBatch() { |
197 | // PASS |
198 | } |
199 | |
200 | std::string ListVectorBatch::toString() const { |
201 | std::ostringstream buffer; |
202 | buffer << "List vector <" << elements->toString() << " with " |
203 | << numElements << " of " << capacity << ">" ; |
204 | return buffer.str(); |
205 | } |
206 | |
207 | void ListVectorBatch::resize(uint64_t cap) { |
208 | if (capacity < cap) { |
209 | ColumnVectorBatch::resize(cap); |
210 | offsets.resize(cap + 1); |
211 | } |
212 | } |
213 | |
214 | uint64_t ListVectorBatch::getMemoryUsage() { |
215 | return ColumnVectorBatch::getMemoryUsage() |
216 | + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t)) |
217 | + elements->getMemoryUsage(); |
218 | } |
219 | |
220 | bool ListVectorBatch::hasVariableLength() { |
221 | return true; |
222 | } |
223 | |
224 | MapVectorBatch::MapVectorBatch(uint64_t cap, MemoryPool& pool |
225 | ): ColumnVectorBatch(cap, pool), |
226 | offsets(pool, cap+1) { |
227 | // PASS |
228 | } |
229 | |
230 | MapVectorBatch::~MapVectorBatch() { |
231 | // PASS |
232 | } |
233 | |
234 | std::string MapVectorBatch::toString() const { |
235 | std::ostringstream buffer; |
236 | buffer << "Map vector <" << keys->toString() << ", " |
237 | << elements->toString() << " with " |
238 | << numElements << " of " << capacity << ">" ; |
239 | return buffer.str(); |
240 | } |
241 | |
242 | void MapVectorBatch::resize(uint64_t cap) { |
243 | if (capacity < cap) { |
244 | ColumnVectorBatch::resize(cap); |
245 | offsets.resize(cap + 1); |
246 | } |
247 | } |
248 | |
249 | uint64_t MapVectorBatch::getMemoryUsage() { |
250 | return ColumnVectorBatch::getMemoryUsage() |
251 | + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t)) |
252 | + keys->getMemoryUsage() |
253 | + elements->getMemoryUsage(); |
254 | } |
255 | |
256 | bool MapVectorBatch::hasVariableLength() { |
257 | return true; |
258 | } |
259 | |
260 | UnionVectorBatch::UnionVectorBatch(uint64_t cap, MemoryPool& pool |
261 | ): ColumnVectorBatch(cap, pool), |
262 | tags(pool, cap), |
263 | offsets(pool, cap) { |
264 | // PASS |
265 | } |
266 | |
267 | UnionVectorBatch::~UnionVectorBatch() { |
268 | for (uint64_t i=0; i < children.size(); i++) { |
269 | delete children[i]; |
270 | } |
271 | } |
272 | |
273 | std::string UnionVectorBatch::toString() const { |
274 | std::ostringstream buffer; |
275 | buffer << "Union vector <" ; |
276 | for(size_t i=0; i < children.size(); ++i) { |
277 | if (i != 0) { |
278 | buffer << ", " ; |
279 | } |
280 | buffer << children[i]->toString(); |
281 | } |
282 | buffer << "; with " << numElements << " of " << capacity << ">" ; |
283 | return buffer.str(); |
284 | } |
285 | |
286 | void UnionVectorBatch::resize(uint64_t cap) { |
287 | if (capacity < cap) { |
288 | ColumnVectorBatch::resize(cap); |
289 | tags.resize(cap); |
290 | offsets.resize(cap); |
291 | } |
292 | } |
293 | |
294 | uint64_t UnionVectorBatch::getMemoryUsage() { |
295 | uint64_t memory = ColumnVectorBatch::getMemoryUsage() |
296 | + static_cast<uint64_t>(tags.capacity() * sizeof(unsigned char) |
297 | + offsets.capacity() * sizeof(uint64_t)); |
298 | for(size_t i=0; i < children.size(); ++i) { |
299 | memory += children[i]->getMemoryUsage(); |
300 | } |
301 | return memory; |
302 | } |
303 | |
304 | bool UnionVectorBatch::hasVariableLength() { |
305 | for(size_t i=0; i < children.size(); ++i) { |
306 | if (children[i]->hasVariableLength()) { |
307 | return true; |
308 | } |
309 | } |
310 | return false; |
311 | } |
312 | |
313 | Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool |
314 | ): ColumnVectorBatch(cap, pool), |
315 | precision(0), |
316 | scale(0), |
317 | values(pool, cap), |
318 | readScales(pool, cap) { |
319 | // PASS |
320 | } |
321 | |
322 | Decimal64VectorBatch::~Decimal64VectorBatch() { |
323 | // PASS |
324 | } |
325 | |
326 | std::string Decimal64VectorBatch::toString() const { |
327 | std::ostringstream buffer; |
328 | buffer << "Decimal64 vector with " |
329 | << numElements << " of " << capacity << ">" ; |
330 | return buffer.str(); |
331 | } |
332 | |
333 | void Decimal64VectorBatch::resize(uint64_t cap) { |
334 | if (capacity < cap) { |
335 | ColumnVectorBatch::resize(cap); |
336 | values.resize(cap); |
337 | readScales.resize(cap); |
338 | } |
339 | } |
340 | |
341 | uint64_t Decimal64VectorBatch::getMemoryUsage() { |
342 | return ColumnVectorBatch::getMemoryUsage() |
343 | + static_cast<uint64_t>( |
344 | (values.capacity() + readScales.capacity()) * sizeof(int64_t)); |
345 | } |
346 | |
347 | Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool |
348 | ): ColumnVectorBatch(cap, pool), |
349 | precision(0), |
350 | scale(0), |
351 | values(pool, cap), |
352 | readScales(pool, cap) { |
353 | // PASS |
354 | } |
355 | |
356 | Decimal128VectorBatch::~Decimal128VectorBatch() { |
357 | // PASS |
358 | } |
359 | |
360 | std::string Decimal128VectorBatch::toString() const { |
361 | std::ostringstream buffer; |
362 | buffer << "Decimal128 vector with " |
363 | << numElements << " of " << capacity << ">" ; |
364 | return buffer.str(); |
365 | } |
366 | |
367 | void Decimal128VectorBatch::resize(uint64_t cap) { |
368 | if (capacity < cap) { |
369 | ColumnVectorBatch::resize(cap); |
370 | values.resize(cap); |
371 | readScales.resize(cap); |
372 | } |
373 | } |
374 | |
375 | uint64_t Decimal128VectorBatch::getMemoryUsage() { |
376 | return ColumnVectorBatch::getMemoryUsage() |
377 | + static_cast<uint64_t>(values.capacity() * sizeof(Int128) |
378 | + readScales.capacity() * sizeof(int64_t)); |
379 | } |
380 | |
381 | Decimal::Decimal(const Int128& _value, |
382 | int32_t _scale): value(_value), scale(_scale) { |
383 | // PASS |
384 | } |
385 | |
386 | Decimal::Decimal(const std::string& str) { |
387 | std::size_t foundPoint = str.find("." ); |
388 | // no decimal point, it is int |
389 | if(foundPoint == std::string::npos){ |
390 | value = Int128(str); |
391 | scale = 0; |
392 | }else{ |
393 | std::string copy(str); |
394 | scale = static_cast<int32_t>(str.length() - foundPoint - 1); |
395 | value = Int128(copy.replace(foundPoint, 1, "" )); |
396 | } |
397 | } |
398 | |
399 | Decimal::Decimal() : value(0), scale(0) { |
400 | // PASS |
401 | } |
402 | |
403 | std::string Decimal::toString() const { |
404 | return value.toDecimalString(scale); |
405 | } |
406 | |
407 | TimestampVectorBatch::TimestampVectorBatch(uint64_t capacity, |
408 | MemoryPool& pool |
409 | ): ColumnVectorBatch(capacity, |
410 | pool), |
411 | data(pool, capacity), |
412 | nanoseconds(pool, capacity) { |
413 | // PASS |
414 | } |
415 | |
416 | TimestampVectorBatch::~TimestampVectorBatch() { |
417 | // PASS |
418 | } |
419 | |
420 | std::string TimestampVectorBatch::toString() const { |
421 | std::ostringstream buffer; |
422 | buffer << "Timestamp vector <" << numElements << " of " << capacity << ">" ; |
423 | return buffer.str(); |
424 | } |
425 | |
426 | void TimestampVectorBatch::resize(uint64_t cap) { |
427 | if (capacity < cap) { |
428 | ColumnVectorBatch::resize(cap); |
429 | data.resize(cap); |
430 | nanoseconds.resize(cap); |
431 | } |
432 | } |
433 | |
434 | uint64_t TimestampVectorBatch::getMemoryUsage() { |
435 | return ColumnVectorBatch::getMemoryUsage() |
436 | + static_cast<uint64_t>( |
437 | (data.capacity() + nanoseconds.capacity()) * sizeof(int64_t)); |
438 | } |
439 | } |
440 | |