1/**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
#include "orc/Vector.hh"

#include "Adaptor.hh"
#include "orc/Exceptions.hh"

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>
27
28namespace orc {
29
30 ColumnVectorBatch::ColumnVectorBatch(uint64_t cap,
31 MemoryPool& pool
32 ): capacity(cap),
33 numElements(0),
34 notNull(pool, cap),
35 hasNulls(false),
36 memoryPool(pool) {
37 std::memset(notNull.data(), 1, capacity);
38 }
39
40 ColumnVectorBatch::~ColumnVectorBatch() {
41 // PASS
42 }
43
44 void ColumnVectorBatch::resize(uint64_t cap) {
45 if (capacity < cap) {
46 capacity = cap;
47 notNull.resize(cap);
48 }
49 }
50
51 uint64_t ColumnVectorBatch::getMemoryUsage() {
52 return static_cast<uint64_t>(notNull.capacity() * sizeof(char));
53 }
54
55 bool ColumnVectorBatch::hasVariableLength() {
56 return false;
57 }
58
59 LongVectorBatch::LongVectorBatch(uint64_t capacity, MemoryPool& pool
60 ): ColumnVectorBatch(capacity, pool),
61 data(pool, capacity) {
62 // PASS
63 }
64
65 LongVectorBatch::~LongVectorBatch() {
66 // PASS
67 }
68
69 std::string LongVectorBatch::toString() const {
70 std::ostringstream buffer;
71 buffer << "Long vector <" << numElements << " of " << capacity << ">";
72 return buffer.str();
73 }
74
75 void LongVectorBatch::resize(uint64_t cap) {
76 if (capacity < cap) {
77 ColumnVectorBatch::resize(cap);
78 data.resize(cap);
79 }
80 }
81
82 uint64_t LongVectorBatch::getMemoryUsage() {
83 return ColumnVectorBatch::getMemoryUsage() +
84 static_cast<uint64_t>(data.capacity() * sizeof(int64_t));
85 }
86
87 DoubleVectorBatch::DoubleVectorBatch(uint64_t capacity, MemoryPool& pool
88 ): ColumnVectorBatch(capacity, pool),
89 data(pool, capacity) {
90 // PASS
91 }
92
93 DoubleVectorBatch::~DoubleVectorBatch() {
94 // PASS
95 }
96
97 std::string DoubleVectorBatch::toString() const {
98 std::ostringstream buffer;
99 buffer << "Double vector <" << numElements << " of " << capacity << ">";
100 return buffer.str();
101 }
102
103 void DoubleVectorBatch::resize(uint64_t cap) {
104 if (capacity < cap) {
105 ColumnVectorBatch::resize(cap);
106 data.resize(cap);
107 }
108 }
109
110 uint64_t DoubleVectorBatch::getMemoryUsage() {
111 return ColumnVectorBatch::getMemoryUsage()
112 + static_cast<uint64_t>(data.capacity() * sizeof(double));
113 }
114
115 StringVectorBatch::StringVectorBatch(uint64_t capacity, MemoryPool& pool
116 ): ColumnVectorBatch(capacity, pool),
117 data(pool, capacity),
118 length(pool, capacity) {
119 // PASS
120 }
121
122 StringVectorBatch::~StringVectorBatch() {
123 // PASS
124 }
125
126 std::string StringVectorBatch::toString() const {
127 std::ostringstream buffer;
128 buffer << "Byte vector <" << numElements << " of " << capacity << ">";
129 return buffer.str();
130 }
131
132 void StringVectorBatch::resize(uint64_t cap) {
133 if (capacity < cap) {
134 ColumnVectorBatch::resize(cap);
135 data.resize(cap);
136 length.resize(cap);
137 }
138 }
139
140 uint64_t StringVectorBatch::getMemoryUsage() {
141 return ColumnVectorBatch::getMemoryUsage()
142 + static_cast<uint64_t>(data.capacity() * sizeof(char*)
143 + length.capacity() * sizeof(int64_t));
144 }
145
146 StructVectorBatch::StructVectorBatch(uint64_t cap, MemoryPool& pool
147 ): ColumnVectorBatch(cap, pool) {
148 // PASS
149 }
150
151 StructVectorBatch::~StructVectorBatch() {
152 for (uint64_t i=0; i<this->fields.size(); i++) {
153 delete this->fields[i];
154 }
155 }
156
157 std::string StructVectorBatch::toString() const {
158 std::ostringstream buffer;
159 buffer << "Struct vector <" << numElements << " of " << capacity
160 << "; ";
161 for(std::vector<ColumnVectorBatch*>::const_iterator ptr=fields.begin();
162 ptr != fields.end(); ++ptr) {
163 buffer << (*ptr)->toString() << "; ";
164 }
165 buffer << ">";
166 return buffer.str();
167 }
168
169 void StructVectorBatch::resize(uint64_t cap) {
170 ColumnVectorBatch::resize(cap);
171 }
172
173 uint64_t StructVectorBatch::getMemoryUsage() {
174 uint64_t memory = ColumnVectorBatch::getMemoryUsage();
175 for (unsigned int i=0; i < fields.size(); i++) {
176 memory += fields[i]->getMemoryUsage();
177 }
178 return memory;
179 }
180
181 bool StructVectorBatch::hasVariableLength() {
182 for (unsigned int i=0; i < fields.size(); i++) {
183 if (fields[i]->hasVariableLength()) {
184 return true;
185 }
186 }
187 return false;
188 }
189
190 ListVectorBatch::ListVectorBatch(uint64_t cap, MemoryPool& pool
191 ): ColumnVectorBatch(cap, pool),
192 offsets(pool, cap+1) {
193 // PASS
194 }
195
196 ListVectorBatch::~ListVectorBatch() {
197 // PASS
198 }
199
200 std::string ListVectorBatch::toString() const {
201 std::ostringstream buffer;
202 buffer << "List vector <" << elements->toString() << " with "
203 << numElements << " of " << capacity << ">";
204 return buffer.str();
205 }
206
207 void ListVectorBatch::resize(uint64_t cap) {
208 if (capacity < cap) {
209 ColumnVectorBatch::resize(cap);
210 offsets.resize(cap + 1);
211 }
212 }
213
214 uint64_t ListVectorBatch::getMemoryUsage() {
215 return ColumnVectorBatch::getMemoryUsage()
216 + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
217 + elements->getMemoryUsage();
218 }
219
220 bool ListVectorBatch::hasVariableLength() {
221 return true;
222 }
223
224 MapVectorBatch::MapVectorBatch(uint64_t cap, MemoryPool& pool
225 ): ColumnVectorBatch(cap, pool),
226 offsets(pool, cap+1) {
227 // PASS
228 }
229
230 MapVectorBatch::~MapVectorBatch() {
231 // PASS
232 }
233
234 std::string MapVectorBatch::toString() const {
235 std::ostringstream buffer;
236 buffer << "Map vector <" << keys->toString() << ", "
237 << elements->toString() << " with "
238 << numElements << " of " << capacity << ">";
239 return buffer.str();
240 }
241
242 void MapVectorBatch::resize(uint64_t cap) {
243 if (capacity < cap) {
244 ColumnVectorBatch::resize(cap);
245 offsets.resize(cap + 1);
246 }
247 }
248
249 uint64_t MapVectorBatch::getMemoryUsage() {
250 return ColumnVectorBatch::getMemoryUsage()
251 + static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t))
252 + keys->getMemoryUsage()
253 + elements->getMemoryUsage();
254 }
255
256 bool MapVectorBatch::hasVariableLength() {
257 return true;
258 }
259
260 UnionVectorBatch::UnionVectorBatch(uint64_t cap, MemoryPool& pool
261 ): ColumnVectorBatch(cap, pool),
262 tags(pool, cap),
263 offsets(pool, cap) {
264 // PASS
265 }
266
267 UnionVectorBatch::~UnionVectorBatch() {
268 for (uint64_t i=0; i < children.size(); i++) {
269 delete children[i];
270 }
271 }
272
273 std::string UnionVectorBatch::toString() const {
274 std::ostringstream buffer;
275 buffer << "Union vector <";
276 for(size_t i=0; i < children.size(); ++i) {
277 if (i != 0) {
278 buffer << ", ";
279 }
280 buffer << children[i]->toString();
281 }
282 buffer << "; with " << numElements << " of " << capacity << ">";
283 return buffer.str();
284 }
285
286 void UnionVectorBatch::resize(uint64_t cap) {
287 if (capacity < cap) {
288 ColumnVectorBatch::resize(cap);
289 tags.resize(cap);
290 offsets.resize(cap);
291 }
292 }
293
294 uint64_t UnionVectorBatch::getMemoryUsage() {
295 uint64_t memory = ColumnVectorBatch::getMemoryUsage()
296 + static_cast<uint64_t>(tags.capacity() * sizeof(unsigned char)
297 + offsets.capacity() * sizeof(uint64_t));
298 for(size_t i=0; i < children.size(); ++i) {
299 memory += children[i]->getMemoryUsage();
300 }
301 return memory;
302 }
303
304 bool UnionVectorBatch::hasVariableLength() {
305 for(size_t i=0; i < children.size(); ++i) {
306 if (children[i]->hasVariableLength()) {
307 return true;
308 }
309 }
310 return false;
311 }
312
313 Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool
314 ): ColumnVectorBatch(cap, pool),
315 precision(0),
316 scale(0),
317 values(pool, cap),
318 readScales(pool, cap) {
319 // PASS
320 }
321
322 Decimal64VectorBatch::~Decimal64VectorBatch() {
323 // PASS
324 }
325
326 std::string Decimal64VectorBatch::toString() const {
327 std::ostringstream buffer;
328 buffer << "Decimal64 vector with "
329 << numElements << " of " << capacity << ">";
330 return buffer.str();
331 }
332
333 void Decimal64VectorBatch::resize(uint64_t cap) {
334 if (capacity < cap) {
335 ColumnVectorBatch::resize(cap);
336 values.resize(cap);
337 readScales.resize(cap);
338 }
339 }
340
341 uint64_t Decimal64VectorBatch::getMemoryUsage() {
342 return ColumnVectorBatch::getMemoryUsage()
343 + static_cast<uint64_t>(
344 (values.capacity() + readScales.capacity()) * sizeof(int64_t));
345 }
346
347 Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool
348 ): ColumnVectorBatch(cap, pool),
349 precision(0),
350 scale(0),
351 values(pool, cap),
352 readScales(pool, cap) {
353 // PASS
354 }
355
356 Decimal128VectorBatch::~Decimal128VectorBatch() {
357 // PASS
358 }
359
360 std::string Decimal128VectorBatch::toString() const {
361 std::ostringstream buffer;
362 buffer << "Decimal128 vector with "
363 << numElements << " of " << capacity << ">";
364 return buffer.str();
365 }
366
367 void Decimal128VectorBatch::resize(uint64_t cap) {
368 if (capacity < cap) {
369 ColumnVectorBatch::resize(cap);
370 values.resize(cap);
371 readScales.resize(cap);
372 }
373 }
374
375 uint64_t Decimal128VectorBatch::getMemoryUsage() {
376 return ColumnVectorBatch::getMemoryUsage()
377 + static_cast<uint64_t>(values.capacity() * sizeof(Int128)
378 + readScales.capacity() * sizeof(int64_t));
379 }
380
381 Decimal::Decimal(const Int128& _value,
382 int32_t _scale): value(_value), scale(_scale) {
383 // PASS
384 }
385
386 Decimal::Decimal(const std::string& str) {
387 std::size_t foundPoint = str.find(".");
388 // no decimal point, it is int
389 if(foundPoint == std::string::npos){
390 value = Int128(str);
391 scale = 0;
392 }else{
393 std::string copy(str);
394 scale = static_cast<int32_t>(str.length() - foundPoint - 1);
395 value = Int128(copy.replace(foundPoint, 1, ""));
396 }
397 }
398
399 Decimal::Decimal() : value(0), scale(0) {
400 // PASS
401 }
402
403 std::string Decimal::toString() const {
404 return value.toDecimalString(scale);
405 }
406
407 TimestampVectorBatch::TimestampVectorBatch(uint64_t capacity,
408 MemoryPool& pool
409 ): ColumnVectorBatch(capacity,
410 pool),
411 data(pool, capacity),
412 nanoseconds(pool, capacity) {
413 // PASS
414 }
415
416 TimestampVectorBatch::~TimestampVectorBatch() {
417 // PASS
418 }
419
420 std::string TimestampVectorBatch::toString() const {
421 std::ostringstream buffer;
422 buffer << "Timestamp vector <" << numElements << " of " << capacity << ">";
423 return buffer.str();
424 }
425
426 void TimestampVectorBatch::resize(uint64_t cap) {
427 if (capacity < cap) {
428 ColumnVectorBatch::resize(cap);
429 data.resize(cap);
430 nanoseconds.resize(cap);
431 }
432 }
433
434 uint64_t TimestampVectorBatch::getMemoryUsage() {
435 return ColumnVectorBatch::getMemoryUsage()
436 + static_cast<uint64_t>(
437 (data.capacity() + nanoseconds.capacity()) * sizeof(int64_t));
438 }
439}
440