1 | /** |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, software |
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | * See the License for the specific language governing permissions and |
16 | * limitations under the License. |
17 | */ |
18 | |
19 | #ifndef ORC_STATISTICS_IMPL_HH |
20 | #define ORC_STATISTICS_IMPL_HH |
21 | |
22 | #include "orc/Common.hh" |
23 | #include "orc/Int128.hh" |
24 | #include "orc/OrcFile.hh" |
25 | #include "orc/Reader.hh" |
26 | |
27 | #include "Timezone.hh" |
28 | #include "TypeImpl.hh" |
29 | |
30 | namespace orc { |
31 | |
32 | /** |
33 | * StatContext contains fields required to compute statistics |
34 | */ |
35 | |
36 | struct StatContext { |
37 | const bool correctStats; |
38 | const Timezone* const writerTimezone; |
39 | StatContext() : correctStats(false), writerTimezone(nullptr) {} |
40 | StatContext(bool cStat, const Timezone* const timezone = nullptr) : |
41 | correctStats(cStat), writerTimezone(timezone) {} |
42 | }; |
43 | |
44 | /** |
45 | * Internal Statistics Implementation |
46 | */ |
47 | |
template <typename T>
class InternalStatisticsImpl {
  // Plain holder for the per-column counters and the min / max / sum values
  // of type T. Each value has a companion "has" flag; the holder itself never
  // checks the flag before returning the value, callers are expected to.
 private:
  bool _hasNull;
  bool _hasMinimum;
  bool _hasMaximum;
  bool _hasSum;
  bool _hasTotalLength;
  uint64_t _totalLength;
  uint64_t _valueCount;
  T _minimum;
  T _maximum;
  T _sum;

 public:
  // All flags start false and all counters start at zero. The T members are
  // value-initialized so that a getter called before the matching setter
  // returns T() instead of an indeterminate value.
  InternalStatisticsImpl()
      : _hasNull(false),
        _hasMinimum(false),
        _hasMaximum(false),
        _hasSum(false),
        _hasTotalLength(false),
        _totalLength(0),
        _valueCount(0),
        _minimum(),
        _maximum(),
        _sum() {}

  ~InternalStatisticsImpl() {}

  // GET / SET _totalLength
  bool hasTotalLength() const { return _hasTotalLength; }

  void setHasTotalLength(bool hasTotalLength) {
    _hasTotalLength = hasTotalLength;
  }

  uint64_t getTotalLength() const { return _totalLength; }

  void setTotalLength(uint64_t totalLength) { _totalLength = totalLength; }

  // GET / SET _sum
  bool hasSum() const { return _hasSum; }

  void setHasSum(bool hasSum) { _hasSum = hasSum; }

  T getSum() const { return _sum; }

  void setSum(T sum) { _sum = sum; }

  // GET / SET _maximum
  bool hasMaximum() const { return _hasMaximum; }

  T getMaximum() const { return _maximum; }

  void setHasMaximum(bool hasMax) { _hasMaximum = hasMax; }

  void setMaximum(T max) { _maximum = max; }

  // GET / SET _minimum
  bool hasMinimum() const { return _hasMinimum; }

  void setHasMinimum(bool hasMin) { _hasMinimum = hasMin; }

  T getMinimum() const { return _minimum; }

  void setMinimum(T min) { _minimum = min; }

  // GET / SET _valueCount
  uint64_t getNumberOfValues() const { return _valueCount; }

  void setNumberOfValues(uint64_t numValues) { _valueCount = numValues; }

  // GET / SET _hasNullValue
  bool hasNull() const { return _hasNull; }

  void setHasNull(bool hasNull) { _hasNull = hasNull; }

  // Clears every flag and zeroes both counters. The stored minimum, maximum
  // and sum values are left untouched; only their flags are cleared.
  void reset() {
    _hasNull = false;
    _hasMinimum = false;
    _hasMaximum = false;
    _hasSum = false;
    _hasTotalLength = false;
    _totalLength = 0;
    _valueCount = 0;
  }

  // Folds one value into the minimum / maximum pair. The first value seen
  // (no minimum recorded yet) becomes both minimum and maximum.
  // compare(a, b) is declared outside this class.
  void updateMinMax(T value) {
    if (!_hasMinimum) {
      _hasMinimum = _hasMaximum = true;
      _minimum = _maximum = value;
    } else if (compare(value, _minimum)) {
      _minimum = value;
    } else if (compare(_maximum, value)) {
      _maximum = value;
    }
  }

  // Combines another holder into this one: null flag, value count,
  // minimum / maximum and total length.
  // sum is not merged here as we need to check overflow
  void merge(const InternalStatisticsImpl& other) {
    _hasNull = _hasNull || other._hasNull;
    _valueCount += other._valueCount;

    if (other._hasMinimum) {
      if (!_hasMinimum) {
        // Nothing recorded on this side yet: adopt the other range as-is.
        _hasMinimum = _hasMaximum = true;
        _minimum = other._minimum;
        _maximum = other._maximum;
      } else {
        // all template types should support operator<
        if (compare(_maximum, other._maximum)) {
          _maximum = other._maximum;
        }
        if (compare(other._minimum, _minimum)) {
          _minimum = other._minimum;
        }
      }
    }

    // The merged total length is only flagged as defined when both sides
    // had it; the lengths themselves are always added.
    _hasTotalLength = _hasTotalLength && other._hasTotalLength;
    _totalLength += other._totalLength;
  }
};
168 | |
169 | typedef InternalStatisticsImpl<char> InternalCharStatistics; |
170 | typedef InternalStatisticsImpl<uint64_t> InternalBooleanStatistics; |
171 | typedef InternalStatisticsImpl<int64_t> InternalIntegerStatistics; |
172 | typedef InternalStatisticsImpl<int32_t> InternalDateStatistics; |
173 | typedef InternalStatisticsImpl<double> InternalDoubleStatistics; |
174 | typedef InternalStatisticsImpl<Decimal> InternalDecimalStatistics; |
175 | typedef InternalStatisticsImpl<std::string> InternalStringStatistics; |
176 | |
177 | /** |
178 | * Mutable column statistics for use by the writer. |
179 | */ |
180 | class MutableColumnStatistics { |
181 | public: |
182 | virtual ~MutableColumnStatistics(); |
183 | |
184 | virtual void increase(uint64_t count) = 0; |
185 | |
186 | virtual void setNumberOfValues(uint64_t value) = 0; |
187 | |
188 | virtual void setHasNull(bool hasNull) = 0; |
189 | |
190 | virtual void merge(const MutableColumnStatistics& other) = 0; |
191 | |
192 | virtual void reset() = 0; |
193 | |
194 | virtual void toProtoBuf(proto::ColumnStatistics& pbStats) const = 0; |
195 | }; |
196 | |
197 | /** |
198 | * ColumnStatistics Implementation |
199 | */ |
200 | |
201 | class ColumnStatisticsImpl: public ColumnStatistics, |
202 | public MutableColumnStatistics { |
203 | private: |
204 | InternalCharStatistics _stats; |
205 | public: |
206 | ColumnStatisticsImpl() { reset(); } |
207 | ColumnStatisticsImpl(const proto::ColumnStatistics& stats); |
208 | virtual ~ColumnStatisticsImpl() override; |
209 | |
210 | uint64_t getNumberOfValues() const override { |
211 | return _stats.getNumberOfValues(); |
212 | } |
213 | |
214 | void setNumberOfValues(uint64_t value) override { |
215 | _stats.setNumberOfValues(value); |
216 | } |
217 | |
218 | void increase(uint64_t count) override { |
219 | _stats.setNumberOfValues(_stats.getNumberOfValues() + count); |
220 | } |
221 | |
222 | bool hasNull() const override { |
223 | return _stats.hasNull(); |
224 | } |
225 | |
226 | void setHasNull(bool hasNull) override { |
227 | _stats.setHasNull(hasNull); |
228 | } |
229 | |
230 | void merge(const MutableColumnStatistics& other) override { |
231 | _stats.merge(dynamic_cast<const ColumnStatisticsImpl&>(other)._stats); |
232 | } |
233 | |
234 | void reset() override { |
235 | _stats.reset(); |
236 | } |
237 | |
238 | void toProtoBuf(proto::ColumnStatistics& pbStats) const override { |
239 | pbStats.set_hasnull(_stats.hasNull()); |
240 | pbStats.set_numberofvalues(_stats.getNumberOfValues()); |
241 | } |
242 | |
243 | std::string toString() const override { |
244 | std::ostringstream buffer; |
245 | buffer << "Column has " << getNumberOfValues() << " values" |
246 | << " and has null value: " << (hasNull() ? "yes" : "no" ) |
247 | << std::endl; |
248 | return buffer.str(); |
249 | } |
250 | }; |
251 | |
252 | class BinaryColumnStatisticsImpl: public BinaryColumnStatistics, |
253 | public MutableColumnStatistics { |
254 | private: |
255 | InternalCharStatistics _stats; |
256 | public: |
257 | BinaryColumnStatisticsImpl() { reset(); } |
258 | BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats, |
259 | const StatContext& statContext); |
260 | virtual ~BinaryColumnStatisticsImpl() override; |
261 | |
262 | uint64_t getNumberOfValues() const override { |
263 | return _stats.getNumberOfValues(); |
264 | } |
265 | |
266 | void setNumberOfValues(uint64_t value) override { |
267 | _stats.setNumberOfValues(value); |
268 | } |
269 | |
270 | void increase(uint64_t count) override { |
271 | _stats.setNumberOfValues(_stats.getNumberOfValues() + count); |
272 | } |
273 | |
274 | bool hasNull() const override { |
275 | return _stats.hasNull(); |
276 | } |
277 | |
278 | void setHasNull(bool hasNull) override { |
279 | _stats.setHasNull(hasNull); |
280 | } |
281 | |
282 | bool hasTotalLength() const override { |
283 | return _stats.hasTotalLength(); |
284 | } |
285 | |
286 | uint64_t getTotalLength() const override { |
287 | if(hasTotalLength()){ |
288 | return _stats.getTotalLength(); |
289 | }else{ |
290 | throw ParseError("Total length is not defined." ); |
291 | } |
292 | } |
293 | |
294 | void setTotalLength(uint64_t length) { |
295 | _stats.setHasTotalLength(true); |
296 | _stats.setTotalLength(length); |
297 | } |
298 | |
299 | void update(size_t length) { |
300 | _stats.setTotalLength(_stats.getTotalLength() + length); |
301 | } |
302 | |
303 | void merge(const MutableColumnStatistics& other) override { |
304 | const BinaryColumnStatisticsImpl& binStats = |
305 | dynamic_cast<const BinaryColumnStatisticsImpl&>(other); |
306 | _stats.merge(binStats._stats); |
307 | } |
308 | |
309 | void reset() override { |
310 | _stats.reset(); |
311 | setTotalLength(0); |
312 | } |
313 | |
314 | void toProtoBuf(proto::ColumnStatistics& pbStats) const override { |
315 | pbStats.set_hasnull(_stats.hasNull()); |
316 | pbStats.set_numberofvalues(_stats.getNumberOfValues()); |
317 | |
318 | proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics(); |
319 | binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); |
320 | } |
321 | |
322 | std::string toString() const override { |
323 | std::ostringstream buffer; |
324 | buffer << "Data type: Binary" << std::endl |
325 | << "Values: " << getNumberOfValues() << std::endl |
326 | << "Has null: " << (hasNull() ? "yes" : "no" ) << std::endl; |
327 | if(hasTotalLength()){ |
328 | buffer << "Total length: " << getTotalLength() << std::endl; |
329 | }else{ |
330 | buffer << "Total length: not defined" << std::endl; |
331 | } |
332 | return buffer.str(); |
333 | } |
334 | }; |
335 | |
336 | class BooleanColumnStatisticsImpl: public BooleanColumnStatistics, |
337 | public MutableColumnStatistics { |
338 | private: |
339 | InternalBooleanStatistics _stats; |
340 | bool _hasCount; |
341 | uint64_t _trueCount; |
342 | |
343 | public: |
344 | BooleanColumnStatisticsImpl() { reset(); } |
345 | BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats, |
346 | const StatContext& statContext); |
347 | virtual ~BooleanColumnStatisticsImpl() override; |
348 | |
349 | bool hasCount() const override { |
350 | return _hasCount; |
351 | } |
352 | |
353 | void increase(uint64_t count) override { |
354 | _stats.setNumberOfValues(_stats.getNumberOfValues() + count); |
355 | _hasCount = true; |
356 | } |
357 | |
358 | uint64_t getNumberOfValues() const override { |
359 | return _stats.getNumberOfValues(); |
360 | } |
361 | |
362 | void setNumberOfValues(uint64_t value) override { |
363 | _stats.setNumberOfValues(value); |
364 | } |
365 | |
366 | bool hasNull() const override { |
367 | return _stats.hasNull(); |
368 | } |
369 | |
370 | void setHasNull(bool hasNull) override { |
371 | _stats.setHasNull(hasNull); |
372 | } |
373 | |
374 | uint64_t getFalseCount() const override { |
375 | if(hasCount()){ |
376 | return getNumberOfValues() - _trueCount; |
377 | }else{ |
378 | throw ParseError("False count is not defined." ); |
379 | } |
380 | } |
381 | |
382 | uint64_t getTrueCount() const override { |
383 | if(hasCount()){ |
384 | return _trueCount; |
385 | }else{ |
386 | throw ParseError("True count is not defined." ); |
387 | } |
388 | } |
389 | |
390 | void setTrueCount(uint64_t trueCount) { |
391 | _hasCount = true; |
392 | _trueCount = trueCount; |
393 | } |
394 | |
395 | void update(bool value, size_t repetitions) { |
396 | if (value) { |
397 | _trueCount += repetitions; |
398 | } |
399 | } |
400 | |
401 | void merge(const MutableColumnStatistics& other) override { |
402 | const BooleanColumnStatisticsImpl& boolStats = |
403 | dynamic_cast<const BooleanColumnStatisticsImpl&>(other); |
404 | _stats.merge(boolStats._stats); |
405 | _hasCount = _hasCount && boolStats._hasCount; |
406 | _trueCount += boolStats._trueCount; |
407 | } |
408 | |
409 | void reset() override { |
410 | _stats.reset(); |
411 | setTrueCount(0); |
412 | } |
413 | |
414 | void toProtoBuf(proto::ColumnStatistics& pbStats) const override { |
415 | pbStats.set_hasnull(_stats.hasNull()); |
416 | pbStats.set_numberofvalues(_stats.getNumberOfValues()); |
417 | |
418 | proto::BucketStatistics* bucketStats = pbStats.mutable_bucketstatistics(); |
419 | if (_hasCount) { |
420 | bucketStats->add_count(_trueCount); |
421 | } |
422 | } |
423 | |
424 | std::string toString() const override { |
425 | std::ostringstream buffer; |
426 | buffer << "Data type: Boolean" << std::endl |
427 | << "Values: " << getNumberOfValues() << std::endl |
428 | << "Has null: " << (hasNull() ? "yes" : "no" ) << std::endl; |
429 | if(hasCount()){ |
430 | buffer << "(true: " << getTrueCount() << "; false: " |
431 | << getFalseCount() << ")" << std::endl; |
432 | } else { |
433 | buffer << "(true: not defined; false: not defined)" << std::endl; |
434 | buffer << "True and false count are not defined" << std::endl; |
435 | } |
436 | return buffer.str(); |
437 | } |
438 | }; |
439 | |
440 | class DateColumnStatisticsImpl: public DateColumnStatistics, |
441 | public MutableColumnStatistics{ |
442 | private: |
443 | InternalDateStatistics _stats; |
444 | public: |
445 | DateColumnStatisticsImpl() { reset(); } |
446 | DateColumnStatisticsImpl(const proto::ColumnStatistics& stats, |
447 | const StatContext& statContext); |
448 | virtual ~DateColumnStatisticsImpl() override; |
449 | |
450 | bool hasMinimum() const override { |
451 | return _stats.hasMinimum(); |
452 | } |
453 | |
454 | bool hasMaximum() const override { |
455 | return _stats.hasMaximum(); |
456 | } |
457 | |
458 | void increase(uint64_t count) override { |
459 | _stats.setNumberOfValues(_stats.getNumberOfValues() + count); |
460 | } |
461 | |
462 | uint64_t getNumberOfValues() const override { |
463 | return _stats.getNumberOfValues(); |
464 | } |
465 | |
466 | void setNumberOfValues(uint64_t value) override { |
467 | _stats.setNumberOfValues(value); |
468 | } |
469 | |
470 | bool hasNull() const override { |
471 | return _stats.hasNull(); |
472 | } |
473 | |
474 | void setHasNull(bool hasNull) override { |
475 | _stats.setHasNull(hasNull); |
476 | } |
477 | |
478 | int32_t getMinimum() const override { |
479 | if(hasMinimum()){ |
480 | return _stats.getMinimum(); |
481 | }else{ |
482 | throw ParseError("Minimum is not defined." ); |
483 | } |
484 | } |
485 | |
486 | int32_t getMaximum() const override { |
487 | if(hasMaximum()){ |
488 | return _stats.getMaximum(); |
489 | }else{ |
490 | throw ParseError("Maximum is not defined." ); |
491 | } |
492 | } |
493 | |
494 | void setMinimum(int32_t minimum) { |
495 | _stats.setHasMinimum(true); |
496 | _stats.setMinimum(minimum); |
497 | } |
498 | |
499 | void setMaximum(int32_t maximum) { |
500 | _stats.setHasMaximum(true); |
501 | _stats.setMaximum(maximum); |
502 | } |
503 | |
504 | void update(int32_t value) { |
505 | _stats.updateMinMax(value); |
506 | } |
507 | |
508 | void merge(const MutableColumnStatistics& other) override { |
509 | const DateColumnStatisticsImpl& dateStats = |
510 | dynamic_cast<const DateColumnStatisticsImpl&>(other); |
511 | _stats.merge(dateStats._stats); |
512 | } |
513 | |
514 | void reset() override { |
515 | _stats.reset(); |
516 | } |
517 | |
518 | void toProtoBuf(proto::ColumnStatistics& pbStats) const override { |
519 | pbStats.set_hasnull(_stats.hasNull()); |
520 | pbStats.set_numberofvalues(_stats.getNumberOfValues()); |
521 | |
522 | if (_stats.hasMinimum()) { |
523 | proto::DateStatistics* dateStatistics = |
524 | pbStats.mutable_datestatistics(); |
525 | dateStatistics->set_maximum(_stats.getMaximum()); |
526 | dateStatistics->set_minimum(_stats.getMinimum()); |
527 | } |
528 | } |
529 | |
530 | std::string toString() const override { |
531 | std::ostringstream buffer; |
532 | buffer << "Data type: Date" << std::endl |
533 | << "Values: " << getNumberOfValues() << std::endl |
534 | << "Has null: " << (hasNull() ? "yes" : "no" ) << std::endl; |
535 | if(hasMinimum()){ |
536 | buffer << "Minimum: " << getMinimum() << std::endl; |
537 | }else{ |
538 | buffer << "Minimum: not defined" << std::endl; |
539 | } |
540 | |
541 | if(hasMaximum()){ |
542 | buffer << "Maximum: " << getMaximum() << std::endl; |
543 | }else{ |
544 | buffer << "Maximum: not defined" << std::endl; |
545 | } |
546 | return buffer.str(); |
547 | } |
548 | }; |
549 | |
550 | class DecimalColumnStatisticsImpl: public DecimalColumnStatistics, |
551 | public MutableColumnStatistics { |
552 | private: |
553 | InternalDecimalStatistics _stats; |
554 | |
555 | public: |
556 | DecimalColumnStatisticsImpl() { reset(); } |
557 | DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats, |
558 | const StatContext& statContext); |
559 | virtual ~DecimalColumnStatisticsImpl() override; |
560 | |
561 | bool hasMinimum() const override { |
562 | return _stats.hasMinimum(); |
563 | } |
564 | |
565 | bool hasMaximum() const override { |
566 | return _stats.hasMaximum(); |
567 | } |
568 | |
569 | bool hasSum() const override { |
570 | return _stats.hasSum(); |
571 | } |
572 | |
573 | void increase(uint64_t count) override { |
574 | _stats.setNumberOfValues(_stats.getNumberOfValues() + count); |
575 | } |
576 | |
577 | uint64_t getNumberOfValues() const override { |
578 | return _stats.getNumberOfValues(); |
579 | } |
580 | |
581 | void setNumberOfValues(uint64_t value) override { |
582 | _stats.setNumberOfValues(value); |
583 | } |
584 | |
585 | bool hasNull() const override { |
586 | return _stats.hasNull(); |
587 | } |
588 | |
589 | void setHasNull(bool hasNull) override { |
590 | _stats.setHasNull(hasNull); |
591 | } |
592 | |
593 | Decimal getMinimum() const override { |
594 | if(hasMinimum()){ |
595 | return _stats.getMinimum(); |
596 | }else{ |
597 | throw ParseError("Minimum is not defined." ); |
598 | } |
599 | } |
600 | |
601 | Decimal getMaximum() const override { |
602 | if(hasMaximum()){ |
603 | return _stats.getMaximum(); |
604 | }else{ |
605 | throw ParseError("Maximum is not defined." ); |
606 | } |
607 | } |
608 | |
609 | void setMinimum(Decimal minimum) { |
610 | _stats.setHasMinimum(true); |
611 | _stats.setMinimum(minimum); |
612 | } |
613 | |
614 | void setMaximum(Decimal maximum) { |
615 | _stats.setHasMaximum(true); |
616 | _stats.setMaximum(maximum); |
617 | } |
618 | |
619 | Decimal getSum() const override { |
620 | if(hasSum()){ |
621 | return _stats.getSum(); |
622 | }else{ |
623 | throw ParseError("Sum is not defined." ); |
624 | } |
625 | } |
626 | |
627 | void setSum(Decimal sum) { |
628 | _stats.setHasSum(true); |
629 | _stats.setSum(sum); |
630 | } |
631 | |
632 | void update(const Decimal& value) { |
633 | _stats.updateMinMax(value); |
634 | |
635 | if (_stats.hasSum()) { |
636 | updateSum(value); |
637 | } |
638 | } |
639 | |
640 | void merge(const MutableColumnStatistics& other) override { |
641 | const DecimalColumnStatisticsImpl& decStats = |
642 | dynamic_cast<const DecimalColumnStatisticsImpl&>(other); |
643 | |
644 | _stats.merge(decStats._stats); |
645 | |
646 | _stats.setHasSum(_stats.hasSum() && decStats.hasSum()); |
647 | if (_stats.hasSum()) { |
648 | updateSum(decStats.getSum()); |
649 | } |
650 | } |
651 | |
652 | void reset() override { |
653 | _stats.reset(); |
654 | setSum(Decimal()); |
655 | } |
656 | |
657 | void toProtoBuf(proto::ColumnStatistics& pbStats) const override { |
658 | pbStats.set_hasnull(_stats.hasNull()); |
659 | pbStats.set_numberofvalues(_stats.getNumberOfValues()); |
660 | |
661 | proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics(); |
662 | if (_stats.hasMinimum()) { |
663 | decStats->set_minimum(_stats.getMinimum().toString()); |
664 | decStats->set_maximum(_stats.getMaximum().toString()); |
665 | } |
666 | if (_stats.hasSum()) { |
667 | decStats->set_sum(_stats.getSum().toString()); |
668 | } |
669 | } |
670 | |
671 | std::string toString() const override { |
672 | std::ostringstream buffer; |
673 | buffer << "Data type: Decimal" << std::endl |
674 | << "Values: " << getNumberOfValues() << std::endl |
675 | << "Has null: " << (hasNull() ? "yes" : "no" ) << std::endl; |
676 | if(hasMinimum()){ |
677 | buffer << "Minimum: " << getMinimum().toString() << std::endl; |
678 | }else{ |
679 | buffer << "Minimum: not defined" << std::endl; |
680 | } |
681 | |
682 | if(hasMaximum()){ |
683 | buffer << "Maximum: " << getMaximum().toString() << std::endl; |
684 | }else{ |
685 | buffer << "Maximum: not defined" << std::endl; |
686 | } |
687 | |
688 | if(hasSum()){ |
689 | buffer << "Sum: " << getSum().toString() << std::endl; |
690 | }else{ |
691 | buffer << "Sum: not defined" << std::endl; |
692 | } |
693 | |
694 | return buffer.str(); |
695 | } |
696 | |
697 | private: |
698 | void updateSum(Decimal value) { |
699 | if (_stats.hasSum()) { |
700 | bool overflow = false; |
701 | Decimal sum = _stats.getSum(); |
702 | if (sum.scale > value.scale) { |
703 | value.value = scaleUpInt128ByPowerOfTen(value.value, |
704 | sum.scale - value.scale, |
705 | overflow); |
706 | } else if (sum.scale < value.scale) { |
707 | sum.value = scaleUpInt128ByPowerOfTen(sum.value, |
708 | value.scale - sum.scale, |
709 | overflow); |
710 | sum.scale = value.scale; |
711 | } |
712 | |
713 | if (!overflow) { |
714 | bool wasPositive = sum.value >= 0; |
715 | sum.value += value.value; |
716 | if ((value.value >= 0) == wasPositive) { |
717 | _stats.setHasSum((sum.value >= 0) == wasPositive); |
718 | } |
719 | } else { |
720 | _stats.setHasSum(false); |
721 | } |
722 | |
723 | if (_stats.hasSum()) { |
724 | _stats.setSum(sum); |
725 | } |
726 | } |
727 | } |
728 | }; |
729 | |
730 | class DoubleColumnStatisticsImpl: public DoubleColumnStatistics, |
731 | public MutableColumnStatistics { |
732 | private: |
733 | InternalDoubleStatistics _stats; |
734 | public: |
735 | DoubleColumnStatisticsImpl() { reset(); } |
736 | DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats); |
737 | virtual ~DoubleColumnStatisticsImpl() override; |
738 | |
739 | bool hasMinimum() const override { |
740 | return _stats.hasMinimum(); |
741 | } |
742 | |
743 | bool hasMaximum() const override { |
744 | return _stats.hasMaximum(); |
745 | } |
746 | |
747 | bool hasSum() const override { |
748 | return _stats.hasSum(); |
749 | } |
750 | |
751 | void increase(uint64_t count) override { |
752 | _stats.setNumberOfValues(_stats.getNumberOfValues() + count); |
753 | } |
754 | |
755 | uint64_t getNumberOfValues() const override { |
756 | return _stats.getNumberOfValues(); |
757 | } |
758 | |
759 | void setNumberOfValues(uint64_t value) override { |
760 | _stats.setNumberOfValues(value); |
761 | } |
762 | |
763 | bool hasNull() const override { |
764 | return _stats.hasNull(); |
765 | } |
766 | |
767 | void setHasNull(bool hasNull) override { |
768 | _stats.setHasNull(hasNull); |
769 | } |
770 | |
771 | double getMinimum() const override { |
772 | if(hasMinimum()){ |
773 | return _stats.getMinimum(); |
774 | }else{ |
775 | throw ParseError("Minimum is not defined." ); |
776 | } |
777 | } |
778 | |
779 | double getMaximum() const override { |
780 | if(hasMaximum()){ |
781 | return _stats.getMaximum(); |
782 | }else{ |
783 | throw ParseError("Maximum is not defined." ); |
784 | } |
785 | } |
786 | |
787 | void setMinimum(double minimum) { |
788 | _stats.setHasMinimum(true); |
789 | _stats.setMinimum(minimum); |
790 | } |
791 | |
792 | void setMaximum(double maximum) { |
793 | _stats.setHasMaximum(true); |
794 | _stats.setMaximum(maximum); |
795 | } |
796 | |
797 | double getSum() const override { |
798 | if(hasSum()){ |
799 | return _stats.getSum(); |
800 | }else{ |
801 | throw ParseError("Sum is not defined." ); |
802 | } |
803 | } |
804 | |
805 | void setSum(double sum) { |
806 | _stats.setHasSum(true); |
807 | _stats.setSum(sum); |
808 | } |
809 | |
810 | void update(double value) { |
811 | _stats.updateMinMax(value); |
812 | _stats.setSum(_stats.getSum() + value); |
813 | } |
814 | |
815 | void merge(const MutableColumnStatistics& other) override { |
816 | const DoubleColumnStatisticsImpl& doubleStats = |
817 | dynamic_cast<const DoubleColumnStatisticsImpl&>(other); |
818 | _stats.merge(doubleStats._stats); |
819 | |
820 | _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum()); |
821 | if (_stats.hasSum()) { |
822 | _stats.setSum(_stats.getSum() + doubleStats.getSum()); |
823 | } |
824 | } |
825 | |
826 | void reset() override { |
827 | _stats.reset(); |
828 | setSum(0.0); |
829 | } |
830 | |
831 | void toProtoBuf(proto::ColumnStatistics& pbStats) const override { |
832 | pbStats.set_hasnull(_stats.hasNull()); |
833 | pbStats.set_numberofvalues(_stats.getNumberOfValues()); |
834 | |
835 | proto::DoubleStatistics* doubleStats = pbStats.mutable_doublestatistics(); |
836 | if (_stats.hasMinimum()) { |
837 | doubleStats->set_minimum(_stats.getMinimum()); |
838 | doubleStats->set_maximum(_stats.getMaximum()); |
839 | } |
840 | if (_stats.hasSum()) { |
841 | doubleStats->set_sum(_stats.getSum()); |
842 | } |
843 | } |
844 | |
845 | std::string toString() const override { |
846 | std::ostringstream buffer; |
847 | buffer << "Data type: Double" << std::endl |
848 | << "Values: " << getNumberOfValues() << std::endl |
849 | << "Has null: " << (hasNull() ? "yes" : "no" ) << std::endl; |
850 | if(hasMinimum()){ |
851 | buffer << "Minimum: " << getMinimum() << std::endl; |
852 | }else{ |
853 | buffer << "Minimum: not defined" << std::endl; |
854 | } |
855 | |
856 | if(hasMaximum()){ |
857 | buffer << "Maximum: " << getMaximum() << std::endl; |
858 | }else{ |
859 | buffer << "Maximum: not defined" << std::endl; |
860 | } |
861 | |
862 | if(hasSum()){ |
863 | buffer << "Sum: " << getSum() << std::endl; |
864 | }else{ |
865 | buffer << "Sum: not defined" << std::endl; |
866 | } |
867 | return buffer.str(); |
868 | } |
869 | }; |
870 | |
871 | class IntegerColumnStatisticsImpl: public IntegerColumnStatistics, |
872 | public MutableColumnStatistics { |
873 | private: |
874 | InternalIntegerStatistics _stats; |
875 | public: |
876 | IntegerColumnStatisticsImpl() { reset(); } |
877 | IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats); |
878 | virtual ~IntegerColumnStatisticsImpl() override; |
879 | |
880 | bool hasMinimum() const override { |
881 | return _stats.hasMinimum(); |
882 | } |
883 | |
884 | bool hasMaximum() const override { |
885 | return _stats.hasMaximum(); |
886 | } |
887 | |
888 | bool hasSum() const override { |
889 | return _stats.hasSum(); |
890 | } |
891 | |
892 | void increase(uint64_t count) override { |
893 | _stats.setNumberOfValues(_stats.getNumberOfValues() + count); |
894 | } |
895 | |
896 | uint64_t getNumberOfValues() const override { |
897 | return _stats.getNumberOfValues(); |
898 | } |
899 | |
900 | void setNumberOfValues(uint64_t value) override { |
901 | _stats.setNumberOfValues(value); |
902 | } |
903 | |
904 | bool hasNull() const override { |
905 | return _stats.hasNull(); |
906 | } |
907 | |
908 | void setHasNull(bool hasNull) override { |
909 | _stats.setHasNull(hasNull); |
910 | } |
911 | |
912 | int64_t getMinimum() const override { |
913 | if(hasMinimum()){ |
914 | return _stats.getMinimum(); |
915 | }else{ |
916 | throw ParseError("Minimum is not defined." ); |
917 | } |
918 | } |
919 | |
920 | int64_t getMaximum() const override { |
921 | if(hasMaximum()){ |
922 | return _stats.getMaximum(); |
923 | }else{ |
924 | throw ParseError("Maximum is not defined." ); |
925 | } |
926 | } |
927 | |
928 | void setMinimum(int64_t minimum) { |
929 | _stats.setHasMinimum(true); |
930 | _stats.setMinimum(minimum); |
931 | } |
932 | |
933 | void setMaximum(int64_t maximum) { |
934 | _stats.setHasMaximum(true); |
935 | _stats.setMaximum(maximum); |
936 | } |
937 | |
938 | int64_t getSum() const override { |
939 | if(hasSum()){ |
940 | return _stats.getSum(); |
941 | }else{ |
942 | throw ParseError("Sum is not defined." ); |
943 | } |
944 | } |
945 | |
946 | void setSum(int64_t sum) { |
947 | _stats.setHasSum(true); |
948 | _stats.setSum(sum); |
949 | } |
950 | |
951 | void update(int64_t value, int repetitions); |
952 | |
953 | void merge(const MutableColumnStatistics& other) override { |
954 | const IntegerColumnStatisticsImpl& intStats = |
955 | dynamic_cast<const IntegerColumnStatisticsImpl&>(other); |
956 | |
957 | _stats.merge(intStats._stats); |
958 | |
959 | // update sum and check overflow |
960 | _stats.setHasSum(_stats.hasSum() && intStats.hasSum()); |
961 | if (_stats.hasSum()) { |
962 | bool wasPositive = _stats.getSum() >= 0; |
963 | _stats.setSum(_stats.getSum() + intStats.getSum()); |
964 | if ((intStats.getSum() >= 0) == wasPositive) { |
965 | _stats.setHasSum((_stats.getSum() >= 0) == wasPositive); |
966 | } |
967 | } |
968 | } |
969 | |
970 | void reset() override { |
971 | _stats.reset(); |
972 | setSum(0); |
973 | } |
974 | |
975 | void toProtoBuf(proto::ColumnStatistics& pbStats) const override { |
976 | pbStats.set_hasnull(_stats.hasNull()); |
977 | pbStats.set_numberofvalues(_stats.getNumberOfValues()); |
978 | |
979 | proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics(); |
980 | if (_stats.hasMinimum()) { |
981 | intStats->set_minimum(_stats.getMinimum()); |
982 | intStats->set_maximum(_stats.getMaximum()); |
983 | } |
984 | if (_stats.hasSum()) { |
985 | intStats->set_sum(_stats.getSum()); |
986 | } |
987 | } |
988 | |
989 | std::string toString() const override { |
990 | std::ostringstream buffer; |
991 | buffer << "Data type: Integer" << std::endl |
992 | << "Values: " << getNumberOfValues() << std::endl |
993 | << "Has null: " << (hasNull() ? "yes" : "no" ) << std::endl; |
994 | if(hasMinimum()){ |
995 | buffer << "Minimum: " << getMinimum() << std::endl; |
996 | }else{ |
997 | buffer << "Minimum: not defined" << std::endl; |
998 | } |
999 | |
1000 | if(hasMaximum()){ |
1001 | buffer << "Maximum: " << getMaximum() << std::endl; |
1002 | }else{ |
1003 | buffer << "Maximum: not defined" << std::endl; |
1004 | } |
1005 | |
1006 | if(hasSum()){ |
1007 | buffer << "Sum: " << getSum() << std::endl; |
1008 | }else{ |
1009 | buffer << "Sum: not defined" << std::endl; |
1010 | } |
1011 | return buffer.str(); |
1012 | } |
1013 | }; |
1014 | |
1015 | class StringColumnStatisticsImpl: public StringColumnStatistics, |
1016 | public MutableColumnStatistics{ |
1017 | private: |
1018 | InternalStringStatistics _stats; |
1019 | |
1020 | public: |
1021 | StringColumnStatisticsImpl() { |
1022 | reset(); |
1023 | } |
1024 | StringColumnStatisticsImpl(const proto::ColumnStatistics& stats, |
1025 | const StatContext& statContext); |
1026 | virtual ~StringColumnStatisticsImpl() override; |
1027 | |
1028 | bool hasMinimum() const override { |
1029 | return _stats.hasMinimum(); |
1030 | } |
1031 | |
1032 | bool hasMaximum() const override { |
1033 | return _stats.hasMaximum(); |
1034 | } |
1035 | |
1036 | bool hasTotalLength() const override { |
1037 | return _stats.hasTotalLength(); |
1038 | } |
1039 | |
1040 | void increase(uint64_t count) override { |
1041 | _stats.setNumberOfValues(_stats.getNumberOfValues() + count); |
1042 | } |
1043 | |
1044 | uint64_t getNumberOfValues() const override { |
1045 | return _stats.getNumberOfValues(); |
1046 | } |
1047 | |
1048 | void setNumberOfValues(uint64_t value) override { |
1049 | _stats.setNumberOfValues(value); |
1050 | } |
1051 | |
1052 | bool hasNull() const override { |
1053 | return _stats.hasNull(); |
1054 | } |
1055 | |
1056 | void setHasNull(bool hasNull) override { |
1057 | _stats.setHasNull(hasNull); |
1058 | } |
1059 | |
1060 | std::string getMinimum() const override { |
1061 | if(hasMinimum()){ |
1062 | return _stats.getMinimum(); |
1063 | }else{ |
1064 | throw ParseError("Minimum is not defined." ); |
1065 | } |
1066 | } |
1067 | |
1068 | std::string getMaximum() const override { |
1069 | if(hasMaximum()){ |
1070 | return _stats.getMaximum(); |
1071 | }else{ |
1072 | throw ParseError("Maximum is not defined." ); |
1073 | } |
1074 | } |
1075 | |
1076 | void setMinimum(std::string minimum) { |
1077 | _stats.setHasMinimum(true); |
1078 | _stats.setMinimum(minimum); |
1079 | } |
1080 | |
1081 | void setMaximum(std::string maximum) { |
1082 | _stats.setHasMaximum(true); |
1083 | _stats.setMaximum(maximum); |
1084 | } |
1085 | |
1086 | uint64_t getTotalLength() const override { |
1087 | if(hasTotalLength()){ |
1088 | return _stats.getTotalLength(); |
1089 | }else{ |
1090 | throw ParseError("Total length is not defined." ); |
1091 | } |
1092 | } |
1093 | |
1094 | void setTotalLength(uint64_t length) { |
1095 | _stats.setHasTotalLength(true); |
1096 | _stats.setTotalLength(length); |
1097 | } |
1098 | |
1099 | void update(const char* value, size_t length) { |
1100 | if (value != nullptr) { |
1101 | if (!_stats.hasMinimum()) { |
1102 | setMinimum(std::string(value, value + length)); |
1103 | setMaximum(std::string(value, value + length)); |
1104 | } else { |
1105 | // update min |
1106 | int minCmp = strncmp(_stats.getMinimum().c_str(), |
1107 | value, |
1108 | std::min(_stats.getMinimum().length(), length)); |
1109 | if (minCmp > 0 || |
1110 | (minCmp == 0 && length < _stats.getMinimum().length())) { |
1111 | setMinimum(std::string(value, value + length)); |
1112 | } |
1113 | |
1114 | // update max |
1115 | int maxCmp = strncmp(_stats.getMaximum().c_str(), |
1116 | value, |
1117 | std::min(_stats.getMaximum().length(), length)); |
1118 | if (maxCmp < 0 || |
1119 | (maxCmp == 0 && length > _stats.getMaximum().length())) { |
1120 | setMaximum(std::string(value, value + length)); |
1121 | } |
1122 | } |
1123 | } |
1124 | |
1125 | _stats.setTotalLength(_stats.getTotalLength() + length); |
1126 | } |
1127 | |
1128 | void update(std::string value) { |
1129 | update(value.c_str(), value.length()); |
1130 | } |
1131 | |
1132 | void merge(const MutableColumnStatistics& other) override { |
1133 | const StringColumnStatisticsImpl& strStats = |
1134 | dynamic_cast<const StringColumnStatisticsImpl&>(other); |
1135 | _stats.merge(strStats._stats); |
1136 | } |
1137 | |
1138 | void reset() override { |
1139 | _stats.reset(); |
1140 | setTotalLength(0); |
1141 | } |
1142 | |
1143 | void toProtoBuf(proto::ColumnStatistics& pbStats) const override { |
1144 | pbStats.set_hasnull(_stats.hasNull()); |
1145 | pbStats.set_numberofvalues(_stats.getNumberOfValues()); |
1146 | |
1147 | proto::StringStatistics* strStats = pbStats.mutable_stringstatistics(); |
1148 | if (_stats.hasMinimum()) { |
1149 | strStats->set_minimum(_stats.getMinimum()); |
1150 | strStats->set_maximum(_stats.getMaximum()); |
1151 | } |
1152 | if (_stats.hasTotalLength()) { |
1153 | strStats->set_sum(static_cast<int64_t>(_stats.getTotalLength())); |
1154 | } |
1155 | } |
1156 | |
1157 | std::string toString() const override { |
1158 | std::ostringstream buffer; |
1159 | buffer << "Data type: String" << std::endl |
1160 | << "Values: " << getNumberOfValues() << std::endl |
1161 | << "Has null: " << (hasNull() ? "yes" : "no" ) << std::endl; |
1162 | if(hasMinimum()){ |
1163 | buffer << "Minimum: " << getMinimum() << std::endl; |
1164 | }else{ |
1165 | buffer << "Minimum is not defined" << std::endl; |
1166 | } |
1167 | |
1168 | if(hasMaximum()){ |
1169 | buffer << "Maximum: " << getMaximum() << std::endl; |
1170 | }else{ |
1171 | buffer << "Maximum is not defined" << std::endl; |
1172 | } |
1173 | |
1174 | if(hasTotalLength()){ |
1175 | buffer << "Total length: " << getTotalLength() << std::endl; |
1176 | }else{ |
1177 | buffer << "Total length is not defined" << std::endl; |
1178 | } |
1179 | return buffer.str(); |
1180 | } |
1181 | }; |
1182 | |
1183 | class TimestampColumnStatisticsImpl: public TimestampColumnStatistics, |
1184 | public MutableColumnStatistics { |
1185 | private: |
1186 | InternalIntegerStatistics _stats; |
1187 | bool _hasLowerBound; |
1188 | bool _hasUpperBound; |
1189 | int64_t _lowerBound; |
1190 | int64_t _upperBound; |
1191 | |
1192 | public: |
1193 | TimestampColumnStatisticsImpl() { reset(); } |
1194 | TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats, |
1195 | const StatContext& statContext); |
1196 | virtual ~TimestampColumnStatisticsImpl() override; |
1197 | |
1198 | bool hasMinimum() const override { |
1199 | return _stats.hasMinimum(); |
1200 | } |
1201 | |
1202 | bool hasMaximum() const override { |
1203 | return _stats.hasMaximum(); |
1204 | } |
1205 | |
1206 | uint64_t getNumberOfValues() const override { |
1207 | return _stats.getNumberOfValues(); |
1208 | } |
1209 | |
1210 | void setNumberOfValues(uint64_t value) override { |
1211 | _stats.setNumberOfValues(value); |
1212 | } |
1213 | |
1214 | void increase(uint64_t count) override { |
1215 | _stats.setNumberOfValues(_stats.getNumberOfValues() + count); |
1216 | } |
1217 | |
1218 | bool hasNull() const override { |
1219 | return _stats.hasNull(); |
1220 | } |
1221 | |
1222 | void setHasNull(bool hasNull) override { |
1223 | _stats.setHasNull(hasNull); |
1224 | } |
1225 | |
1226 | int64_t getMinimum() const override { |
1227 | if(hasMinimum()){ |
1228 | return _stats.getMinimum(); |
1229 | }else{ |
1230 | throw ParseError("Minimum is not defined." ); |
1231 | } |
1232 | } |
1233 | |
1234 | int64_t getMaximum() const override { |
1235 | if(hasMaximum()){ |
1236 | return _stats.getMaximum(); |
1237 | }else{ |
1238 | throw ParseError("Maximum is not defined." ); |
1239 | } |
1240 | } |
1241 | |
1242 | void setMinimum(int64_t minimum) { |
1243 | _stats.setHasMinimum(true); |
1244 | _stats.setMinimum(minimum); |
1245 | } |
1246 | |
1247 | void setMaximum(int64_t maximum) { |
1248 | _stats.setHasMaximum(true); |
1249 | _stats.setMaximum(maximum); |
1250 | } |
1251 | |
1252 | void update(int64_t value) { |
1253 | _stats.updateMinMax(value); |
1254 | } |
1255 | |
1256 | void merge(const MutableColumnStatistics& other) override { |
1257 | const TimestampColumnStatisticsImpl& tsStats = |
1258 | dynamic_cast<const TimestampColumnStatisticsImpl&>(other); |
1259 | _stats.merge(tsStats._stats); |
1260 | } |
1261 | |
1262 | void reset() override { |
1263 | _stats.reset(); |
1264 | } |
1265 | |
1266 | void toProtoBuf(proto::ColumnStatistics& pbStats) const override { |
1267 | pbStats.set_hasnull(_stats.hasNull()); |
1268 | pbStats.set_numberofvalues(_stats.getNumberOfValues()); |
1269 | |
1270 | if (_stats.hasMinimum()) { |
1271 | proto::TimestampStatistics* tsStats = |
1272 | pbStats.mutable_timestampstatistics(); |
1273 | tsStats->set_minimumutc(_stats.getMinimum()); |
1274 | tsStats->set_maximumutc(_stats.getMaximum()); |
1275 | } |
1276 | } |
1277 | |
1278 | std::string toString() const override { |
1279 | std::ostringstream buffer; |
1280 | struct tm tmValue; |
1281 | char timeBuffer[20]; |
1282 | time_t secs = 0; |
1283 | |
1284 | buffer << "Data type: Timestamp" << std::endl |
1285 | << "Values: " << getNumberOfValues() << std::endl |
1286 | << "Has null: " << (hasNull() ? "yes" : "no" ) << std::endl; |
1287 | if(hasMinimum()){ |
1288 | secs = static_cast<time_t>(getMinimum() / 1000); |
1289 | gmtime_r(&secs, &tmValue); |
1290 | strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S" , &tmValue); |
1291 | buffer << "Minimum: " << timeBuffer << "." |
1292 | << (getMinimum() % 1000) << std::endl; |
1293 | }else{ |
1294 | buffer << "Minimum is not defined" << std::endl; |
1295 | } |
1296 | |
1297 | if(hasLowerBound()){ |
1298 | secs = static_cast<time_t>(getLowerBound() / 1000); |
1299 | gmtime_r(&secs, &tmValue); |
1300 | strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S" , &tmValue); |
1301 | buffer << "LowerBound: " << timeBuffer << "." |
1302 | << (getLowerBound() % 1000) << std::endl; |
1303 | }else{ |
1304 | buffer << "LowerBound is not defined" << std::endl; |
1305 | } |
1306 | |
1307 | if(hasMaximum()){ |
1308 | secs = static_cast<time_t>(getMaximum()/1000); |
1309 | gmtime_r(&secs, &tmValue); |
1310 | strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S" , &tmValue); |
1311 | buffer << "Maximum: " << timeBuffer << "." |
1312 | << (getMaximum() % 1000) << std::endl; |
1313 | }else{ |
1314 | buffer << "Maximum is not defined" << std::endl; |
1315 | } |
1316 | |
1317 | if(hasUpperBound()){ |
1318 | secs = static_cast<time_t>(getUpperBound() / 1000); |
1319 | gmtime_r(&secs, &tmValue); |
1320 | strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S" , &tmValue); |
1321 | buffer << "UpperBound: " << timeBuffer << "." |
1322 | << (getUpperBound() % 1000) << std::endl; |
1323 | }else{ |
1324 | buffer << "UpperBound is not defined" << std::endl; |
1325 | } |
1326 | |
1327 | return buffer.str(); |
1328 | } |
1329 | |
1330 | bool hasLowerBound() const override { |
1331 | return _hasLowerBound; |
1332 | } |
1333 | |
1334 | bool hasUpperBound() const override { |
1335 | return _hasUpperBound; |
1336 | } |
1337 | |
1338 | int64_t getLowerBound() const override { |
1339 | if(hasLowerBound()){ |
1340 | return _lowerBound; |
1341 | }else{ |
1342 | throw ParseError("LowerBound is not defined." ); |
1343 | } |
1344 | } |
1345 | |
1346 | int64_t getUpperBound() const override { |
1347 | if(hasUpperBound()){ |
1348 | return _upperBound; |
1349 | }else{ |
1350 | throw ParseError("UpperBound is not defined." ); |
1351 | } |
1352 | } |
1353 | }; |
1354 | |
1355 | ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s, |
1356 | const StatContext& statContext); |
1357 | |
1358 | class StatisticsImpl: public Statistics { |
1359 | private: |
1360 | std::list<ColumnStatistics*> colStats; |
1361 | |
1362 | // DELIBERATELY NOT IMPLEMENTED |
1363 | StatisticsImpl(const StatisticsImpl&); |
1364 | StatisticsImpl& operator=(const StatisticsImpl&); |
1365 | |
1366 | public: |
1367 | StatisticsImpl(const proto::StripeStatistics& stripeStats, |
1368 | const StatContext& statContext); |
1369 | |
1370 | (const proto::Footer& , const StatContext& statContext); |
1371 | |
1372 | virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId |
1373 | ) const override { |
1374 | std::list<ColumnStatistics*>::const_iterator it = colStats.begin(); |
1375 | std::advance(it, static_cast<int64_t>(columnId)); |
1376 | return *it; |
1377 | } |
1378 | |
1379 | virtual ~StatisticsImpl() override; |
1380 | |
1381 | uint32_t getNumberOfColumns() const override { |
1382 | return static_cast<uint32_t>(colStats.size()); |
1383 | } |
1384 | }; |
1385 | |
1386 | class StripeStatisticsImpl: public StripeStatistics { |
1387 | private: |
1388 | std::unique_ptr<StatisticsImpl> columnStats; |
1389 | std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > > |
1390 | rowIndexStats; |
1391 | |
1392 | // DELIBERATELY NOT IMPLEMENTED |
1393 | StripeStatisticsImpl(const StripeStatisticsImpl&); |
1394 | StripeStatisticsImpl& operator=(const StripeStatisticsImpl&); |
1395 | |
1396 | public: |
1397 | StripeStatisticsImpl( |
1398 | const proto::StripeStatistics& stripeStats, |
1399 | std::vector<std::vector<proto::ColumnStatistics> >& indexStats, |
1400 | const StatContext& statContext); |
1401 | |
1402 | virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId |
1403 | ) const override { |
1404 | return columnStats->getColumnStatistics(columnId); |
1405 | } |
1406 | |
1407 | uint32_t getNumberOfColumns() const override { |
1408 | return columnStats->getNumberOfColumns(); |
1409 | } |
1410 | |
1411 | virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId, |
1412 | uint32_t rowIndex |
1413 | ) const override { |
1414 | // check id indices are valid |
1415 | return rowIndexStats[columnId][rowIndex].get(); |
1416 | } |
1417 | |
1418 | virtual ~StripeStatisticsImpl() override; |
1419 | |
1420 | uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override { |
1421 | return static_cast<uint32_t>(rowIndexStats[columnId].size()); |
1422 | } |
1423 | }; |
1424 | |
1425 | /** |
1426 | * Create ColumnStatistics for writers |
1427 | * @param type of column |
1428 | * @return MutableColumnStatistics instances |
1429 | */ |
1430 | std::unique_ptr<MutableColumnStatistics> createColumnStatistics( |
1431 | const Type& type); |
1432 | |
1433 | }// namespace |
1434 | |
1435 | #endif |
1436 | |