1#ifndef BUCKETED_DATA_FILE_H
2#define BUCKETED_DATA_FILE_H
3
4/*
5 * Legal Notice
6 *
7 * This document and associated source code (the "Work") is a part of a
8 * benchmark specification maintained by the TPC.
9 *
10 * The TPC reserves all right, title, and interest to the Work as provided
11 * under U.S. and international laws, including without limitation all patent
12 * and trademark rights therein.
13 *
14 * No Warranty
15 *
16 * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION
17 * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE
18 * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER
19 * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY,
20 * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES,
21 * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR
22 * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF
23 * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE.
24 * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT,
25 * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT
26 * WITH REGARD TO THE WORK.
27 * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO
28 * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE
29 * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS
30 * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT,
31 * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY,
32 * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT
33 * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD
34 * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES.
35 *
36 * Contributors
37 * - Doug Johnson
38 */
39
#include <cstdlib> // for C string-to-integer conversion (atoi / strtol)
#include <deque>
#include <stdexcept>
#include <string> // std::string (would also provide stoi under C++11)
#include <vector>

#include "ITextSplitter.h"
#include "ShrinkToFit.h"
47
48namespace TPCE {
49//
50// Description:
51// A template class for converting a series of text records into a
52// bucketed binary in-memory structure for quick easy access.
53//
54// Exception Safety:
55// The Basic guarantee is provided.
56//
57// Copy Behavior:
58// Copying is allowed.
59//
60//
61// Assumptions:
62// - bucket IDs start at 1.
63// - records are sorted by bucket ID smallest to largest.
64//
65template <class T> class BucketedDataFile {
66public:
67 // Leverage the size type of our underlying storage container but
68 // insulate clients from the implementation particulars by creating
69 // our own type.
70 // Set this first so we can use it for recordCount.
71 typedef typename std::vector<T>::size_type size_type;
72
73private:
74 std::vector<std::vector<T>> buckets;
75 size_type recordCount;
76
77public:
78 enum SizeFilter { AllRecords, BucketsOnly };
79
80 explicit BucketedDataFile(ITextSplitter &splitter) : recordCount(0) {
81 // eof only returns true after trying to read the end, so
82 // "prime the pump" by doing an initial read.
83 std::deque<std::string> fields = splitter.getNextRecord();
84
85 // Process each record.
86 while (!splitter.eof()) {
87 if (1 == fields.size() && "" == fields[0]) {
88 // We found a blank line so skip it and move on.
89 fields = splitter.getNextRecord();
90 continue;
91 }
92
93 // The first field is the bucket ID for this record.
94 // int bucketID = std::stoi(fields[0]); // C++11
95 unsigned int bucketID = std::atoi(fields[0].c_str());
96 fields.pop_front();
97
98 if (buckets.size() == bucketID - 1) {
99 // First record of a new bucket so add the bucket.
100 buckets.push_back(std::vector<T>());
101 }
102
103 // Now we know the bucket exists so go ahead and add the record.
104 buckets[bucketID - 1].push_back(T(fields));
105 ++recordCount;
106
107 // Move on to the next record.
108 fields = splitter.getNextRecord();
109 }
110
111 // Now that everything has been loaded tighten up our storage.
112 // NOTE: shrinking the outer bucket vector has the side effect of
113 // shrinking all the internal bucket vectors.
114 shrink_to_fit<std::vector<std::vector<T>>>(buckets);
115 // buckets.shrink_to_fit(); // C++11
116 }
117
118 //
119 // Default copies and destructor are ok.
120 //
121 // ~BucketedDataFile();
122 // BucketedDataFile(const BucketedDataFile&);
123 // BucketedDataFile& operator=(const BucketedDataFile&);
124 //
125
126 size_type size(SizeFilter filter = AllRecords) const {
127 return (filter == AllRecords ? recordCount : buckets.size());
128 }
129
130 // Provide 0-based access to the buckets.
131 const std::vector<T> &operator[](size_type idx) const {
132 return buckets[idx];
133 }
134
135 // Provide range-checked 0-based access to the buckets.
136 const std::vector<T> &at(size_type idx) const {
137 return buckets.at(idx);
138 }
139
140 // Provide range-checked bucket-ID-based access by to the buckets.
141 const std::vector<T> &getBucket(size_type bucketID, bool rangeCheckedAccess = false) const {
142 size_type idx = bucketID - 1;
143 return (rangeCheckedAccess ? buckets.at(idx) : buckets[idx]);
144 }
145};
146
147} // namespace TPCE
148#endif // BUCKETED_DATA_FILE_H
149