1 | #ifndef WEIGHTED_DATA_FILE_H |
2 | #define WEIGHTED_DATA_FILE_H |
3 | |
4 | /* |
5 | * Legal Notice |
6 | * |
7 | * This document and associated source code (the "Work") is a part of a |
8 | * benchmark specification maintained by the TPC. |
9 | * |
10 | * The TPC reserves all right, title, and interest to the Work as provided |
11 | * under U.S. and international laws, including without limitation all patent |
12 | * and trademark rights therein. |
13 | * |
14 | * No Warranty |
15 | * |
16 | * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION |
17 | * CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE |
18 | * AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER |
19 | * WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, |
20 | * INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES, |
21 | * DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR |
22 | * PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF |
23 | * WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE. |
24 | * ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT, |
25 | * QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT |
26 | * WITH REGARD TO THE WORK. |
27 | * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO |
28 | * ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE |
29 | * COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS |
30 | * OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT, |
31 | * INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY, |
32 | * OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT |
33 | * RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD |
34 | * ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. |
35 | * |
36 | * Contributors |
37 | * - Doug Johnson |
38 | */ |
39 | |
40 | #include <deque> |
41 | #include <sstream> |
42 | #include <vector> |
43 | #include <stdexcept> |
44 | |
45 | //#include <string> // for stoi C++11 |
46 | #include <cstdlib> // for atoi |
47 | |
48 | #include "ITextSplitter.h" |
49 | #include "ShrinkToFit.h" |
50 | |
51 | namespace TPCE { |
52 | // |
53 | // Description: |
54 | // A template class for converting a series of weighted text records |
55 | // into a binary in-memory structure for quick easy access. |
56 | // |
57 | // Exception Safety: |
58 | // The Basic guarantee is provided. |
59 | // |
60 | // Copy Behavior: |
61 | // Copying is allowed. |
62 | // |
63 | template <class RecordType> class WeightedDataFile { |
64 | private: |
65 | typedef std::vector<RecordType> Records; // For convenience and readability |
66 | Records records; |
67 | |
68 | typedef std::vector<int> Weights; // For convenience and readability |
69 | Weights weightedIndexes; |
70 | |
71 | public: |
72 | // Leverage the size type of our underlying storage container but |
73 | // insulate clients from the implementation particulars by creating |
74 | // our own type. |
75 | typedef typename Records::size_type size_type; |
76 | |
77 | enum SizeFilter { AllRecords, UniqueRecordsOnly }; |
78 | |
79 | explicit WeightedDataFile(ITextSplitter &splitter) { |
80 | // eof only returns true after trying to read the end, so |
81 | // "prime the pump" by doing an initial read. |
82 | std::deque<std::string> fields = splitter.getNextRecord(); |
83 | |
84 | // Process each record. |
85 | while (!splitter.eof()) { |
86 | if (1 == fields.size() && "" == fields[0]) { |
87 | // We found a blank line so skip it and move on. |
88 | fields = splitter.getNextRecord(); |
89 | continue; |
90 | } |
91 | |
92 | // The first field is the weight for this record. |
93 | // int weight = std::stoi(fields[0]); // C++11 |
94 | int weight = std::atoi(fields[0].c_str()); |
95 | fields.pop_front(); |
96 | |
97 | // Set up the weighted indexes for the record. |
98 | for (int ii = 0; ii < weight; ++ii) { |
99 | weightedIndexes.push_back(records.size()); |
100 | } |
101 | |
102 | // Add the record. |
103 | records.push_back(RecordType(fields)); |
104 | |
105 | // Move on to the next record. |
106 | fields = splitter.getNextRecord(); |
107 | } |
108 | |
109 | // Now that everything has been loaded tighten up our storage. |
110 | shrink_to_fit<Weights>(weightedIndexes); |
111 | shrink_to_fit<Records>(records); |
112 | // weightedIndexes.shrink_to_fit(); // C++11 |
113 | // records.shrink_to_fit(); // C++11 |
114 | } |
115 | |
116 | // |
117 | // Default copies and destructor are ok. |
118 | // |
119 | // ~WeightedDataFile(); |
120 | // WeightedDataFile(const WeightedDataFile&); |
121 | // WeightedDataFile& operator=(const WeightedDataFile&); |
122 | // |
123 | |
124 | size_type size(SizeFilter filter = AllRecords) const { |
125 | return (filter == AllRecords ? weightedIndexes.size() : records.size()); |
126 | } |
127 | |
128 | const RecordType &operator[](size_type weightedIndex) const { |
129 | return records[weightedIndexes[weightedIndex]]; |
130 | } |
131 | |
132 | const RecordType &at(size_type weightedIndex) const { |
133 | return records.at(weightedIndexes.at(weightedIndex)); |
134 | } |
135 | |
136 | const RecordType &getUniqueRecord(size_type idx, bool rangeCheckedAccess = false) const { |
137 | return (rangeCheckedAccess ? records.at(idx) : records[idx]); |
138 | } |
139 | }; |
140 | |
141 | } // namespace TPCE |
142 | #endif // WEIGHTED_DATA_FILE_H |
143 | |