/** Run this (example):
  * ./arena_with_free_lists 5000000 < ../../Server/data/test/hits/20140317_20140323_2_178_4/Title.bin
  */
4 | |
5 | #define USE_BAD_ARENA 0 |
6 | |
7 | #if !USE_BAD_ARENA |
8 | #include <Common/ArenaWithFreeLists.h> |
9 | #endif |
10 | |
11 | #include <variant> |
12 | #include <memory> |
13 | #include <array> |
14 | #include <sys/resource.h> |
15 | #include <ext/bit_cast.h> |
16 | #include <ext/size.h> |
17 | #include <Common/Arena.h> |
18 | |
19 | #include <common/StringRef.h> |
20 | #include <Core/Field.h> |
21 | #include <Common/Stopwatch.h> |
22 | #include <IO/ReadBufferFromFileDescriptor.h> |
23 | #include <Compression/CompressedReadBuffer.h> |
24 | #include <IO/ReadHelpers.h> |
25 | |
26 | using namespace DB; |
27 | |
28 | namespace DB |
29 | { |
30 | namespace ErrorCodes |
31 | { |
32 | extern const int SYSTEM_ERROR; |
33 | } |
34 | } |
35 | |
36 | |
37 | /// Implementation of ArenaWithFreeLists, which contains a bug. Used to reproduce the bug. |
38 | #if USE_BAD_ARENA |
39 | |
/// Arena that recycles freed chunks through per-size-class free lists.
/// NOTE: this is the intentionally buggy implementation, kept verbatim so that
/// the original production bug can be reproduced (enable via USE_BAD_ARENA).
/// Do not "fix" it here — the fix is shown, commented out, at the bug site.
class ArenaWithFreeLists : private Allocator<false>
{
private:
    /// Intrusive free-list node, stored in place inside a freed chunk.
    struct Block { Block * next; };

    /// Fixed size classes served from the free lists. Requests larger than the
    /// last entry bypass the pool and go straight to the base Allocator.
    static const std::array<size_t, 14> & getSizes()
    {
        static constexpr std::array<size_t, 14> sizes{
            8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536
        };

        static_assert(sizes.front() >= sizeof(Block), "Can't make allocations smaller than sizeof(Block)");

        return sizes;
    }

    /// Index of the highest set bit of (size - 1): powers of two fall into the
    /// previous bucket.
    /// BUG (deliberately preserved): for size == 1 this calls _bit_scan_reverse(0),
    /// whose result is undefined, producing a bogus bucket index.
    static auto sizeToPreviousPowerOfTwo(const int size)
    {
        return _bit_scan_reverse(size - 1);
        /// The bug is located in the line above. If you change to the next line, then the bug is fixed.
        //return size <= 1 ? 0 : _bit_scan_reverse(size - 1);
    }

    /// Bucket number of the smallest size class; smaller requests are clamped up to it.
    static auto getMinBucketNum()
    {
        static const auto val = sizeToPreviousPowerOfTwo(getSizes().front());
        return val;
    }
    /// Largest request served from the free lists; above it — direct allocation.
    static auto getMaxFixedBlockSize() { return getSizes().back(); }

    /// Backing storage for chunks that have never been freed.
    Arena pool;
    /// One singly-linked free-list head per size class in getSizes().
    const std::unique_ptr<Block * []> free_lists = std::make_unique<Block * []>(ext::size(getSizes()));

    /// Maps an allocation size to the index of its free list.
    static size_t findFreeListIndex(const size_t size)
    {
        /// shift powers of two into previous bucket by subtracting 1
        const auto bucket_num = sizeToPreviousPowerOfTwo(size);

        return std::max(bucket_num, getMinBucketNum()) - getMinBucketNum();
    }

public:
    /// Parameters are forwarded to the underlying Arena's growth policy.
    ArenaWithFreeLists(
        const size_t initial_size = 4096, const size_t growth_factor = 2,
        const size_t linear_growth_threshold = 128 * 1024 * 1024)
        : pool{initial_size, growth_factor, linear_growth_threshold}
    {
    }

    /// Returns a chunk of at least `size` bytes: popped from the matching free
    /// list when available, otherwise carved from the pool; oversized requests
    /// are allocated directly.
    char * alloc(const size_t size)
    {
        if (size > getMaxFixedBlockSize())
            return static_cast<char *>(Allocator::alloc(size));

        /// find list of required size
        const auto list_idx = findFreeListIndex(size);

        if (auto & block = free_lists[list_idx])
        {
            /// Pop the head of the free list and reuse it.
            const auto res = ext::bit_cast<char *>(block);
            block = block->next;
            return res;
        }

        /// no block of corresponding size, allocate a new one
        return pool.alloc(getSizes()[list_idx]);
    }

    /// Returns a chunk to its free list. `size` must match the size passed to
    /// alloc(); oversized chunks are released to the base Allocator. Pool
    /// memory itself is only reclaimed when the whole arena is destroyed.
    void free(const void * ptr, const size_t size)
    {
        if (size > getMaxFixedBlockSize())
            return Allocator::free(const_cast<void *>(ptr), size);

        /// find list of required size
        const auto list_idx = findFreeListIndex(size);

        /// Push the chunk onto the head of its free list.
        auto & block = free_lists[list_idx];
        const auto old = block;
        block = ext::bit_cast<Block *>(ptr);
        block->next = old;
    }

    /// Size of the allocated pool in bytes
    size_t size() const
    {
        return pool.size();
    }
};
128 | |
129 | #endif |
130 | |
131 | |
132 | /// A small piece copied from the CacheDictionary. It is used only to demonstrate the problem. |
/// Minimal excerpt of a dictionary attribute cache, sufficient to drive the
/// arena through the same allocation/free pattern as the production code.
struct Dictionary
{
    /// Unbounded raw array (new[]-allocated), owned through unique_ptr.
    template <typename Value> using ContainerType = Value[];
    template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;

    /// Supported attribute value types ("ut" = underlying type).
    enum class AttributeUnderlyingType
    {
        utUInt8,
        utUInt16,
        utUInt32,
        utUInt64,
        utInt8,
        utInt16,
        utInt32,
        utInt64,
        utFloat32,
        utFloat64,
        utString
    };

    /// One attribute column: its type tag, the per-type default ("null") value,
    /// and the per-type storage array. Only the variant alternative matching
    /// `type` is ever accessed.
    struct Attribute final
    {
        AttributeUnderlyingType type;
        std::variant<
            UInt8, UInt16, UInt32, UInt64,
            Int8, Int16, Int32, Int64,
            Float32, Float64,
            String> null_values;
        std::variant<
            ContainerPtrType<UInt8>, ContainerPtrType<UInt16>, ContainerPtrType<UInt32>, ContainerPtrType<UInt64>,
            ContainerPtrType<Int8>, ContainerPtrType<Int16>, ContainerPtrType<Int32>, ContainerPtrType<Int64>,
            ContainerPtrType<Float32>, ContainerPtrType<Float64>,
            ContainerPtrType<StringRef>> arrays;
    };

    /// Arena that owns the bytes referenced by the StringRef entries in `arrays`.
    std::unique_ptr<ArenaWithFreeLists> string_arena;

    /// Writes `value` into the attribute's array at position `idx`, dispatching
    /// on the attribute's type tag. String values are copied into string_arena.
    /// This function is compiled into exactly the same machine code as in production, when there was a bug.
    void NO_INLINE setAttributeValue(Attribute & attribute, const UInt64 idx, const Field & value) const
    {
        switch (attribute.type)
        {
            case AttributeUnderlyingType::utUInt8: std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>(); break;
            case AttributeUnderlyingType::utUInt16: std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>(); break;
            case AttributeUnderlyingType::utUInt32: std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>(); break;
            case AttributeUnderlyingType::utUInt64: std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>(); break;
            case AttributeUnderlyingType::utInt8: std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>(); break;
            case AttributeUnderlyingType::utInt16: std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>(); break;
            case AttributeUnderlyingType::utInt32: std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>(); break;
            case AttributeUnderlyingType::utInt64: std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>(); break;
            case AttributeUnderlyingType::utFloat32: std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>(); break;
            case AttributeUnderlyingType::utFloat64: std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>(); break;
            case AttributeUnderlyingType::utString:
            {
                const auto & string = value.get<String>();
                auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
                const auto & null_value_ref = std::get<String>(attribute.null_values);

                /// free memory unless it points to a null_value
                /// NOTE(review): the chunk was alloc'd with size + 1 below, but is
                /// freed here with string_ref.size (terminator excluded) — this
                /// asymmetry appears intentional here, as it matches the
                /// production code whose bug this program reproduces.
                if (string_ref.data && string_ref.data != null_value_ref.data())
                    string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);

                const auto size = string.size();
                if (size != 0)
                {
                    /// Copy the string together with its trailing zero byte;
                    /// the stored StringRef::size excludes the terminator.
                    auto string_ptr = string_arena->alloc(size + 1);
                    std::copy(string.data(), string.data() + size + 1, string_ptr);
                    string_ref = StringRef{string_ptr, size};
                }
                else
                    string_ref = {};

                break;
            }
        }
    }
};
210 | |
211 | |
/// Benchmark / bug-reproduction driver: reads length-prefixed strings from a
/// compressed stdin stream, then exercises ArenaWithFreeLists with three
/// workloads — bulk insert, random free+insert, and a dictionary-cache
/// pattern (the last loop runs forever so behavior can be observed over time).
int main(int argc, char ** argv)
{
    if (argc < 2)
    {
        std::cerr << "Usage: program n\n";
        return 1;
    }

    std::cerr << std::fixed << std::setprecision(2);

    /// Maximum number of strings to read from stdin.
    size_t n = parse<size_t>(argv[1]);
    std::vector<std::string> data;
    size_t sum_strings_size = 0;  /// total payload, counting one terminating zero byte per string

    /// Stage 1: read up to n binary-serialized strings from compressed stdin.
    {
        Stopwatch watch;
        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
        DB::CompressedReadBuffer in2(in1);

        for (size_t i = 0; i < n && !in2.eof(); ++i)
        {
            data.emplace_back();
            readStringBinary(data.back(), in2);
            sum_strings_size += data.back().size() + 1;
        }

        watch.stop();
        std::cerr
            << "Read. Elements: " << data.size() << ", bytes: " << sum_strings_size
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
            << " " << sum_strings_size / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
            << std::endl;

        rusage resource_usage;
        if (0 != getrusage(RUSAGE_SELF, &resource_usage))
            throwFromErrno("Cannot getrusage", ErrorCodes::SYSTEM_ERROR);

        /// ru_maxrss is reported in kilobytes (on Linux), hence the * 1024.
        size_t allocated_bytes = resource_usage.ru_maxrss * 1024;
        std::cerr << "Current memory usage: " << allocated_bytes << " bytes.\n";
    }

    ArenaWithFreeLists arena;
    std::vector<StringRef> refs;
    refs.reserve(data.size());

    /// Stage 2: copy every string (with its terminator) into the arena.
    /// Note: here the stored StringRef size *includes* the trailing zero byte,
    /// so later free() calls match the alloc() sizes exactly.
    {
        Stopwatch watch;

        for (const auto & s : data)
        {
            auto ptr = arena.alloc(s.size() + 1);
            memcpy(ptr, s.data(), s.size() + 1);
            refs.emplace_back(ptr, s.size() + 1);
        }

        watch.stop();
        std::cerr
            << "Insert info arena. Bytes: " << arena.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
            << " " << sum_strings_size / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
            << std::endl;
    }

    /// Stage 3: stress the free lists — free a random slot, then refill it
    /// from a random source string, data.size() times.
    //while (true)
    {
        Stopwatch watch;

        size_t bytes = 0;
        for (size_t i = 0, size = data.size(); i < size; ++i)
        {
            size_t index_from = lrand48() % size;
            size_t index_to = lrand48() % size;

            arena.free(const_cast<char *>(refs[index_to].data), refs[index_to].size);
            const auto & s = data[index_from];
            auto ptr = arena.alloc(s.size() + 1);
            memcpy(ptr, s.data(), s.size() + 1);
            bytes += s.size() + 1;

            refs[index_to] = {ptr, s.size() + 1};
        }

        watch.stop();
        std::cerr
            << "Randomly remove and insert elements. Bytes: " << arena.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
            << " " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
            << std::endl;
    }

    /// Stage 4: emulate the CacheDictionary workload — a small fixed-size cache
    /// of StringRef slots, repeatedly overwritten with random strings through
    /// setAttributeValue (this is the access pattern that exposed the bug).
    Dictionary dictionary;
    dictionary.string_arena = std::make_unique<ArenaWithFreeLists>();

    constexpr size_t cache_size = 1024;

    Dictionary::Attribute attr;
    attr.type = Dictionary::AttributeUnderlyingType::utString;
    std::get<Dictionary::ContainerPtrType<StringRef>>(attr.arrays).reset(new StringRef[cache_size]{});

    /// Runs forever on purpose — presumably so memory usage / corruption can be
    /// observed across many passes; terminate with Ctrl+C.
    while (true)
    {
        Stopwatch watch;

        size_t bytes = 0;
        for (size_t i = 0, size = data.size(); i < size; ++i)
        {
            size_t index_from = lrand48() % size;
            size_t index_to = lrand48() % cache_size;

            dictionary.setAttributeValue(attr, index_to, data[index_from]);

            bytes += data[index_from].size() + 1;
        }

        watch.stop();
        std::cerr
            << "Filling cache. Bytes: " << arena.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
            << " " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
            << std::endl;
    }
}
338 | |