1 | /* |
2 | * Copyright 2017-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/concurrency/DynamicBoundedQueue.h> |
18 | #include <folly/MPMCQueue.h> |
19 | #include <folly/ProducerConsumerQueue.h> |
20 | #include <folly/portability/GTest.h> |
21 | |
22 | #include <glog/logging.h> |
23 | |
24 | #include <atomic> |
25 | #include <thread> |
26 | |
27 | DEFINE_bool(bench, false, "run benchmark" ); |
28 | DEFINE_int32(reps, 10, "number of reps" ); |
29 | DEFINE_int32(ops, 1000000, "number of operations per rep" ); |
30 | DEFINE_int64(capacity, 1000000, "capacity" ); |
31 | |
32 | template <typename T, bool MayBlock, typename WeightFn> |
33 | using DSPSC = folly::DSPSCQueue<T, MayBlock, 8, 7, WeightFn>; |
34 | |
35 | template <typename T, bool MayBlock, typename WeightFn> |
36 | using DMPSC = folly::DMPSCQueue<T, MayBlock, 8, 7, WeightFn>; |
37 | |
38 | template <typename T, bool MayBlock, typename WeightFn> |
39 | using DSPMC = folly::DSPMCQueue<T, MayBlock, 8, 7, WeightFn>; |
40 | |
41 | template <typename T, bool MayBlock, typename WeightFn> |
42 | using DMPMC = folly::DMPMCQueue<T, MayBlock, 8, 7, WeightFn>; |
43 | |
44 | template <template <typename, bool, typename> class Q, bool MayBlock> |
45 | void basic_test() { |
46 | auto dur = std::chrono::microseconds(100); |
47 | auto deadline = std::chrono::steady_clock::now() + dur; |
48 | |
49 | struct CustomWeightFn { |
50 | uint64_t operator()(int val) { |
51 | return val * 100; |
52 | } |
53 | }; |
54 | |
55 | Q<int, MayBlock, CustomWeightFn> q(10000); |
56 | |
57 | ASSERT_TRUE(q.empty()); |
58 | ASSERT_EQ(q.size(), 0); |
59 | int v; |
60 | ASSERT_FALSE(q.try_dequeue(v)); |
61 | |
62 | q.enqueue(1); |
63 | ASSERT_TRUE(q.try_enqueue(2)); |
64 | ASSERT_TRUE(q.try_enqueue_until(3, deadline)); |
65 | ASSERT_TRUE(q.try_enqueue_for(4, dur)); |
66 | |
67 | ASSERT_EQ(q.size(), 4); |
68 | ASSERT_EQ(q.weight(), 1000); |
69 | ASSERT_FALSE(q.empty()); |
70 | |
71 | q.dequeue(v); |
72 | ASSERT_EQ(v, 1); |
73 | ASSERT_TRUE(q.try_dequeue(v)); |
74 | ASSERT_EQ(v, 2); |
75 | ASSERT_TRUE(q.try_dequeue_until(v, deadline)); |
76 | ASSERT_EQ(v, 3); |
77 | ASSERT_TRUE(q.try_dequeue_for(v, dur)); |
78 | ASSERT_EQ(v, 4); |
79 | |
80 | ASSERT_TRUE(q.empty()); |
81 | ASSERT_EQ(q.size(), 0); |
82 | ASSERT_EQ(q.weight(), 0); |
83 | } |
84 | |
85 | TEST(DynamicBoundedQueue, basic) { |
86 | basic_test<DSPSC, false>(); |
87 | basic_test<DMPSC, false>(); |
88 | basic_test<DSPMC, false>(); |
89 | basic_test<DMPMC, false>(); |
90 | basic_test<DSPSC, true>(); |
91 | basic_test<DMPSC, true>(); |
92 | basic_test<DSPMC, true>(); |
93 | basic_test<DMPMC, true>(); |
94 | } |
95 | |
96 | template <template <typename, bool, typename> class Q, bool MayBlock> |
97 | void move_test() { |
98 | struct Foo { |
99 | int v_; |
100 | explicit Foo(int v) noexcept : v_(v) {} |
101 | Foo(const Foo&) = delete; |
102 | Foo& operator=(const Foo&) = delete; |
103 | Foo(Foo&& other) noexcept : v_(other.v_) {} |
104 | Foo& operator=(Foo&& other) noexcept { |
105 | v_ = other.v_; |
106 | return *this; |
107 | } |
108 | }; |
109 | |
110 | struct CustomWeightFn { |
111 | uint64_t operator()(Foo&&) { |
112 | return 10; |
113 | } |
114 | }; |
115 | |
116 | auto dur = std::chrono::microseconds(100); |
117 | auto deadline = std::chrono::steady_clock::now() + dur; |
118 | |
119 | Q<Foo, MayBlock, CustomWeightFn> q(100); |
120 | Foo v(1); |
121 | q.enqueue(std::move(v)); |
122 | ASSERT_TRUE(q.try_enqueue(std::move(v))); |
123 | ASSERT_TRUE(q.try_enqueue_until(std::move(v), deadline)); |
124 | ASSERT_TRUE(q.try_enqueue_for(std::move(v), dur)); |
125 | |
126 | ASSERT_EQ(q.size(), 4); |
127 | ASSERT_EQ(q.weight(), 40); |
128 | } |
129 | |
130 | TEST(DynamicBoundedQueue, move) { |
131 | move_test<DSPSC, false>(); |
132 | move_test<DMPSC, false>(); |
133 | move_test<DSPMC, false>(); |
134 | move_test<DMPMC, false>(); |
135 | move_test<DSPSC, true>(); |
136 | move_test<DMPSC, true>(); |
137 | move_test<DSPMC, true>(); |
138 | move_test<DMPMC, true>(); |
139 | } |
140 | |
141 | template <template <typename, bool, typename> class Q, bool MayBlock> |
142 | void capacity_test() { |
143 | struct CustomWeightFn { |
144 | uint64_t operator()(int val) { |
145 | return val; |
146 | } |
147 | }; |
148 | |
149 | Q<int, MayBlock, CustomWeightFn> q(1000); |
150 | ASSERT_EQ(q.weight(), 0); |
151 | int v; |
152 | q.enqueue(100); |
153 | ASSERT_EQ(q.weight(), 100); |
154 | q.enqueue(300); |
155 | ASSERT_EQ(q.weight(), 400); |
156 | ASSERT_FALSE(q.try_enqueue(1200)); |
157 | q.reset_capacity(2000); // reset capacityy to 2000 |
158 | ASSERT_TRUE(q.try_enqueue(1200)); |
159 | ASSERT_EQ(q.weight(), 1600); |
160 | ASSERT_EQ(q.size(), 3); |
161 | q.reset_capacity(1000); // reset capacity back to 1000 |
162 | ASSERT_FALSE(q.try_enqueue(100)); |
163 | q.dequeue(v); |
164 | ASSERT_EQ(v, 100); |
165 | ASSERT_EQ(q.weight(), 1500); |
166 | q.dequeue(v); |
167 | ASSERT_EQ(v, 300); |
168 | ASSERT_EQ(q.weight(), 1200); |
169 | } |
170 | |
171 | TEST(DynamicBoundedQueue, capacity) { |
172 | capacity_test<DSPSC, false>(); |
173 | capacity_test<DMPSC, false>(); |
174 | capacity_test<DSPMC, false>(); |
175 | capacity_test<DMPMC, false>(); |
176 | capacity_test<DSPSC, true>(); |
177 | capacity_test<DMPSC, true>(); |
178 | capacity_test<DSPMC, true>(); |
179 | capacity_test<DMPMC, true>(); |
180 | } |
181 | |
182 | template <typename ProdFunc, typename ConsFunc, typename EndFunc> |
183 | inline uint64_t run_once( |
184 | int nprod, |
185 | int ncons, |
186 | const ProdFunc& prodFn, |
187 | const ConsFunc& consFn, |
188 | const EndFunc& endFn) { |
189 | std::atomic<bool> start{false}; |
190 | std::atomic<int> ready{0}; |
191 | |
192 | /* producers */ |
193 | std::vector<std::thread> prodThr(nprod); |
194 | for (int tid = 0; tid < nprod; ++tid) { |
195 | prodThr[tid] = std::thread([&, tid] { |
196 | ++ready; |
197 | while (!start.load()) { |
198 | /* spin */; |
199 | } |
200 | prodFn(tid); |
201 | }); |
202 | } |
203 | |
204 | /* consumers */ |
205 | std::vector<std::thread> consThr(ncons); |
206 | for (int tid = 0; tid < ncons; ++tid) { |
207 | consThr[tid] = std::thread([&, tid] { |
208 | ++ready; |
209 | while (!start.load()) { |
210 | /* spin */; |
211 | } |
212 | consFn(tid); |
213 | }); |
214 | } |
215 | |
216 | /* wait for all producers and consumers to be ready */ |
217 | while (ready.load() < (nprod + ncons)) { |
218 | /* spin */; |
219 | } |
220 | |
221 | /* begin time measurement */ |
222 | auto tbegin = std::chrono::steady_clock::now(); |
223 | start.store(true); |
224 | |
225 | /* wait for completion */ |
226 | for (int i = 0; i < nprod; ++i) { |
227 | prodThr[i].join(); |
228 | } |
229 | for (int i = 0; i < ncons; ++i) { |
230 | consThr[i].join(); |
231 | } |
232 | |
233 | /* end time measurement */ |
234 | auto tend = std::chrono::steady_clock::now(); |
235 | endFn(); |
236 | return std::chrono::duration_cast<std::chrono::nanoseconds>(tend - tbegin) |
237 | .count(); |
238 | } |
239 | |
240 | template <bool SingleProducer, bool SingleConsumer, bool MayBlock> |
241 | void enq_deq_test(const int nprod, const int ncons) { |
242 | if (SingleProducer) { |
243 | ASSERT_EQ(nprod, 1); |
244 | } |
245 | if (SingleConsumer) { |
246 | ASSERT_EQ(ncons, 1); |
247 | } |
248 | |
249 | int ops = 1000; |
250 | folly::DynamicBoundedQueue<int, SingleProducer, SingleConsumer, MayBlock, 2> |
251 | q(10); |
252 | std::atomic<uint64_t> sum(0); |
253 | |
254 | auto prod = [&](int tid) { |
255 | for (int i = tid; i < ops; i += nprod) { |
256 | if ((i % 3) == 0) { |
257 | while (!q.try_enqueue(i)) { |
258 | /* keep trying */; |
259 | } |
260 | } else if ((i % 3) == 1) { |
261 | auto dur = std::chrono::microseconds(100); |
262 | while (!q.try_enqueue_for(i, dur)) { |
263 | /* keep trying */; |
264 | } |
265 | } else { |
266 | q.enqueue(i); |
267 | } |
268 | } |
269 | }; |
270 | |
271 | auto cons = [&](int tid) { |
272 | uint64_t mysum = 0; |
273 | for (int i = tid; i < ops; i += ncons) { |
274 | int v; |
275 | if ((i % 3) == 0) { |
276 | while (!q.try_dequeue(v)) { |
277 | /* keep trying */; |
278 | } |
279 | } else if ((i % 3) == 1) { |
280 | auto dur = std::chrono::microseconds(100); |
281 | while (!q.try_dequeue_for(v, dur)) { |
282 | /* keep trying */; |
283 | } |
284 | } else { |
285 | q.dequeue(v); |
286 | } |
287 | if (nprod == 1 && ncons == 1) { |
288 | ASSERT_EQ(v, i); |
289 | } |
290 | mysum += v; |
291 | } |
292 | sum.fetch_add(mysum); |
293 | }; |
294 | |
295 | auto endfn = [&] { |
296 | uint64_t expected = ops; |
297 | expected *= ops - 1; |
298 | expected /= 2; |
299 | ASSERT_EQ(sum.load(), expected); |
300 | }; |
301 | run_once(nprod, ncons, prod, cons, endfn); |
302 | } |
303 | |
304 | TEST(DynamicBoundedQueue, enq_deq) { |
305 | /* SPSC */ |
306 | enq_deq_test<true, true, false>(1, 1); |
307 | enq_deq_test<true, true, true>(1, 1); |
308 | /* MPSC */ |
309 | enq_deq_test<false, true, false>(1, 1); |
310 | enq_deq_test<false, true, true>(1, 1); |
311 | enq_deq_test<false, true, false>(2, 1); |
312 | enq_deq_test<false, true, true>(2, 1); |
313 | enq_deq_test<false, true, false>(10, 1); |
314 | enq_deq_test<false, true, true>(10, 1); |
315 | /* SPMC */ |
316 | enq_deq_test<true, false, false>(1, 1); |
317 | enq_deq_test<true, false, true>(1, 1); |
318 | enq_deq_test<true, false, false>(1, 2); |
319 | enq_deq_test<true, false, true>(1, 2); |
320 | enq_deq_test<true, false, false>(1, 10); |
321 | enq_deq_test<true, false, true>(1, 10); |
322 | /* MPMC */ |
323 | enq_deq_test<false, false, false>(1, 1); |
324 | enq_deq_test<false, false, true>(1, 1); |
325 | enq_deq_test<false, false, false>(2, 1); |
326 | enq_deq_test<false, false, true>(2, 1); |
327 | enq_deq_test<false, false, false>(10, 1); |
328 | enq_deq_test<false, false, true>(10, 1); |
329 | enq_deq_test<false, false, false>(1, 2); |
330 | enq_deq_test<false, false, true>(1, 2); |
331 | enq_deq_test<false, false, false>(1, 10); |
332 | enq_deq_test<false, false, true>(1, 10); |
333 | enq_deq_test<false, false, false>(2, 2); |
334 | enq_deq_test<false, false, true>(2, 2); |
335 | enq_deq_test<false, false, false>(10, 10); |
336 | enq_deq_test<false, false, true>(10, 10); |
337 | } |
338 | |
339 | template <typename RepFunc> |
340 | uint64_t runBench(const std::string& name, int ops, const RepFunc& repFn) { |
341 | int reps = FLAGS_reps; |
342 | uint64_t min = UINTMAX_MAX; |
343 | uint64_t max = 0; |
344 | uint64_t sum = 0; |
345 | |
346 | repFn(); // sometimes first run is outlier |
347 | for (int r = 0; r < reps; ++r) { |
348 | uint64_t dur = repFn(); |
349 | sum += dur; |
350 | min = std::min(min, dur); |
351 | max = std::max(max, dur); |
352 | // if each rep takes too long run at least 2 reps |
353 | const uint64_t minute = 60000000000ULL; |
354 | if (sum > minute && r >= 1) { |
355 | reps = r + 1; |
356 | break; |
357 | } |
358 | } |
359 | |
360 | const std::string unit = " ns" ; |
361 | uint64_t avg = sum / reps; |
362 | uint64_t res = min; |
363 | std::cout << name; |
364 | std::cout << " " << std::setw(4) << max / ops << unit; |
365 | std::cout << " " << std::setw(4) << avg / ops << unit; |
366 | std::cout << " " << std::setw(4) << res / ops << unit; |
367 | std::cout << std::endl; |
368 | return res; |
369 | } |
370 | |
371 | template <template <typename, bool, typename> class Q, typename T, int Op> |
372 | uint64_t bench(const int nprod, const int ncons, const std::string& name) { |
373 | int ops = FLAGS_ops; |
374 | auto repFn = [&] { |
375 | Q<T, Op == 3 || Op == 4 || Op == 5, folly::DefaultWeightFn<T>> q( |
376 | FLAGS_capacity); |
377 | std::atomic<uint64_t> sum(0); |
378 | auto prod = [&](int tid) { |
379 | for (int i = tid; i < ops; i += nprod) { |
380 | if (Op == 0 || Op == 3) { |
381 | while (!q.try_enqueue(i)) { |
382 | /* keep trying */; |
383 | } |
384 | } else if (Op == 1 || Op == 4) { |
385 | while (!q.try_enqueue_for(i, std::chrono::microseconds(1000))) { |
386 | /* keep trying */; |
387 | } |
388 | } else { |
389 | q.enqueue(i); |
390 | } |
391 | } |
392 | }; |
393 | auto cons = [&](int tid) { |
394 | uint64_t mysum = 0; |
395 | T v = -1; |
396 | for (int i = tid; i < ops; i += ncons) { |
397 | if (Op == 0 || Op == 3) { |
398 | while (!q.try_dequeue(v)) { |
399 | /* keep trying */; |
400 | } |
401 | } else if (Op == 1 || Op == 4) { |
402 | while (!q.try_dequeue_for(v, std::chrono::microseconds(1000))) { |
403 | /* keep trying */; |
404 | } |
405 | } else { |
406 | q.dequeue(v); |
407 | } |
408 | if (nprod == 1 && ncons == 1) { |
409 | DCHECK_EQ(int(v), i); |
410 | } |
411 | mysum += v; |
412 | } |
413 | sum.fetch_add(mysum); |
414 | }; |
415 | auto endfn = [&] { |
416 | uint64_t expected = ops; |
417 | expected *= ops - 1; |
418 | expected /= 2; |
419 | ASSERT_EQ(sum.load(), expected); |
420 | }; |
421 | return run_once(nprod, ncons, prod, cons, endfn); |
422 | }; |
423 | return runBench(name, ops, repFn); |
424 | } |
425 | |
426 | /* For performance comparison */ |
427 | template <typename T> |
428 | class MPMC { |
429 | folly::MPMCQueue<T> q_; |
430 | |
431 | public: |
432 | explicit MPMC(uint64_t capacity) : q_(capacity) {} |
433 | |
434 | void enqueue(const T& v) { |
435 | q_.blockingWrite(v); |
436 | } |
437 | |
438 | void enqueue(T&& v) { |
439 | q_.blockingWrite(std::move(v)); |
440 | } |
441 | |
442 | bool try_enqueue(const T& v) { |
443 | return q_.write(v); |
444 | } |
445 | |
446 | bool try_enqueue(const T&& v) { |
447 | return q_.write(std::move(v)); |
448 | } |
449 | |
450 | template <typename Rep, typename Period> |
451 | bool try_enqueue_for( |
452 | const T& v, |
453 | const std::chrono::duration<Rep, Period>& duration) { |
454 | return q_.tryWriteUntil(std::chrono::steady_clock::now() + duration, v); |
455 | } |
456 | |
457 | void dequeue(T& item) { |
458 | q_.blockingRead(item); |
459 | } |
460 | |
461 | bool try_dequeue(T& item) { |
462 | return q_.read(item); |
463 | } |
464 | |
465 | template <typename Rep, typename Period> |
466 | bool try_dequeue_for( |
467 | T& item, |
468 | const std::chrono::duration<Rep, Period>& duration) { |
469 | return q_.tryReadUntil(std::chrono::steady_clock::now() + duration, item); |
470 | } |
471 | }; |
472 | |
473 | template <typename T, bool, typename> |
474 | using FMPMC = MPMC<T>; |
475 | |
476 | template <typename T> |
477 | class PCQ { |
478 | folly::ProducerConsumerQueue<T> q_; |
479 | |
480 | public: |
481 | explicit PCQ(uint64_t capacity) : q_(capacity) {} |
482 | |
483 | void enqueue(const T&) { |
484 | ASSERT_TRUE(false); |
485 | } |
486 | |
487 | bool try_enqueue(const T& v) { |
488 | return q_.write(v); |
489 | } |
490 | |
491 | bool try_enqueue(T&& v) { |
492 | return q_.write(std::move(v)); |
493 | } |
494 | |
495 | template <typename Rep, typename Period> |
496 | bool try_enqueue_for(const T&, const std::chrono::duration<Rep, Period>&) { |
497 | return false; |
498 | } |
499 | |
500 | void dequeue(T&) { |
501 | ASSERT_TRUE(false); |
502 | } |
503 | |
504 | bool try_dequeue(T& item) { |
505 | return q_.read(item); |
506 | } |
507 | |
508 | template <typename Rep, typename Period> |
509 | bool try_dequeue_for(T&, const std::chrono::duration<Rep, Period>&) { |
510 | return false; |
511 | } |
512 | }; |
513 | |
514 | template <typename T, bool, typename> |
515 | using FPCQ = PCQ<T>; |
516 | |
517 | template <size_t M> |
518 | struct IntArray { |
519 | int a[M]; |
520 | IntArray() {} |
521 | /* implicit */ IntArray(int v) { |
522 | for (size_t i = 0; i < M; ++i) { |
523 | a[i] = v; |
524 | } |
525 | } |
526 | operator int() { |
527 | return a[0]; |
528 | } |
529 | }; |
530 | |
531 | void dottedLine() { |
532 | std::cout << ".............................................................." |
533 | << std::endl; |
534 | } |
535 | |
536 | template <typename T> |
537 | void type_benches(const int np, const int nc, const std::string& name) { |
538 | std::cout << name |
539 | << "===========================================" << std::endl; |
540 | if (np == 1 && nc == 1) { |
541 | bench<DSPSC, T, 0>(1, 1, "DSPSC try spin only " ); |
542 | bench<DSPSC, T, 1>(1, 1, "DSPSC timed spin only " ); |
543 | bench<DSPSC, T, 2>(1, 1, "DSPSC wait spin only " ); |
544 | bench<DSPSC, T, 3>(1, 1, "DSPSC try may block " ); |
545 | bench<DSPSC, T, 4>(1, 1, "DSPSC timed may block " ); |
546 | bench<DSPSC, T, 5>(1, 1, "DSPSC wait may block " ); |
547 | dottedLine(); |
548 | } |
549 | if (nc == 1) { |
550 | bench<DMPSC, T, 0>(np, 1, "DMPSC try spin only " ); |
551 | bench<DMPSC, T, 1>(np, 1, "DMPSC timed spin only " ); |
552 | bench<DMPSC, T, 2>(np, 1, "DMPSC wait spin only " ); |
553 | bench<DMPSC, T, 3>(np, 1, "DMPSC try may block " ); |
554 | bench<DMPSC, T, 4>(np, 1, "DMPSC timed may block " ); |
555 | bench<DMPSC, T, 5>(np, 1, "DMPSC wait may block " ); |
556 | dottedLine(); |
557 | } |
558 | if (np == 1) { |
559 | bench<DSPMC, T, 0>(1, nc, "DSPMC try spin only " ); |
560 | bench<DSPMC, T, 1>(1, nc, "DSPMC timed spin only " ); |
561 | bench<DSPMC, T, 2>(1, nc, "DSPMC wait spin only " ); |
562 | bench<DSPMC, T, 3>(1, nc, "DSPMC try may block " ); |
563 | bench<DSPMC, T, 4>(1, nc, "DSPMC timed may block " ); |
564 | bench<DSPMC, T, 5>(1, nc, "DSPMC wait may block " ); |
565 | dottedLine(); |
566 | } |
567 | bench<DMPMC, T, 0>(np, nc, "DMPMC try spin only " ); |
568 | bench<DMPMC, T, 1>(np, nc, "DMPMC timed spin only " ); |
569 | bench<DMPMC, T, 2>(np, nc, "DMPMC wait spin only " ); |
570 | bench<DMPMC, T, 3>(np, nc, "DMPMC try may block " ); |
571 | bench<DMPMC, T, 4>(np, nc, "DMPMC timed may block " ); |
572 | bench<DMPMC, T, 5>(np, nc, "DMPMC wait may block " ); |
573 | dottedLine(); |
574 | if (np == 1 && nc == 1) { |
575 | bench<FPCQ, T, 0>(1, 1, "folly::PCQ read " ); |
576 | dottedLine(); |
577 | } |
578 | bench<FMPMC, T, 3>(np, nc, "folly::MPMC read " ); |
579 | bench<FMPMC, T, 4>(np, nc, "folly::MPMC tryReadUntil " ); |
580 | bench<FMPMC, T, 5>(np, nc, "folly::MPMC blockingRead " ); |
581 | std::cout << "==============================================================" |
582 | << std::endl; |
583 | } |
584 | |
585 | void benches(const int np, const int nc) { |
586 | std::cout << "====================== " << std::setw(2) << np << " prod" |
587 | << " " << std::setw(2) << nc << " cons" |
588 | << " ======================" << std::endl; |
589 | type_benches<uint32_t>(np, nc, "=== uint32_t ======" ); |
590 | // Benchmarks for other element sizes can be added as follows: |
591 | // type_benches<IntArray<4>>(np, nc, "=== IntArray<4> ==="); |
592 | } |
593 | |
594 | TEST(DynamicBoundedQueue, bench) { |
595 | if (!FLAGS_bench) { |
596 | return; |
597 | } |
598 | std::cout << "==============================================================" |
599 | << std::endl; |
600 | std::cout << std::setw(2) << FLAGS_reps << " reps of " << std::setw(8) |
601 | << FLAGS_ops << " handoffs\n" ; |
602 | dottedLine(); |
603 | std::cout << "Using capacity " << FLAGS_capacity << " for all queues\n" ; |
604 | std::cout << "==============================================================" |
605 | << std::endl; |
606 | std::cout << "Test name Max time Avg time Min time" |
607 | << std::endl; |
608 | |
609 | for (int np : {1, 8, 32}) { |
610 | for (int nc : {1, 8, 32}) { |
611 | benches(np, nc); |
612 | } |
613 | } |
614 | } |
615 | |
616 | /* |
617 | $ numactl -N 1 dynamic_bounded_queue_test --bench --capacity=1000000 |
618 | ============================================================== |
619 | 10 reps of 1000000 handoffs |
620 | .............................................................. |
621 | Using capacity 1000000 for all queues |
622 | ============================================================== |
623 | Test name Max time Avg time Min time |
624 | ====================== 1 prod 1 cons ====================== |
625 | === uint32_t ================================================= |
626 | DSPSC try spin only 7 ns 7 ns 7 ns |
627 | DSPSC timed spin only 9 ns 9 ns 9 ns |
628 | DSPSC wait spin only 7 ns 7 ns 7 ns |
629 | DSPSC try may block 39 ns 36 ns 33 ns |
630 | DSPSC timed may block 39 ns 38 ns 37 ns |
631 | DSPSC wait may block 37 ns 34 ns 33 ns |
632 | .............................................................. |
633 | DMPSC try spin only 54 ns 53 ns 52 ns |
634 | DMPSC timed spin only 53 ns 52 ns 51 ns |
635 | DMPSC wait spin only 53 ns 52 ns 51 ns |
636 | DMPSC try may block 67 ns 65 ns 64 ns |
637 | DMPSC timed may block 64 ns 62 ns 60 ns |
638 | DMPSC wait may block 64 ns 62 ns 60 ns |
639 | .............................................................. |
640 | DSPMC try spin only 25 ns 24 ns 23 ns |
641 | DSPMC timed spin only 24 ns 23 ns 23 ns |
642 | DSPMC wait spin only 22 ns 21 ns 21 ns |
643 | DSPMC try may block 30 ns 26 ns 21 ns |
644 | DSPMC timed may block 25 ns 24 ns 24 ns |
645 | DSPMC wait may block 22 ns 22 ns 21 ns |
646 | .............................................................. |
647 | DMPMC try spin only 48 ns 45 ns 39 ns |
648 | DMPMC timed spin only 31 ns 30 ns 24 ns |
649 | DMPMC wait spin only 49 ns 47 ns 43 ns |
650 | DMPMC try may block 63 ns 62 ns 61 ns |
651 | DMPMC timed may block 64 ns 60 ns 46 ns |
652 | DMPMC wait may block 61 ns 60 ns 58 ns |
653 | .............................................................. |
654 | folly::PCQ read 8 ns 7 ns 7 ns |
655 | .............................................................. |
656 | folly::MPMC read 53 ns 51 ns 49 ns |
657 | folly::MPMC tryReadUntil 112 ns 106 ns 103 ns |
658 | folly::MPMC blockingRead 50 ns 47 ns 46 ns |
659 | ============================================================== |
660 | ====================== 1 prod 8 cons ====================== |
661 | === uint32_t ================================================= |
662 | DSPMC try spin only 166 ns 159 ns 153 ns |
663 | DSPMC timed spin only 169 ns 163 ns 156 ns |
664 | DSPMC wait spin only 60 ns 57 ns 54 ns |
665 | DSPMC try may block 170 ns 163 ns 153 ns |
666 | DSPMC timed may block 165 ns 157 ns 150 ns |
667 | DSPMC wait may block 94 ns 78 ns 52 ns |
668 | .............................................................. |
669 | DMPMC try spin only 170 ns 161 ns 149 ns |
670 | DMPMC timed spin only 167 ns 158 ns 149 ns |
671 | DMPMC wait spin only 93 ns 80 ns 51 ns |
672 | DMPMC try may block 164 ns 161 ns 154 ns |
673 | DMPMC timed may block 163 ns 156 ns 145 ns |
674 | DMPMC wait may block 117 ns 102 ns 87 ns |
675 | .............................................................. |
676 | folly::MPMC read 176 ns 168 ns 159 ns |
677 | folly::MPMC tryReadUntil 1846 ns 900 ns 521 ns |
678 | folly::MPMC blockingRead 219 ns 193 ns 178 ns |
679 | ============================================================== |
680 | ====================== 1 prod 32 cons ====================== |
681 | === uint32_t ================================================= |
682 | DSPMC try spin only 224 ns 213 ns 204 ns |
683 | DSPMC timed spin only 215 ns 209 ns 199 ns |
684 | DSPMC wait spin only 334 ns 114 ns 52 ns |
685 | DSPMC try may block 240 ns 215 ns 202 ns |
686 | DSPMC timed may block 245 ns 221 ns 200 ns |
687 | DSPMC wait may block 215 ns 151 ns 98 ns |
688 | .............................................................. |
689 | DMPMC try spin only 348 ns 252 ns 204 ns |
690 | DMPMC timed spin only 379 ns 244 ns 198 ns |
691 | DMPMC wait spin only 173 ns 116 ns 89 ns |
692 | DMPMC try may block 362 ns 231 ns 173 ns |
693 | DMPMC timed may block 282 ns 236 ns 206 ns |
694 | DMPMC wait may block 252 ns 172 ns 134 ns |
695 | .............................................................. |
696 | folly::MPMC read 540 ns 290 ns 186 ns |
697 | folly::MPMC tryReadUntil 24946 ns 24280 ns 23113 ns |
698 | folly::MPMC blockingRead 1345 ns 1297 ns 1265 ns |
699 | ============================================================== |
700 | ====================== 8 prod 1 cons ====================== |
701 | === uint32_t ================================================= |
702 | DMPSC try spin only 68 ns 64 ns 60 ns |
703 | DMPSC timed spin only 69 ns 66 ns 61 ns |
704 | DMPSC wait spin only 67 ns 65 ns 62 ns |
705 | DMPSC try may block 77 ns 73 ns 67 ns |
706 | DMPSC timed may block 75 ns 74 ns 68 ns |
707 | DMPSC wait may block 76 ns 73 ns 69 ns |
708 | .............................................................. |
709 | DMPMC try spin only 76 ns 66 ns 60 ns |
710 | DMPMC timed spin only 77 ns 68 ns 63 ns |
711 | DMPMC wait spin only 68 ns 65 ns 63 ns |
712 | DMPMC try may block 76 ns 72 ns 64 ns |
713 | DMPMC timed may block 82 ns 74 ns 67 ns |
714 | DMPMC wait may block 77 ns 72 ns 68 ns |
715 | .............................................................. |
716 | folly::MPMC read 170 ns 166 ns 161 ns |
717 | folly::MPMC tryReadUntil 184 ns 179 ns 173 ns |
718 | folly::MPMC blockingRead 79 ns 73 ns 53 ns |
719 | ============================================================== |
720 | ====================== 8 prod 8 cons ====================== |
721 | === uint32_t ================================================= |
722 | DMPMC try spin only 181 ns 169 ns 133 ns |
723 | DMPMC timed spin only 179 ns 168 ns 148 ns |
724 | DMPMC wait spin only 77 ns 76 ns 71 ns |
725 | DMPMC try may block 180 ns 179 ns 176 ns |
726 | DMPMC timed may block 174 ns 166 ns 153 ns |
727 | DMPMC wait may block 79 ns 78 ns 75 ns |
728 | .............................................................. |
729 | folly::MPMC read 219 ns 206 ns 183 ns |
730 | folly::MPMC tryReadUntil 262 ns 244 ns 213 ns |
731 | folly::MPMC blockingRead 61 ns 58 ns 54 ns |
732 | ============================================================== |
733 | ====================== 8 prod 32 cons ====================== |
734 | === uint32_t ================================================= |
735 | DMPMC try spin only 265 ns 217 ns 203 ns |
736 | DMPMC timed spin only 236 ns 215 ns 202 ns |
737 | DMPMC wait spin only 93 ns 83 ns 77 ns |
738 | DMPMC try may block 325 ns 234 ns 200 ns |
739 | DMPMC timed may block 206 ns 202 ns 193 ns |
740 | DMPMC wait may block 139 ns 93 ns 76 ns |
741 | .............................................................. |
742 | folly::MPMC read 259 ns 214 ns 201 ns |
743 | folly::MPMC tryReadUntil 281 ns 274 ns 267 ns |
744 | folly::MPMC blockingRead 62 ns 59 ns 57 ns |
745 | ============================================================== |
746 | ====================== 32 prod 1 cons ====================== |
747 | === uint32_t ================================================= |
748 | DMPSC try spin only 95 ns 57 ns 45 ns |
749 | DMPSC timed spin only 94 ns 52 ns 46 ns |
750 | DMPSC wait spin only 104 ns 54 ns 43 ns |
751 | DMPSC try may block 59 ns 54 ns 51 ns |
752 | DMPSC timed may block 86 ns 58 ns 52 ns |
753 | DMPSC wait may block 76 ns 57 ns 53 ns |
754 | .............................................................. |
755 | DMPMC try spin only 68 ns 64 ns 60 ns |
756 | DMPMC timed spin only 137 ns 73 ns 61 ns |
757 | DMPMC wait spin only 86 ns 65 ns 58 ns |
758 | DMPMC try may block 89 ns 71 ns 65 ns |
759 | DMPMC timed may block 82 ns 69 ns 65 ns |
760 | DMPMC wait may block 84 ns 71 ns 66 ns |
761 | .............................................................. |
762 | folly::MPMC read 222 ns 203 ns 192 ns |
763 | folly::MPMC tryReadUntil 239 ns 232 ns 191 ns |
764 | folly::MPMC blockingRead 78 ns 68 ns 64 ns |
765 | ============================================================== |
766 | ====================== 32 prod 8 cons ====================== |
767 | === uint32_t ================================================= |
768 | DMPMC try spin only 183 ns 138 ns 107 ns |
769 | DMPMC timed spin only 237 ns 158 ns 98 ns |
770 | DMPMC wait spin only 87 ns 70 ns 58 ns |
771 | DMPMC try may block 169 ns 132 ns 92 ns |
772 | DMPMC timed may block 172 ns 133 ns 79 ns |
773 | DMPMC wait may block 166 ns 89 ns 66 ns |
774 | .............................................................. |
775 | folly::MPMC read 221 ns 194 ns 183 ns |
776 | folly::MPMC tryReadUntil 258 ns 244 ns 230 ns |
777 | folly::MPMC blockingRead 60 ns 54 ns 47 ns |
778 | ============================================================== |
779 | ====================== 32 prod 32 cons ====================== |
780 | === uint32_t ================================================= |
781 | DMPMC try spin only 419 ns 252 ns 181 ns |
782 | DMPMC timed spin only 252 ns 212 ns 179 ns |
783 | DMPMC wait spin only 153 ns 79 ns 62 ns |
784 | DMPMC try may block 302 ns 236 ns 182 ns |
785 | DMPMC timed may block 266 ns 213 ns 170 ns |
786 | DMPMC wait may block 262 ns 120 ns 64 ns |
787 | .............................................................. |
788 | folly::MPMC read 269 ns 245 ns 199 ns |
789 | folly::MPMC tryReadUntil 257 ns 245 ns 235 ns |
790 | folly::MPMC blockingRead 53 ns 48 ns 45 ns |
791 | ============================================================== |
792 | |
793 | $ numactl -N 1 dynamic_bounded_queue_test --bench --capacity=1000 |
794 | ============================================================== |
795 | 10 reps of 1000000 handoffs |
796 | .............................................................. |
797 | Using capacity 1000 for all queues |
798 | ============================================================== |
799 | Test name Max time Avg time Min time |
800 | ====================== 1 prod 1 cons ====================== |
801 | === uint32_t ================================================= |
802 | DSPSC try spin only 7 ns 7 ns 7 ns |
803 | DSPSC timed spin only 9 ns 9 ns 9 ns |
804 | DSPSC wait spin only 7 ns 7 ns 7 ns |
805 | DSPSC try may block 34 ns 33 ns 31 ns |
806 | DSPSC timed may block 34 ns 34 ns 33 ns |
807 | DSPSC wait may block 30 ns 30 ns 29 ns |
808 | .............................................................. |
809 | DMPSC try spin only 60 ns 57 ns 55 ns |
810 | DMPSC timed spin only 55 ns 52 ns 51 ns |
811 | DMPSC wait spin only 57 ns 54 ns 52 ns |
812 | DMPSC try may block 66 ns 62 ns 39 ns |
813 | DMPSC timed may block 67 ns 64 ns 62 ns |
814 | DMPSC wait may block 67 ns 65 ns 64 ns |
815 | .............................................................. |
816 | DSPMC try spin only 27 ns 25 ns 24 ns |
817 | DSPMC timed spin only 25 ns 25 ns 24 ns |
818 | DSPMC wait spin only 23 ns 23 ns 22 ns |
819 | DSPMC try may block 31 ns 26 ns 24 ns |
820 | DSPMC timed may block 33 ns 30 ns 30 ns |
821 | DSPMC wait may block 37 ns 29 ns 28 ns |
822 | .............................................................. |
823 | DMPMC try spin only 55 ns 53 ns 51 ns |
824 | DMPMC timed spin only 36 ns 31 ns 26 ns |
825 | DMPMC wait spin only 54 ns 53 ns 51 ns |
826 | DMPMC try may block 68 ns 64 ns 51 ns |
827 | DMPMC timed may block 66 ns 63 ns 60 ns |
828 | DMPMC wait may block 68 ns 63 ns 60 ns |
829 | .............................................................. |
830 | folly::PCQ read 15 ns 13 ns 11 ns |
831 | .............................................................. |
832 | folly::MPMC read 60 ns 56 ns 51 ns |
833 | folly::MPMC tryReadUntil 134 ns 112 ns 102 ns |
834 | folly::MPMC blockingRead 57 ns 51 ns 48 ns |
835 | ============================================================== |
836 | ====================== 1 prod 8 cons ====================== |
837 | === uint32_t ================================================= |
838 | DSPMC try spin only 169 ns 162 ns 151 ns |
839 | DSPMC timed spin only 178 ns 166 ns 149 ns |
840 | DSPMC wait spin only 59 ns 55 ns 54 ns |
841 | DSPMC try may block 173 ns 163 ns 153 ns |
842 | DSPMC timed may block 171 ns 166 ns 156 ns |
843 | DSPMC wait may block 71 ns 57 ns 51 ns |
844 | .............................................................. |
845 | DMPMC try spin only 172 ns 164 ns 158 ns |
846 | DMPMC timed spin only 173 ns 164 ns 156 ns |
847 | DMPMC wait spin only 77 ns 62 ns 53 ns |
848 | DMPMC try may block 181 ns 163 ns 152 ns |
849 | DMPMC timed may block 174 ns 165 ns 151 ns |
850 | DMPMC wait may block 91 ns 72 ns 52 ns |
851 | .............................................................. |
852 | folly::MPMC read 178 ns 167 ns 161 ns |
853 | folly::MPMC tryReadUntil 991 ns 676 ns 423 ns |
854 | folly::MPMC blockingRead 154 ns 129 ns 96 ns |
855 | ============================================================== |
856 | ====================== 1 prod 32 cons ====================== |
857 | === uint32_t ================================================= |
858 | DSPMC try spin only 462 ns 288 ns 201 ns |
859 | DSPMC timed spin only 514 ns 283 ns 201 ns |
860 | DSPMC wait spin only 100 ns 60 ns 45 ns |
861 | DSPMC try may block 531 ns 318 ns 203 ns |
862 | DSPMC timed may block 1379 ns 891 ns 460 ns |
863 | DSPMC wait may block 148 ns 111 ns 82 ns |
864 | .............................................................. |
865 | DMPMC try spin only 404 ns 312 ns 205 ns |
866 | DMPMC timed spin only 337 ns 253 ns 219 ns |
867 | DMPMC wait spin only 130 ns 97 ns 72 ns |
868 | DMPMC try may block 532 ns 265 ns 201 ns |
869 | DMPMC timed may block 846 ns 606 ns 412 ns |
870 | DMPMC wait may block 158 ns 112 ns 87 ns |
871 | .............................................................. |
872 | folly::MPMC read 880 ns 419 ns 284 ns |
873 | folly::MPMC tryReadUntil 23432 ns 23184 ns 23007 ns |
874 | folly::MPMC blockingRead 1353 ns 1308 ns 1279 ns |
875 | ============================================================== |
876 | ====================== 8 prod 1 cons ====================== |
877 | === uint32_t ================================================= |
878 | DMPSC try spin only 67 ns 63 ns 51 ns |
879 | DMPSC timed spin only 69 ns 65 ns 63 ns |
880 | DMPSC wait spin only 67 ns 65 ns 61 ns |
881 | DMPSC try may block 73 ns 69 ns 63 ns |
882 | DMPSC timed may block 72 ns 69 ns 64 ns |
883 | DMPSC wait may block 71 ns 70 ns 68 ns |
884 | .............................................................. |
885 | DMPMC try spin only 70 ns 64 ns 59 ns |
886 | DMPMC timed spin only 76 ns 66 ns 53 ns |
887 | DMPMC wait spin only 68 ns 66 ns 64 ns |
888 | DMPMC try may block 71 ns 68 ns 66 ns |
889 | DMPMC timed may block 72 ns 70 ns 67 ns |
890 | DMPMC wait may block 73 ns 70 ns 67 ns |
891 | .............................................................. |
892 | folly::MPMC read 193 ns 167 ns 153 ns |
893 | folly::MPMC tryReadUntil 497 ns 415 ns 348 ns |
894 | folly::MPMC blockingRead 163 ns 134 ns 115 ns |
895 | ============================================================== |
896 | ====================== 8 prod 8 cons ====================== |
897 | === uint32_t ================================================= |
898 | DMPMC try spin only 216 ns 203 ns 196 ns |
899 | DMPMC timed spin only 199 ns 186 ns 178 ns |
900 | DMPMC wait spin only 63 ns 60 ns 58 ns |
901 | DMPMC try may block 212 ns 198 ns 183 ns |
902 | DMPMC timed may block 180 ns 170 ns 162 ns |
903 | DMPMC wait may block 72 ns 68 ns 65 ns |
904 | .............................................................. |
905 | folly::MPMC read 225 ns 201 ns 188 ns |
906 | folly::MPMC tryReadUntil 255 ns 248 ns 232 ns |
907 | folly::MPMC blockingRead 52 ns 48 ns 42 ns |
908 | ============================================================== |
909 | ====================== 8 prod 32 cons ====================== |
910 | === uint32_t ================================================= |
911 | DMPMC try spin only 360 ns 302 ns 195 ns |
912 | DMPMC timed spin only 350 ns 272 ns 218 ns |
913 | DMPMC wait spin only 92 ns 72 ns 61 ns |
914 | DMPMC try may block 352 ns 263 ns 223 ns |
915 | DMPMC timed may block 218 ns 213 ns 209 ns |
916 | DMPMC wait may block 98 ns 77 ns 70 ns |
917 | .............................................................. |
918 | folly::MPMC read 611 ns 461 ns 339 ns |
919 | folly::MPMC tryReadUntil 270 ns 260 ns 253 ns |
920 | folly::MPMC blockingRead 89 ns 84 ns 80 ns |
921 | ============================================================== |
922 | ====================== 32 prod 1 cons ====================== |
923 | === uint32_t ================================================= |
924 | DMPSC try spin only 389 ns 248 ns 149 ns |
925 | DMPSC timed spin only 356 ns 235 ns 120 ns |
926 | DMPSC wait spin only 343 ns 242 ns 125 ns |
927 | DMPSC try may block 412 ns 294 ns 168 ns |
928 | DMPSC timed may block 332 ns 271 ns 189 ns |
929 | DMPSC wait may block 280 ns 252 ns 199 ns |
930 | .............................................................. |
931 | DMPMC try spin only 393 ns 269 ns 105 ns |
932 | DMPMC timed spin only 328 ns 240 ns 112 ns |
933 | DMPMC wait spin only 502 ns 266 ns 107 ns |
934 | DMPMC try may block 514 ns 346 ns 192 ns |
935 | DMPMC timed may block 339 ns 318 ns 278 ns |
936 | DMPMC wait may block 319 ns 307 ns 292 ns |
937 | .............................................................. |
938 | folly::MPMC read 948 ns 517 ns 232 ns |
939 | folly::MPMC tryReadUntil 9649 ns 7567 ns 4140 ns |
940 | folly::MPMC blockingRead 1365 ns 1316 ns 1131 ns |
941 | ============================================================== |
942 | ====================== 32 prod 8 cons ====================== |
943 | === uint32_t ================================================= |
944 | DMPMC try spin only 436 ns 257 ns 115 ns |
945 | DMPMC timed spin only 402 ns 272 ns 121 ns |
946 | DMPMC wait spin only 136 ns 78 ns 55 ns |
947 | DMPMC try may block 454 ns 227 ns 78 ns |
948 | DMPMC timed may block 155 ns 137 ns 116 ns |
949 | DMPMC wait may block 62 ns 59 ns 57 ns |
950 | .............................................................. |
951 | folly::MPMC read 677 ns 497 ns 336 ns |
952 | folly::MPMC tryReadUntil 268 ns 262 ns 258 ns |
953 | folly::MPMC blockingRead 87 ns 85 ns 82 ns |
954 | ============================================================== |
955 | ====================== 32 prod 32 cons ====================== |
956 | === uint32_t ================================================= |
957 | DMPMC try spin only 786 ns 381 ns 142 ns |
958 | DMPMC timed spin only 795 ns 346 ns 126 ns |
959 | DMPMC wait spin only 334 ns 107 ns 55 ns |
960 | DMPMC try may block 535 ns 317 ns 144 ns |
961 | DMPMC timed may block 197 ns 192 ns 183 ns |
962 | DMPMC wait may block 189 ns 75 ns 60 ns |
963 | .............................................................. |
964 | folly::MPMC read 1110 ns 919 ns 732 ns |
965 | folly::MPMC tryReadUntil 214 ns 210 ns 206 ns |
966 | folly::MPMC blockingRead 53 ns 52 ns 51 ns |
967 | ============================================================== |
968 | |
969 | $ lscpu |
970 | Architecture: x86_64 |
971 | CPU op-mode(s): 32-bit, 64-bit |
972 | Byte Order: Little Endian |
973 | CPU(s): 32 |
974 | On-line CPU(s) list: 0-31 |
975 | Thread(s) per core: 2 |
976 | Core(s) per socket: 8 |
977 | Socket(s): 2 |
978 | NUMA node(s): 2 |
979 | Vendor ID: GenuineIntel |
980 | CPU family: 6 |
981 | Model: 45 |
982 | Model name: Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz |
983 | Stepping: 6 |
984 | CPU MHz: 2200.000 |
985 | CPU max MHz: 2200.0000 |
986 | CPU min MHz: 1200.0000 |
987 | BogoMIPS: 4399.92 |
988 | Virtualization: VT-x |
989 | L1d cache: 32K |
990 | L1i cache: 32K |
991 | L2 cache: 256K |
992 | L3 cache: 20480K |
993 | NUMA node0 CPU(s): 0-7,16-23 |
994 | NUMA node1 CPU(s): 8-15,24-31 |
995 | |
996 | Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr |
997 | pge mca cmov pat pse36 clflush dts acpi mmx fxsr |
998 | sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp |
999 | lm constant_tsc arch_perfmon pebs bts rep_good |
1000 | nopl xtopology nonstop_tsc aperfmperf eagerfpu |
1001 | pni pclmulqdq dtes64 monitor ds_cpl vmx smx est |
1002 | tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 |
1003 | x2apic popcnt tsc_deadline_timer aes xsave avx |
1004 | lahf_lm epb tpr_shadow vnmi flexpriority ept vpid |
1005 | xsaveopt dtherm arat pln pts |
1006 | */ |
1007 | |