1/*
2 * Copyright 2017-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <folly/concurrency/DynamicBoundedQueue.h>
18#include <folly/MPMCQueue.h>
19#include <folly/ProducerConsumerQueue.h>
20#include <folly/portability/GTest.h>
21
22#include <glog/logging.h>
23
24#include <atomic>
25#include <thread>
26
27DEFINE_bool(bench, false, "run benchmark");
28DEFINE_int32(reps, 10, "number of reps");
29DEFINE_int32(ops, 1000000, "number of operations per rep");
30DEFINE_int64(capacity, 1000000, "capacity");
31
32template <typename T, bool MayBlock, typename WeightFn>
33using DSPSC = folly::DSPSCQueue<T, MayBlock, 8, 7, WeightFn>;
34
35template <typename T, bool MayBlock, typename WeightFn>
36using DMPSC = folly::DMPSCQueue<T, MayBlock, 8, 7, WeightFn>;
37
38template <typename T, bool MayBlock, typename WeightFn>
39using DSPMC = folly::DSPMCQueue<T, MayBlock, 8, 7, WeightFn>;
40
41template <typename T, bool MayBlock, typename WeightFn>
42using DMPMC = folly::DMPMCQueue<T, MayBlock, 8, 7, WeightFn>;
43
44template <template <typename, bool, typename> class Q, bool MayBlock>
45void basic_test() {
46 auto dur = std::chrono::microseconds(100);
47 auto deadline = std::chrono::steady_clock::now() + dur;
48
49 struct CustomWeightFn {
50 uint64_t operator()(int val) {
51 return val * 100;
52 }
53 };
54
55 Q<int, MayBlock, CustomWeightFn> q(10000);
56
57 ASSERT_TRUE(q.empty());
58 ASSERT_EQ(q.size(), 0);
59 int v;
60 ASSERT_FALSE(q.try_dequeue(v));
61
62 q.enqueue(1);
63 ASSERT_TRUE(q.try_enqueue(2));
64 ASSERT_TRUE(q.try_enqueue_until(3, deadline));
65 ASSERT_TRUE(q.try_enqueue_for(4, dur));
66
67 ASSERT_EQ(q.size(), 4);
68 ASSERT_EQ(q.weight(), 1000);
69 ASSERT_FALSE(q.empty());
70
71 q.dequeue(v);
72 ASSERT_EQ(v, 1);
73 ASSERT_TRUE(q.try_dequeue(v));
74 ASSERT_EQ(v, 2);
75 ASSERT_TRUE(q.try_dequeue_until(v, deadline));
76 ASSERT_EQ(v, 3);
77 ASSERT_TRUE(q.try_dequeue_for(v, dur));
78 ASSERT_EQ(v, 4);
79
80 ASSERT_TRUE(q.empty());
81 ASSERT_EQ(q.size(), 0);
82 ASSERT_EQ(q.weight(), 0);
83}
84
85TEST(DynamicBoundedQueue, basic) {
86 basic_test<DSPSC, false>();
87 basic_test<DMPSC, false>();
88 basic_test<DSPMC, false>();
89 basic_test<DMPMC, false>();
90 basic_test<DSPSC, true>();
91 basic_test<DMPSC, true>();
92 basic_test<DSPMC, true>();
93 basic_test<DMPMC, true>();
94}
95
96template <template <typename, bool, typename> class Q, bool MayBlock>
97void move_test() {
98 struct Foo {
99 int v_;
100 explicit Foo(int v) noexcept : v_(v) {}
101 Foo(const Foo&) = delete;
102 Foo& operator=(const Foo&) = delete;
103 Foo(Foo&& other) noexcept : v_(other.v_) {}
104 Foo& operator=(Foo&& other) noexcept {
105 v_ = other.v_;
106 return *this;
107 }
108 };
109
110 struct CustomWeightFn {
111 uint64_t operator()(Foo&&) {
112 return 10;
113 }
114 };
115
116 auto dur = std::chrono::microseconds(100);
117 auto deadline = std::chrono::steady_clock::now() + dur;
118
119 Q<Foo, MayBlock, CustomWeightFn> q(100);
120 Foo v(1);
121 q.enqueue(std::move(v));
122 ASSERT_TRUE(q.try_enqueue(std::move(v)));
123 ASSERT_TRUE(q.try_enqueue_until(std::move(v), deadline));
124 ASSERT_TRUE(q.try_enqueue_for(std::move(v), dur));
125
126 ASSERT_EQ(q.size(), 4);
127 ASSERT_EQ(q.weight(), 40);
128}
129
130TEST(DynamicBoundedQueue, move) {
131 move_test<DSPSC, false>();
132 move_test<DMPSC, false>();
133 move_test<DSPMC, false>();
134 move_test<DMPMC, false>();
135 move_test<DSPSC, true>();
136 move_test<DMPSC, true>();
137 move_test<DSPMC, true>();
138 move_test<DMPMC, true>();
139}
140
141template <template <typename, bool, typename> class Q, bool MayBlock>
142void capacity_test() {
143 struct CustomWeightFn {
144 uint64_t operator()(int val) {
145 return val;
146 }
147 };
148
149 Q<int, MayBlock, CustomWeightFn> q(1000);
150 ASSERT_EQ(q.weight(), 0);
151 int v;
152 q.enqueue(100);
153 ASSERT_EQ(q.weight(), 100);
154 q.enqueue(300);
155 ASSERT_EQ(q.weight(), 400);
156 ASSERT_FALSE(q.try_enqueue(1200));
157 q.reset_capacity(2000); // reset capacityy to 2000
158 ASSERT_TRUE(q.try_enqueue(1200));
159 ASSERT_EQ(q.weight(), 1600);
160 ASSERT_EQ(q.size(), 3);
161 q.reset_capacity(1000); // reset capacity back to 1000
162 ASSERT_FALSE(q.try_enqueue(100));
163 q.dequeue(v);
164 ASSERT_EQ(v, 100);
165 ASSERT_EQ(q.weight(), 1500);
166 q.dequeue(v);
167 ASSERT_EQ(v, 300);
168 ASSERT_EQ(q.weight(), 1200);
169}
170
171TEST(DynamicBoundedQueue, capacity) {
172 capacity_test<DSPSC, false>();
173 capacity_test<DMPSC, false>();
174 capacity_test<DSPMC, false>();
175 capacity_test<DMPMC, false>();
176 capacity_test<DSPSC, true>();
177 capacity_test<DMPSC, true>();
178 capacity_test<DSPMC, true>();
179 capacity_test<DMPMC, true>();
180}
181
182template <typename ProdFunc, typename ConsFunc, typename EndFunc>
183inline uint64_t run_once(
184 int nprod,
185 int ncons,
186 const ProdFunc& prodFn,
187 const ConsFunc& consFn,
188 const EndFunc& endFn) {
189 std::atomic<bool> start{false};
190 std::atomic<int> ready{0};
191
192 /* producers */
193 std::vector<std::thread> prodThr(nprod);
194 for (int tid = 0; tid < nprod; ++tid) {
195 prodThr[tid] = std::thread([&, tid] {
196 ++ready;
197 while (!start.load()) {
198 /* spin */;
199 }
200 prodFn(tid);
201 });
202 }
203
204 /* consumers */
205 std::vector<std::thread> consThr(ncons);
206 for (int tid = 0; tid < ncons; ++tid) {
207 consThr[tid] = std::thread([&, tid] {
208 ++ready;
209 while (!start.load()) {
210 /* spin */;
211 }
212 consFn(tid);
213 });
214 }
215
216 /* wait for all producers and consumers to be ready */
217 while (ready.load() < (nprod + ncons)) {
218 /* spin */;
219 }
220
221 /* begin time measurement */
222 auto tbegin = std::chrono::steady_clock::now();
223 start.store(true);
224
225 /* wait for completion */
226 for (int i = 0; i < nprod; ++i) {
227 prodThr[i].join();
228 }
229 for (int i = 0; i < ncons; ++i) {
230 consThr[i].join();
231 }
232
233 /* end time measurement */
234 auto tend = std::chrono::steady_clock::now();
235 endFn();
236 return std::chrono::duration_cast<std::chrono::nanoseconds>(tend - tbegin)
237 .count();
238}
239
240template <bool SingleProducer, bool SingleConsumer, bool MayBlock>
241void enq_deq_test(const int nprod, const int ncons) {
242 if (SingleProducer) {
243 ASSERT_EQ(nprod, 1);
244 }
245 if (SingleConsumer) {
246 ASSERT_EQ(ncons, 1);
247 }
248
249 int ops = 1000;
250 folly::DynamicBoundedQueue<int, SingleProducer, SingleConsumer, MayBlock, 2>
251 q(10);
252 std::atomic<uint64_t> sum(0);
253
254 auto prod = [&](int tid) {
255 for (int i = tid; i < ops; i += nprod) {
256 if ((i % 3) == 0) {
257 while (!q.try_enqueue(i)) {
258 /* keep trying */;
259 }
260 } else if ((i % 3) == 1) {
261 auto dur = std::chrono::microseconds(100);
262 while (!q.try_enqueue_for(i, dur)) {
263 /* keep trying */;
264 }
265 } else {
266 q.enqueue(i);
267 }
268 }
269 };
270
271 auto cons = [&](int tid) {
272 uint64_t mysum = 0;
273 for (int i = tid; i < ops; i += ncons) {
274 int v;
275 if ((i % 3) == 0) {
276 while (!q.try_dequeue(v)) {
277 /* keep trying */;
278 }
279 } else if ((i % 3) == 1) {
280 auto dur = std::chrono::microseconds(100);
281 while (!q.try_dequeue_for(v, dur)) {
282 /* keep trying */;
283 }
284 } else {
285 q.dequeue(v);
286 }
287 if (nprod == 1 && ncons == 1) {
288 ASSERT_EQ(v, i);
289 }
290 mysum += v;
291 }
292 sum.fetch_add(mysum);
293 };
294
295 auto endfn = [&] {
296 uint64_t expected = ops;
297 expected *= ops - 1;
298 expected /= 2;
299 ASSERT_EQ(sum.load(), expected);
300 };
301 run_once(nprod, ncons, prod, cons, endfn);
302}
303
304TEST(DynamicBoundedQueue, enq_deq) {
305 /* SPSC */
306 enq_deq_test<true, true, false>(1, 1);
307 enq_deq_test<true, true, true>(1, 1);
308 /* MPSC */
309 enq_deq_test<false, true, false>(1, 1);
310 enq_deq_test<false, true, true>(1, 1);
311 enq_deq_test<false, true, false>(2, 1);
312 enq_deq_test<false, true, true>(2, 1);
313 enq_deq_test<false, true, false>(10, 1);
314 enq_deq_test<false, true, true>(10, 1);
315 /* SPMC */
316 enq_deq_test<true, false, false>(1, 1);
317 enq_deq_test<true, false, true>(1, 1);
318 enq_deq_test<true, false, false>(1, 2);
319 enq_deq_test<true, false, true>(1, 2);
320 enq_deq_test<true, false, false>(1, 10);
321 enq_deq_test<true, false, true>(1, 10);
322 /* MPMC */
323 enq_deq_test<false, false, false>(1, 1);
324 enq_deq_test<false, false, true>(1, 1);
325 enq_deq_test<false, false, false>(2, 1);
326 enq_deq_test<false, false, true>(2, 1);
327 enq_deq_test<false, false, false>(10, 1);
328 enq_deq_test<false, false, true>(10, 1);
329 enq_deq_test<false, false, false>(1, 2);
330 enq_deq_test<false, false, true>(1, 2);
331 enq_deq_test<false, false, false>(1, 10);
332 enq_deq_test<false, false, true>(1, 10);
333 enq_deq_test<false, false, false>(2, 2);
334 enq_deq_test<false, false, true>(2, 2);
335 enq_deq_test<false, false, false>(10, 10);
336 enq_deq_test<false, false, true>(10, 10);
337}
338
339template <typename RepFunc>
340uint64_t runBench(const std::string& name, int ops, const RepFunc& repFn) {
341 int reps = FLAGS_reps;
342 uint64_t min = UINTMAX_MAX;
343 uint64_t max = 0;
344 uint64_t sum = 0;
345
346 repFn(); // sometimes first run is outlier
347 for (int r = 0; r < reps; ++r) {
348 uint64_t dur = repFn();
349 sum += dur;
350 min = std::min(min, dur);
351 max = std::max(max, dur);
352 // if each rep takes too long run at least 2 reps
353 const uint64_t minute = 60000000000ULL;
354 if (sum > minute && r >= 1) {
355 reps = r + 1;
356 break;
357 }
358 }
359
360 const std::string unit = " ns";
361 uint64_t avg = sum / reps;
362 uint64_t res = min;
363 std::cout << name;
364 std::cout << " " << std::setw(4) << max / ops << unit;
365 std::cout << " " << std::setw(4) << avg / ops << unit;
366 std::cout << " " << std::setw(4) << res / ops << unit;
367 std::cout << std::endl;
368 return res;
369}
370
371template <template <typename, bool, typename> class Q, typename T, int Op>
372uint64_t bench(const int nprod, const int ncons, const std::string& name) {
373 int ops = FLAGS_ops;
374 auto repFn = [&] {
375 Q<T, Op == 3 || Op == 4 || Op == 5, folly::DefaultWeightFn<T>> q(
376 FLAGS_capacity);
377 std::atomic<uint64_t> sum(0);
378 auto prod = [&](int tid) {
379 for (int i = tid; i < ops; i += nprod) {
380 if (Op == 0 || Op == 3) {
381 while (!q.try_enqueue(i)) {
382 /* keep trying */;
383 }
384 } else if (Op == 1 || Op == 4) {
385 while (!q.try_enqueue_for(i, std::chrono::microseconds(1000))) {
386 /* keep trying */;
387 }
388 } else {
389 q.enqueue(i);
390 }
391 }
392 };
393 auto cons = [&](int tid) {
394 uint64_t mysum = 0;
395 T v = -1;
396 for (int i = tid; i < ops; i += ncons) {
397 if (Op == 0 || Op == 3) {
398 while (!q.try_dequeue(v)) {
399 /* keep trying */;
400 }
401 } else if (Op == 1 || Op == 4) {
402 while (!q.try_dequeue_for(v, std::chrono::microseconds(1000))) {
403 /* keep trying */;
404 }
405 } else {
406 q.dequeue(v);
407 }
408 if (nprod == 1 && ncons == 1) {
409 DCHECK_EQ(int(v), i);
410 }
411 mysum += v;
412 }
413 sum.fetch_add(mysum);
414 };
415 auto endfn = [&] {
416 uint64_t expected = ops;
417 expected *= ops - 1;
418 expected /= 2;
419 ASSERT_EQ(sum.load(), expected);
420 };
421 return run_once(nprod, ncons, prod, cons, endfn);
422 };
423 return runBench(name, ops, repFn);
424}
425
426/* For performance comparison */
427template <typename T>
428class MPMC {
429 folly::MPMCQueue<T> q_;
430
431 public:
432 explicit MPMC(uint64_t capacity) : q_(capacity) {}
433
434 void enqueue(const T& v) {
435 q_.blockingWrite(v);
436 }
437
438 void enqueue(T&& v) {
439 q_.blockingWrite(std::move(v));
440 }
441
442 bool try_enqueue(const T& v) {
443 return q_.write(v);
444 }
445
446 bool try_enqueue(const T&& v) {
447 return q_.write(std::move(v));
448 }
449
450 template <typename Rep, typename Period>
451 bool try_enqueue_for(
452 const T& v,
453 const std::chrono::duration<Rep, Period>& duration) {
454 return q_.tryWriteUntil(std::chrono::steady_clock::now() + duration, v);
455 }
456
457 void dequeue(T& item) {
458 q_.blockingRead(item);
459 }
460
461 bool try_dequeue(T& item) {
462 return q_.read(item);
463 }
464
465 template <typename Rep, typename Period>
466 bool try_dequeue_for(
467 T& item,
468 const std::chrono::duration<Rep, Period>& duration) {
469 return q_.tryReadUntil(std::chrono::steady_clock::now() + duration, item);
470 }
471};
472
473template <typename T, bool, typename>
474using FMPMC = MPMC<T>;
475
476template <typename T>
477class PCQ {
478 folly::ProducerConsumerQueue<T> q_;
479
480 public:
481 explicit PCQ(uint64_t capacity) : q_(capacity) {}
482
483 void enqueue(const T&) {
484 ASSERT_TRUE(false);
485 }
486
487 bool try_enqueue(const T& v) {
488 return q_.write(v);
489 }
490
491 bool try_enqueue(T&& v) {
492 return q_.write(std::move(v));
493 }
494
495 template <typename Rep, typename Period>
496 bool try_enqueue_for(const T&, const std::chrono::duration<Rep, Period>&) {
497 return false;
498 }
499
500 void dequeue(T&) {
501 ASSERT_TRUE(false);
502 }
503
504 bool try_dequeue(T& item) {
505 return q_.read(item);
506 }
507
508 template <typename Rep, typename Period>
509 bool try_dequeue_for(T&, const std::chrono::duration<Rep, Period>&) {
510 return false;
511 }
512};
513
514template <typename T, bool, typename>
515using FPCQ = PCQ<T>;
516
517template <size_t M>
518struct IntArray {
519 int a[M];
520 IntArray() {}
521 /* implicit */ IntArray(int v) {
522 for (size_t i = 0; i < M; ++i) {
523 a[i] = v;
524 }
525 }
526 operator int() {
527 return a[0];
528 }
529};
530
531void dottedLine() {
532 std::cout << ".............................................................."
533 << std::endl;
534}
535
536template <typename T>
537void type_benches(const int np, const int nc, const std::string& name) {
538 std::cout << name
539 << "===========================================" << std::endl;
540 if (np == 1 && nc == 1) {
541 bench<DSPSC, T, 0>(1, 1, "DSPSC try spin only ");
542 bench<DSPSC, T, 1>(1, 1, "DSPSC timed spin only ");
543 bench<DSPSC, T, 2>(1, 1, "DSPSC wait spin only ");
544 bench<DSPSC, T, 3>(1, 1, "DSPSC try may block ");
545 bench<DSPSC, T, 4>(1, 1, "DSPSC timed may block ");
546 bench<DSPSC, T, 5>(1, 1, "DSPSC wait may block ");
547 dottedLine();
548 }
549 if (nc == 1) {
550 bench<DMPSC, T, 0>(np, 1, "DMPSC try spin only ");
551 bench<DMPSC, T, 1>(np, 1, "DMPSC timed spin only ");
552 bench<DMPSC, T, 2>(np, 1, "DMPSC wait spin only ");
553 bench<DMPSC, T, 3>(np, 1, "DMPSC try may block ");
554 bench<DMPSC, T, 4>(np, 1, "DMPSC timed may block ");
555 bench<DMPSC, T, 5>(np, 1, "DMPSC wait may block ");
556 dottedLine();
557 }
558 if (np == 1) {
559 bench<DSPMC, T, 0>(1, nc, "DSPMC try spin only ");
560 bench<DSPMC, T, 1>(1, nc, "DSPMC timed spin only ");
561 bench<DSPMC, T, 2>(1, nc, "DSPMC wait spin only ");
562 bench<DSPMC, T, 3>(1, nc, "DSPMC try may block ");
563 bench<DSPMC, T, 4>(1, nc, "DSPMC timed may block ");
564 bench<DSPMC, T, 5>(1, nc, "DSPMC wait may block ");
565 dottedLine();
566 }
567 bench<DMPMC, T, 0>(np, nc, "DMPMC try spin only ");
568 bench<DMPMC, T, 1>(np, nc, "DMPMC timed spin only ");
569 bench<DMPMC, T, 2>(np, nc, "DMPMC wait spin only ");
570 bench<DMPMC, T, 3>(np, nc, "DMPMC try may block ");
571 bench<DMPMC, T, 4>(np, nc, "DMPMC timed may block ");
572 bench<DMPMC, T, 5>(np, nc, "DMPMC wait may block ");
573 dottedLine();
574 if (np == 1 && nc == 1) {
575 bench<FPCQ, T, 0>(1, 1, "folly::PCQ read ");
576 dottedLine();
577 }
578 bench<FMPMC, T, 3>(np, nc, "folly::MPMC read ");
579 bench<FMPMC, T, 4>(np, nc, "folly::MPMC tryReadUntil ");
580 bench<FMPMC, T, 5>(np, nc, "folly::MPMC blockingRead ");
581 std::cout << "=============================================================="
582 << std::endl;
583}
584
585void benches(const int np, const int nc) {
586 std::cout << "====================== " << std::setw(2) << np << " prod"
587 << " " << std::setw(2) << nc << " cons"
588 << " ======================" << std::endl;
589 type_benches<uint32_t>(np, nc, "=== uint32_t ======");
590 // Benchmarks for other element sizes can be added as follows:
591 // type_benches<IntArray<4>>(np, nc, "=== IntArray<4> ===");
592}
593
594TEST(DynamicBoundedQueue, bench) {
595 if (!FLAGS_bench) {
596 return;
597 }
598 std::cout << "=============================================================="
599 << std::endl;
600 std::cout << std::setw(2) << FLAGS_reps << " reps of " << std::setw(8)
601 << FLAGS_ops << " handoffs\n";
602 dottedLine();
603 std::cout << "Using capacity " << FLAGS_capacity << " for all queues\n";
604 std::cout << "=============================================================="
605 << std::endl;
606 std::cout << "Test name Max time Avg time Min time"
607 << std::endl;
608
609 for (int np : {1, 8, 32}) {
610 for (int nc : {1, 8, 32}) {
611 benches(np, nc);
612 }
613 }
614}
615
616/*
617$ numactl -N 1 dynamic_bounded_queue_test --bench --capacity=1000000
618==============================================================
61910 reps of 1000000 handoffs
620..............................................................
621Using capacity 1000000 for all queues
622==============================================================
623Test name Max time Avg time Min time
624====================== 1 prod 1 cons ======================
625=== uint32_t =================================================
626DSPSC try spin only 7 ns 7 ns 7 ns
627DSPSC timed spin only 9 ns 9 ns 9 ns
628DSPSC wait spin only 7 ns 7 ns 7 ns
629DSPSC try may block 39 ns 36 ns 33 ns
630DSPSC timed may block 39 ns 38 ns 37 ns
631DSPSC wait may block 37 ns 34 ns 33 ns
632..............................................................
633DMPSC try spin only 54 ns 53 ns 52 ns
634DMPSC timed spin only 53 ns 52 ns 51 ns
635DMPSC wait spin only 53 ns 52 ns 51 ns
636DMPSC try may block 67 ns 65 ns 64 ns
637DMPSC timed may block 64 ns 62 ns 60 ns
638DMPSC wait may block 64 ns 62 ns 60 ns
639..............................................................
640DSPMC try spin only 25 ns 24 ns 23 ns
641DSPMC timed spin only 24 ns 23 ns 23 ns
642DSPMC wait spin only 22 ns 21 ns 21 ns
643DSPMC try may block 30 ns 26 ns 21 ns
644DSPMC timed may block 25 ns 24 ns 24 ns
645DSPMC wait may block 22 ns 22 ns 21 ns
646..............................................................
647DMPMC try spin only 48 ns 45 ns 39 ns
648DMPMC timed spin only 31 ns 30 ns 24 ns
649DMPMC wait spin only 49 ns 47 ns 43 ns
650DMPMC try may block 63 ns 62 ns 61 ns
651DMPMC timed may block 64 ns 60 ns 46 ns
652DMPMC wait may block 61 ns 60 ns 58 ns
653..............................................................
654folly::PCQ read 8 ns 7 ns 7 ns
655..............................................................
656folly::MPMC read 53 ns 51 ns 49 ns
657folly::MPMC tryReadUntil 112 ns 106 ns 103 ns
658folly::MPMC blockingRead 50 ns 47 ns 46 ns
659==============================================================
660====================== 1 prod 8 cons ======================
661=== uint32_t =================================================
662DSPMC try spin only 166 ns 159 ns 153 ns
663DSPMC timed spin only 169 ns 163 ns 156 ns
664DSPMC wait spin only 60 ns 57 ns 54 ns
665DSPMC try may block 170 ns 163 ns 153 ns
666DSPMC timed may block 165 ns 157 ns 150 ns
667DSPMC wait may block 94 ns 78 ns 52 ns
668..............................................................
669DMPMC try spin only 170 ns 161 ns 149 ns
670DMPMC timed spin only 167 ns 158 ns 149 ns
671DMPMC wait spin only 93 ns 80 ns 51 ns
672DMPMC try may block 164 ns 161 ns 154 ns
673DMPMC timed may block 163 ns 156 ns 145 ns
674DMPMC wait may block 117 ns 102 ns 87 ns
675..............................................................
676folly::MPMC read 176 ns 168 ns 159 ns
677folly::MPMC tryReadUntil 1846 ns 900 ns 521 ns
678folly::MPMC blockingRead 219 ns 193 ns 178 ns
679==============================================================
680====================== 1 prod 32 cons ======================
681=== uint32_t =================================================
682DSPMC try spin only 224 ns 213 ns 204 ns
683DSPMC timed spin only 215 ns 209 ns 199 ns
684DSPMC wait spin only 334 ns 114 ns 52 ns
685DSPMC try may block 240 ns 215 ns 202 ns
686DSPMC timed may block 245 ns 221 ns 200 ns
687DSPMC wait may block 215 ns 151 ns 98 ns
688..............................................................
689DMPMC try spin only 348 ns 252 ns 204 ns
690DMPMC timed spin only 379 ns 244 ns 198 ns
691DMPMC wait spin only 173 ns 116 ns 89 ns
692DMPMC try may block 362 ns 231 ns 173 ns
693DMPMC timed may block 282 ns 236 ns 206 ns
694DMPMC wait may block 252 ns 172 ns 134 ns
695..............................................................
696folly::MPMC read 540 ns 290 ns 186 ns
697folly::MPMC tryReadUntil 24946 ns 24280 ns 23113 ns
698folly::MPMC blockingRead 1345 ns 1297 ns 1265 ns
699==============================================================
700====================== 8 prod 1 cons ======================
701=== uint32_t =================================================
702DMPSC try spin only 68 ns 64 ns 60 ns
703DMPSC timed spin only 69 ns 66 ns 61 ns
704DMPSC wait spin only 67 ns 65 ns 62 ns
705DMPSC try may block 77 ns 73 ns 67 ns
706DMPSC timed may block 75 ns 74 ns 68 ns
707DMPSC wait may block 76 ns 73 ns 69 ns
708..............................................................
709DMPMC try spin only 76 ns 66 ns 60 ns
710DMPMC timed spin only 77 ns 68 ns 63 ns
711DMPMC wait spin only 68 ns 65 ns 63 ns
712DMPMC try may block 76 ns 72 ns 64 ns
713DMPMC timed may block 82 ns 74 ns 67 ns
714DMPMC wait may block 77 ns 72 ns 68 ns
715..............................................................
716folly::MPMC read 170 ns 166 ns 161 ns
717folly::MPMC tryReadUntil 184 ns 179 ns 173 ns
718folly::MPMC blockingRead 79 ns 73 ns 53 ns
719==============================================================
720====================== 8 prod 8 cons ======================
721=== uint32_t =================================================
722DMPMC try spin only 181 ns 169 ns 133 ns
723DMPMC timed spin only 179 ns 168 ns 148 ns
724DMPMC wait spin only 77 ns 76 ns 71 ns
725DMPMC try may block 180 ns 179 ns 176 ns
726DMPMC timed may block 174 ns 166 ns 153 ns
727DMPMC wait may block 79 ns 78 ns 75 ns
728..............................................................
729folly::MPMC read 219 ns 206 ns 183 ns
730folly::MPMC tryReadUntil 262 ns 244 ns 213 ns
731folly::MPMC blockingRead 61 ns 58 ns 54 ns
732==============================================================
733====================== 8 prod 32 cons ======================
734=== uint32_t =================================================
735DMPMC try spin only 265 ns 217 ns 203 ns
736DMPMC timed spin only 236 ns 215 ns 202 ns
737DMPMC wait spin only 93 ns 83 ns 77 ns
738DMPMC try may block 325 ns 234 ns 200 ns
739DMPMC timed may block 206 ns 202 ns 193 ns
740DMPMC wait may block 139 ns 93 ns 76 ns
741..............................................................
742folly::MPMC read 259 ns 214 ns 201 ns
743folly::MPMC tryReadUntil 281 ns 274 ns 267 ns
744folly::MPMC blockingRead 62 ns 59 ns 57 ns
745==============================================================
746====================== 32 prod 1 cons ======================
747=== uint32_t =================================================
748DMPSC try spin only 95 ns 57 ns 45 ns
749DMPSC timed spin only 94 ns 52 ns 46 ns
750DMPSC wait spin only 104 ns 54 ns 43 ns
751DMPSC try may block 59 ns 54 ns 51 ns
752DMPSC timed may block 86 ns 58 ns 52 ns
753DMPSC wait may block 76 ns 57 ns 53 ns
754..............................................................
755DMPMC try spin only 68 ns 64 ns 60 ns
756DMPMC timed spin only 137 ns 73 ns 61 ns
757DMPMC wait spin only 86 ns 65 ns 58 ns
758DMPMC try may block 89 ns 71 ns 65 ns
759DMPMC timed may block 82 ns 69 ns 65 ns
760DMPMC wait may block 84 ns 71 ns 66 ns
761..............................................................
762folly::MPMC read 222 ns 203 ns 192 ns
763folly::MPMC tryReadUntil 239 ns 232 ns 191 ns
764folly::MPMC blockingRead 78 ns 68 ns 64 ns
765==============================================================
766====================== 32 prod 8 cons ======================
767=== uint32_t =================================================
768DMPMC try spin only 183 ns 138 ns 107 ns
769DMPMC timed spin only 237 ns 158 ns 98 ns
770DMPMC wait spin only 87 ns 70 ns 58 ns
771DMPMC try may block 169 ns 132 ns 92 ns
772DMPMC timed may block 172 ns 133 ns 79 ns
773DMPMC wait may block 166 ns 89 ns 66 ns
774..............................................................
775folly::MPMC read 221 ns 194 ns 183 ns
776folly::MPMC tryReadUntil 258 ns 244 ns 230 ns
777folly::MPMC blockingRead 60 ns 54 ns 47 ns
778==============================================================
779====================== 32 prod 32 cons ======================
780=== uint32_t =================================================
781DMPMC try spin only 419 ns 252 ns 181 ns
782DMPMC timed spin only 252 ns 212 ns 179 ns
783DMPMC wait spin only 153 ns 79 ns 62 ns
784DMPMC try may block 302 ns 236 ns 182 ns
785DMPMC timed may block 266 ns 213 ns 170 ns
786DMPMC wait may block 262 ns 120 ns 64 ns
787..............................................................
788folly::MPMC read 269 ns 245 ns 199 ns
789folly::MPMC tryReadUntil 257 ns 245 ns 235 ns
790folly::MPMC blockingRead 53 ns 48 ns 45 ns
791==============================================================
792
793$ numactl -N 1 dynamic_bounded_queue_test --bench --capacity=1000
794==============================================================
79510 reps of 1000000 handoffs
796..............................................................
797Using capacity 1000 for all queues
798==============================================================
799Test name Max time Avg time Min time
800====================== 1 prod 1 cons ======================
801=== uint32_t =================================================
802DSPSC try spin only 7 ns 7 ns 7 ns
803DSPSC timed spin only 9 ns 9 ns 9 ns
804DSPSC wait spin only 7 ns 7 ns 7 ns
805DSPSC try may block 34 ns 33 ns 31 ns
806DSPSC timed may block 34 ns 34 ns 33 ns
807DSPSC wait may block 30 ns 30 ns 29 ns
808..............................................................
809DMPSC try spin only 60 ns 57 ns 55 ns
810DMPSC timed spin only 55 ns 52 ns 51 ns
811DMPSC wait spin only 57 ns 54 ns 52 ns
812DMPSC try may block 66 ns 62 ns 39 ns
813DMPSC timed may block 67 ns 64 ns 62 ns
814DMPSC wait may block 67 ns 65 ns 64 ns
815..............................................................
816DSPMC try spin only 27 ns 25 ns 24 ns
817DSPMC timed spin only 25 ns 25 ns 24 ns
818DSPMC wait spin only 23 ns 23 ns 22 ns
819DSPMC try may block 31 ns 26 ns 24 ns
820DSPMC timed may block 33 ns 30 ns 30 ns
821DSPMC wait may block 37 ns 29 ns 28 ns
822..............................................................
823DMPMC try spin only 55 ns 53 ns 51 ns
824DMPMC timed spin only 36 ns 31 ns 26 ns
825DMPMC wait spin only 54 ns 53 ns 51 ns
826DMPMC try may block 68 ns 64 ns 51 ns
827DMPMC timed may block 66 ns 63 ns 60 ns
828DMPMC wait may block 68 ns 63 ns 60 ns
829..............................................................
830folly::PCQ read 15 ns 13 ns 11 ns
831..............................................................
832folly::MPMC read 60 ns 56 ns 51 ns
833folly::MPMC tryReadUntil 134 ns 112 ns 102 ns
834folly::MPMC blockingRead 57 ns 51 ns 48 ns
835==============================================================
836====================== 1 prod 8 cons ======================
837=== uint32_t =================================================
838DSPMC try spin only 169 ns 162 ns 151 ns
839DSPMC timed spin only 178 ns 166 ns 149 ns
840DSPMC wait spin only 59 ns 55 ns 54 ns
841DSPMC try may block 173 ns 163 ns 153 ns
842DSPMC timed may block 171 ns 166 ns 156 ns
843DSPMC wait may block 71 ns 57 ns 51 ns
844..............................................................
845DMPMC try spin only 172 ns 164 ns 158 ns
846DMPMC timed spin only 173 ns 164 ns 156 ns
847DMPMC wait spin only 77 ns 62 ns 53 ns
848DMPMC try may block 181 ns 163 ns 152 ns
849DMPMC timed may block 174 ns 165 ns 151 ns
850DMPMC wait may block 91 ns 72 ns 52 ns
851..............................................................
852folly::MPMC read 178 ns 167 ns 161 ns
853folly::MPMC tryReadUntil 991 ns 676 ns 423 ns
854folly::MPMC blockingRead 154 ns 129 ns 96 ns
855==============================================================
856====================== 1 prod 32 cons ======================
857=== uint32_t =================================================
858DSPMC try spin only 462 ns 288 ns 201 ns
859DSPMC timed spin only 514 ns 283 ns 201 ns
860DSPMC wait spin only 100 ns 60 ns 45 ns
861DSPMC try may block 531 ns 318 ns 203 ns
862DSPMC timed may block 1379 ns 891 ns 460 ns
863DSPMC wait may block 148 ns 111 ns 82 ns
864..............................................................
865DMPMC try spin only 404 ns 312 ns 205 ns
866DMPMC timed spin only 337 ns 253 ns 219 ns
867DMPMC wait spin only 130 ns 97 ns 72 ns
868DMPMC try may block 532 ns 265 ns 201 ns
869DMPMC timed may block 846 ns 606 ns 412 ns
870DMPMC wait may block 158 ns 112 ns 87 ns
871..............................................................
872folly::MPMC read 880 ns 419 ns 284 ns
873folly::MPMC tryReadUntil 23432 ns 23184 ns 23007 ns
874folly::MPMC blockingRead 1353 ns 1308 ns 1279 ns
875==============================================================
876====================== 8 prod 1 cons ======================
877=== uint32_t =================================================
878DMPSC try spin only 67 ns 63 ns 51 ns
879DMPSC timed spin only 69 ns 65 ns 63 ns
880DMPSC wait spin only 67 ns 65 ns 61 ns
881DMPSC try may block 73 ns 69 ns 63 ns
882DMPSC timed may block 72 ns 69 ns 64 ns
883DMPSC wait may block 71 ns 70 ns 68 ns
884..............................................................
885DMPMC try spin only 70 ns 64 ns 59 ns
886DMPMC timed spin only 76 ns 66 ns 53 ns
887DMPMC wait spin only 68 ns 66 ns 64 ns
888DMPMC try may block 71 ns 68 ns 66 ns
889DMPMC timed may block 72 ns 70 ns 67 ns
890DMPMC wait may block 73 ns 70 ns 67 ns
891..............................................................
892folly::MPMC read 193 ns 167 ns 153 ns
893folly::MPMC tryReadUntil 497 ns 415 ns 348 ns
894folly::MPMC blockingRead 163 ns 134 ns 115 ns
895==============================================================
896====================== 8 prod 8 cons ======================
897=== uint32_t =================================================
898DMPMC try spin only 216 ns 203 ns 196 ns
899DMPMC timed spin only 199 ns 186 ns 178 ns
900DMPMC wait spin only 63 ns 60 ns 58 ns
901DMPMC try may block 212 ns 198 ns 183 ns
902DMPMC timed may block 180 ns 170 ns 162 ns
903DMPMC wait may block 72 ns 68 ns 65 ns
904..............................................................
905folly::MPMC read 225 ns 201 ns 188 ns
906folly::MPMC tryReadUntil 255 ns 248 ns 232 ns
907folly::MPMC blockingRead 52 ns 48 ns 42 ns
908==============================================================
909====================== 8 prod 32 cons ======================
910=== uint32_t =================================================
911DMPMC try spin only 360 ns 302 ns 195 ns
912DMPMC timed spin only 350 ns 272 ns 218 ns
913DMPMC wait spin only 92 ns 72 ns 61 ns
914DMPMC try may block 352 ns 263 ns 223 ns
915DMPMC timed may block 218 ns 213 ns 209 ns
916DMPMC wait may block 98 ns 77 ns 70 ns
917..............................................................
918folly::MPMC read 611 ns 461 ns 339 ns
919folly::MPMC tryReadUntil 270 ns 260 ns 253 ns
920folly::MPMC blockingRead 89 ns 84 ns 80 ns
921==============================================================
922====================== 32 prod 1 cons ======================
923=== uint32_t =================================================
924DMPSC try spin only 389 ns 248 ns 149 ns
925DMPSC timed spin only 356 ns 235 ns 120 ns
926DMPSC wait spin only 343 ns 242 ns 125 ns
927DMPSC try may block 412 ns 294 ns 168 ns
928DMPSC timed may block 332 ns 271 ns 189 ns
929DMPSC wait may block 280 ns 252 ns 199 ns
930..............................................................
931DMPMC try spin only 393 ns 269 ns 105 ns
932DMPMC timed spin only 328 ns 240 ns 112 ns
933DMPMC wait spin only 502 ns 266 ns 107 ns
934DMPMC try may block 514 ns 346 ns 192 ns
935DMPMC timed may block 339 ns 318 ns 278 ns
936DMPMC wait may block 319 ns 307 ns 292 ns
937..............................................................
938folly::MPMC read 948 ns 517 ns 232 ns
939folly::MPMC tryReadUntil 9649 ns 7567 ns 4140 ns
940folly::MPMC blockingRead 1365 ns 1316 ns 1131 ns
941==============================================================
942====================== 32 prod 8 cons ======================
943=== uint32_t =================================================
944DMPMC try spin only 436 ns 257 ns 115 ns
945DMPMC timed spin only 402 ns 272 ns 121 ns
946DMPMC wait spin only 136 ns 78 ns 55 ns
947DMPMC try may block 454 ns 227 ns 78 ns
948DMPMC timed may block 155 ns 137 ns 116 ns
949DMPMC wait may block 62 ns 59 ns 57 ns
950..............................................................
951folly::MPMC read 677 ns 497 ns 336 ns
952folly::MPMC tryReadUntil 268 ns 262 ns 258 ns
953folly::MPMC blockingRead 87 ns 85 ns 82 ns
954==============================================================
955====================== 32 prod 32 cons ======================
956=== uint32_t =================================================
957DMPMC try spin only 786 ns 381 ns 142 ns
958DMPMC timed spin only 795 ns 346 ns 126 ns
959DMPMC wait spin only 334 ns 107 ns 55 ns
960DMPMC try may block 535 ns 317 ns 144 ns
961DMPMC timed may block 197 ns 192 ns 183 ns
962DMPMC wait may block 189 ns 75 ns 60 ns
963..............................................................
964folly::MPMC read 1110 ns 919 ns 732 ns
965folly::MPMC tryReadUntil 214 ns 210 ns 206 ns
966folly::MPMC blockingRead 53 ns 52 ns 51 ns
967==============================================================
968
969$ lscpu
970Architecture: x86_64
971CPU op-mode(s): 32-bit, 64-bit
972Byte Order: Little Endian
973CPU(s): 32
974On-line CPU(s) list: 0-31
975Thread(s) per core: 2
976Core(s) per socket: 8
977Socket(s): 2
978NUMA node(s): 2
979Vendor ID: GenuineIntel
980CPU family: 6
981Model: 45
982Model name: Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
983Stepping: 6
984CPU MHz: 2200.000
985CPU max MHz: 2200.0000
986CPU min MHz: 1200.0000
987BogoMIPS: 4399.92
988Virtualization: VT-x
989L1d cache: 32K
990L1i cache: 32K
991L2 cache: 256K
992L3 cache: 20480K
993NUMA node0 CPU(s): 0-7,16-23
994NUMA node1 CPU(s): 8-15,24-31
995
996Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr
997 pge mca cmov pat pse36 clflush dts acpi mmx fxsr
998 sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp
999 lm constant_tsc arch_perfmon pebs bts rep_good
1000 nopl xtopology nonstop_tsc aperfmperf eagerfpu
1001 pni pclmulqdq dtes64 monitor ds_cpl vmx smx est
1002 tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2
1003 x2apic popcnt tsc_deadline_timer aes xsave avx
1004 lahf_lm epb tpr_shadow vnmi flexpriority ept vpid
1005 xsaveopt dtherm arat pln pts
1006 */
1007