1 | /* |
2 | * Copyright 2017-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
#include <folly/concurrency/UnboundedQueue.h>
#include <folly/MPMCQueue.h>
#include <folly/ProducerConsumerQueue.h>
#include <folly/portability/GTest.h>

#include <boost/thread/barrier.hpp>
#include <glog/logging.h>

#include <algorithm>
#include <atomic>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <limits>
#include <string>
#include <thread>
#include <vector>
27 | |
// Command-line flags (gflags) that control the optional benchmark.

// When false, TEST(UnboundedQueue, bench) returns immediately.
DEFINE_bool(bench, false, "run benchmark" );
// Number of timed repetitions per benchmark row (read by runBench).
DEFINE_int32(reps, 10, "number of reps" );
// Number of enqueue/dequeue operations performed in each repetition
// (read by bench).
DEFINE_int32(ops, 1000000, "number of operations per rep" );
// Capacity handed to the folly::MPMCQueue and folly::ProducerConsumerQueue
// wrappers that are benchmarked for comparison.
DEFINE_int64(capacity, 256 * 1024, "capacity" );
32 | |
33 | template <typename T, bool MayBlock> |
34 | using USPSC = folly::USPSCQueue<T, MayBlock>; |
35 | |
36 | template <typename T, bool MayBlock> |
37 | using UMPSC = folly::UMPSCQueue<T, MayBlock>; |
38 | |
39 | template <typename T, bool MayBlock> |
40 | using USPMC = folly::USPMCQueue<T, MayBlock>; |
41 | |
42 | template <typename T, bool MayBlock> |
43 | using UMPMC = folly::UMPMCQueue<T, MayBlock>; |
44 | |
// check functions for inspecting inlined native code

// Empty sink: the check_*_ret functions in this file pass the result of the
// value-returning try_dequeue* calls to it.
// NOTE(review): presumably declared weak (FOLLY_ATTR_WEAK) so the optimizer
// cannot see through the call and drop the dequeue result - confirm.
FOLLY_ATTR_WEAK void noop(folly::Optional<int>&&) {}
48 | |
// C-linkage wrappers around the out-parameter dequeue calls of a
// USPSC<int, true> queue. Each one forwards exactly one call so that its
// generated code can be inspected in isolation.

// Forwards to queue.dequeue(item).
extern "C" void check_uspsc_mayblock_dequeue_ref(
    USPSC<int, true>& queue,
    int& item) {
  queue.dequeue(item);
}
// Forwards to queue.try_dequeue(item) and returns its result.
extern "C" bool check_uspsc_mayblock_try_dequeue_ref(
    USPSC<int, true>& queue,
    int& item) {
  return queue.try_dequeue(item);
}
// Forwards to queue.try_dequeue_for(item, timeout) and returns its result.
extern "C" bool check_uspsc_mayblock_try_dequeue_for_ref(
    USPSC<int, true>& queue,
    int& item,
    std::chrono::milliseconds const& timeout) {
  return queue.try_dequeue_for(item, timeout);
}
// Forwards to queue.try_dequeue_until(item, deadline) and returns its result.
extern "C" bool check_uspsc_mayblock_try_dequeue_until_ref(
    USPSC<int, true>& queue,
    int& item,
    std::chrono::steady_clock::time_point const& deadline) {
  return queue.try_dequeue_until(item, deadline);
}
71 | |
// C-linkage wrappers around the value-returning dequeue calls of a
// USPSC<int, true> queue. The try_* results are handed to noop() rather than
// returned from the wrapper.

// Returns the value produced by queue.dequeue().
extern "C" int check_uspsc_mayblock_dequeue_ret(USPSC<int, true>& queue) {
  return queue.dequeue();
}
// Passes the result of queue.try_dequeue() to noop().
extern "C" void check_uspsc_mayblock_try_dequeue_ret(USPSC<int, true>& queue) {
  noop(queue.try_dequeue());
}
// Passes the result of queue.try_dequeue_for(timeout) to noop().
extern "C" void check_uspsc_mayblock_try_dequeue_for_ret(
    USPSC<int, true>& queue,
    std::chrono::milliseconds const& timeout) {
  noop(queue.try_dequeue_for(timeout));
}
// Passes the result of queue.try_dequeue_until(deadline) to noop().
extern "C" void check_uspsc_mayblock_try_dequeue_until_ret(
    USPSC<int, true>& queue,
    std::chrono::steady_clock::time_point const& deadline) {
  noop(queue.try_dequeue_until(deadline));
}
88 | |
// C-linkage wrappers around the out-parameter dequeue calls of a
// UMPMC<int, true> queue. Each one forwards exactly one call so that its
// generated code can be inspected in isolation.

// Forwards to queue.dequeue(item).
extern "C" void check_umpmc_mayblock_dequeue_ref(
    UMPMC<int, true>& queue,
    int& item) {
  queue.dequeue(item);
}
// Forwards to queue.try_dequeue(item) and returns its result.
extern "C" bool check_umpmc_mayblock_try_dequeue_ref(
    UMPMC<int, true>& queue,
    int& item) {
  return queue.try_dequeue(item);
}
// Forwards to queue.try_dequeue_for(item, timeout) and returns its result.
extern "C" bool check_umpmc_mayblock_try_dequeue_for_ref(
    UMPMC<int, true>& queue,
    int& item,
    std::chrono::milliseconds const& timeout) {
  return queue.try_dequeue_for(item, timeout);
}
// Forwards to queue.try_dequeue_until(item, deadline) and returns its result.
extern "C" bool check_umpmc_mayblock_try_dequeue_until_ref(
    UMPMC<int, true>& queue,
    int& item,
    std::chrono::steady_clock::time_point const& deadline) {
  return queue.try_dequeue_until(item, deadline);
}
111 | |
// C-linkage wrappers around the value-returning dequeue calls of a
// UMPMC<int, true> queue. The try_* results are handed to noop() rather than
// returned from the wrapper.

// Returns the value produced by queue.dequeue().
extern "C" int check_umpmc_mayblock_dequeue_ret(UMPMC<int, true>& queue) {
  return queue.dequeue();
}
// Passes the result of queue.try_dequeue() to noop().
extern "C" void check_umpmc_mayblock_try_dequeue_ret(UMPMC<int, true>& queue) {
  noop(queue.try_dequeue());
}
// Passes the result of queue.try_dequeue_for(timeout) to noop().
extern "C" void check_umpmc_mayblock_try_dequeue_for_ret(
    UMPMC<int, true>& queue,
    std::chrono::milliseconds const& timeout) {
  noop(queue.try_dequeue_for(timeout));
}
// Passes the result of queue.try_dequeue_until(deadline) to noop().
extern "C" void check_umpmc_mayblock_try_dequeue_until_ret(
    UMPMC<int, true>& queue,
    std::chrono::steady_clock::time_point const& deadline) {
  noop(queue.try_dequeue_until(deadline));
}
128 | |
129 | template <template <typename, bool> class Q, bool MayBlock> |
130 | void basic_test() { |
131 | Q<int, MayBlock> q; |
132 | ASSERT_TRUE(q.empty()); |
133 | ASSERT_EQ(q.size(), 0); |
134 | int v = -1; |
135 | ASSERT_FALSE(q.try_dequeue(v)); |
136 | |
137 | q.enqueue(1); |
138 | ASSERT_FALSE(q.empty()); |
139 | ASSERT_EQ(q.size(), 1); |
140 | |
141 | q.enqueue(2); |
142 | ASSERT_EQ(q.size(), 2); |
143 | ASSERT_FALSE(q.empty()); |
144 | |
145 | ASSERT_TRUE(q.try_dequeue(v)); |
146 | ASSERT_EQ(v, 1); |
147 | ASSERT_FALSE(q.empty()); |
148 | ASSERT_EQ(q.size(), 1); |
149 | |
150 | ASSERT_TRUE(q.try_dequeue(v)); |
151 | ASSERT_EQ(v, 2); |
152 | ASSERT_TRUE(q.empty()); |
153 | ASSERT_EQ(q.size(), 0); |
154 | } |
155 | |
// Runs basic_test against each of the four queue aliases (USPSC, UMPSC,
// USPMC, UMPMC), first with MayBlock = false and then with MayBlock = true.
TEST(UnboundedQueue, basic) {
  basic_test<USPSC, false>();
  basic_test<UMPSC, false>();
  basic_test<USPMC, false>();
  basic_test<UMPMC, false>();
  basic_test<USPSC, true>();
  basic_test<UMPSC, true>();
  basic_test<USPMC, true>();
  basic_test<UMPMC, true>();
}
166 | |
167 | template <template <typename, bool> class Q, bool MayBlock> |
168 | void timeout_test() { |
169 | Q<int, MayBlock> q; |
170 | int v; |
171 | ASSERT_FALSE(q.try_dequeue_until( |
172 | v, std::chrono::steady_clock::now() + std::chrono::microseconds(1))); |
173 | ASSERT_FALSE(q.try_dequeue_for(v, std::chrono::microseconds(1))); |
174 | q.enqueue(10); |
175 | ASSERT_TRUE(q.try_dequeue_until( |
176 | v, std::chrono::steady_clock::now() + std::chrono::microseconds(1))); |
177 | ASSERT_EQ(v, 10); |
178 | } |
179 | |
// Runs timeout_test against each of the four queue aliases (USPSC, UMPSC,
// USPMC, UMPMC), first with MayBlock = false and then with MayBlock = true.
TEST(UnboundedQueue, timeout) {
  timeout_test<USPSC, false>();
  timeout_test<UMPSC, false>();
  timeout_test<USPMC, false>();
  timeout_test<UMPMC, false>();
  timeout_test<USPSC, true>();
  timeout_test<UMPSC, true>();
  timeout_test<USPMC, true>();
  timeout_test<UMPMC, true>();
}
190 | |
// Checks try_peek() on Q<int, MayBlock>: it yields nothing for an empty
// queue, and after enqueuing 0..999 and dequeuing the first 700 values it
// exposes 700 as the next element.
template <template <typename, bool> class Q, bool MayBlock>
void peek_test() {
  const int enqueued = 1000;
  const int drained = 700;

  Q<int, MayBlock> queue;
  auto head = queue.try_peek();
  ASSERT_FALSE(head);

  for (int value = 0; value < enqueued; ++value) {
    queue.enqueue(value);
  }
  for (int n = 0; n < drained; ++n) {
    int discarded;
    queue.dequeue(discarded);
  }

  // The first value that was not drained must now be at the head.
  head = queue.try_peek();
  ASSERT_TRUE(head);
  ASSERT_EQ(*head, drained);
}
207 | |
// Runs peek_test against the USPSC and UMPSC aliases only, with MayBlock
// false and then true.
// NOTE(review): presumably limited to these two because try_peek is only
// meaningful with a single consumer - confirm against the queue's header.
TEST(UnboundedQueue, peek) {
  peek_test<USPSC, false>();
  peek_test<UMPSC, false>();
  peek_test<USPSC, true>();
  peek_test<UMPSC, true>();
}
214 | |
215 | TEST(UnboundedQueue, cleanup_on_destruction) { |
216 | struct Foo { |
217 | int* p_{nullptr}; |
218 | explicit Foo(int* p) : p_(p) {} |
219 | Foo(Foo&& o) noexcept : p_(std::exchange(o.p_, nullptr)) {} |
220 | ~Foo() { |
221 | if (p_) { |
222 | ++(*p_); |
223 | } |
224 | } |
225 | Foo& operator=(Foo&& o) noexcept { |
226 | p_ = std::exchange(o.p_, nullptr); |
227 | return *this; |
228 | } |
229 | }; |
230 | int count = 0; |
231 | int num = 3; |
232 | { |
233 | folly::UMPMCQueue<Foo, false> q; |
234 | for (int i = 0; i < num; ++i) { |
235 | Foo foo(&count); |
236 | q.enqueue(std::move(foo)); |
237 | } |
238 | } |
239 | EXPECT_EQ(count, num); |
240 | } |
241 | |
242 | template <typename ProdFunc, typename ConsFunc, typename EndFunc> |
243 | inline uint64_t run_once( |
244 | int nprod, |
245 | int ncons, |
246 | const ProdFunc& prodFn, |
247 | const ConsFunc& consFn, |
248 | const EndFunc& endFn) { |
249 | boost::barrier barrier(1 + nprod + ncons); |
250 | |
251 | /* producers */ |
252 | std::vector<std::thread> prodThr(nprod); |
253 | for (int tid = 0; tid < nprod; ++tid) { |
254 | prodThr[tid] = std::thread([&, tid] { |
255 | barrier.wait(); // A - wait for thread start |
256 | barrier.wait(); // B - init the work |
257 | prodFn(tid); |
258 | barrier.wait(); // C - join the work |
259 | }); |
260 | } |
261 | |
262 | /* consumers */ |
263 | std::vector<std::thread> consThr(ncons); |
264 | for (int tid = 0; tid < ncons; ++tid) { |
265 | consThr[tid] = std::thread([&, tid] { |
266 | barrier.wait(); // A - wait for thread start |
267 | barrier.wait(); // B - init the work |
268 | consFn(tid); |
269 | barrier.wait(); // C - join the work |
270 | }); |
271 | } |
272 | |
273 | barrier.wait(); // A - wait for thread start |
274 | |
275 | /* begin time measurement */ |
276 | auto const tbegin = std::chrono::steady_clock::now(); |
277 | |
278 | barrier.wait(); // B - init the work |
279 | barrier.wait(); // C - join the work |
280 | |
281 | /* end time measurement */ |
282 | auto const tend = std::chrono::steady_clock::now(); |
283 | |
284 | /* wait for completion */ |
285 | for (int i = 0; i < nprod; ++i) { |
286 | prodThr[i].join(); |
287 | } |
288 | for (int i = 0; i < ncons; ++i) { |
289 | consThr[i].join(); |
290 | } |
291 | |
292 | endFn(); |
293 | auto const dur = tend - tbegin; |
294 | return std::chrono::duration_cast<std::chrono::nanoseconds>(dur).count(); |
295 | } |
296 | |
297 | template <bool SingleProducer, bool SingleConsumer, bool MayBlock> |
298 | void enq_deq_test(const int nprod, const int ncons) { |
299 | if (SingleProducer) { |
300 | ASSERT_EQ(nprod, 1); |
301 | } |
302 | if (SingleConsumer) { |
303 | ASSERT_EQ(ncons, 1); |
304 | } |
305 | |
306 | int ops = 1000; |
307 | folly::UnboundedQueue<int, SingleProducer, SingleConsumer, MayBlock, 4> q; |
308 | std::atomic<uint64_t> sum(0); |
309 | |
310 | auto prod = [&](int tid) { |
311 | for (int i = tid; i < ops; i += nprod) { |
312 | q.enqueue(i); |
313 | } |
314 | }; |
315 | |
316 | auto cons = [&](int tid) { |
317 | uint64_t mysum = 0; |
318 | for (int i = tid; i < ops; i += ncons) { |
319 | int v = -1; |
320 | int vpeek = -1; |
321 | |
322 | if (SingleConsumer) { |
323 | while (true) { |
324 | auto res = q.try_peek(); |
325 | if (res) { |
326 | vpeek = *res; |
327 | break; |
328 | } |
329 | } |
330 | } |
331 | if ((i % 3) == 0) { |
332 | while (!q.try_dequeue(v)) { |
333 | /* keep trying */; |
334 | } |
335 | } else if ((i % 3) == 1) { |
336 | auto duration = std::chrono::milliseconds(1); |
337 | while (!q.try_dequeue_for(v, duration)) { |
338 | /* keep trying */; |
339 | } |
340 | } else { |
341 | q.dequeue(v); |
342 | } |
343 | if (nprod == 1 && ncons == 1) { |
344 | ASSERT_EQ(v, i); |
345 | } |
346 | if (SingleConsumer) { |
347 | ASSERT_EQ(v, vpeek); |
348 | } |
349 | mysum += v; |
350 | } |
351 | sum.fetch_add(mysum); |
352 | }; |
353 | |
354 | auto endfn = [&] { |
355 | uint64_t expected = (ops) * (ops - 1) / 2; |
356 | uint64_t actual = sum.load(); |
357 | ASSERT_EQ(expected, actual); |
358 | }; |
359 | run_once(nprod, ncons, prod, cons, endfn); |
360 | } |
361 | |
// Runs enq_deq_test<SingleProducer, SingleConsumer, MayBlock>(nprod, ncons)
// over a matrix of queue flavors and thread counts. Every configuration is
// run twice: once with MayBlock = false and once with MayBlock = true.
TEST(UnboundedQueue, enq_deq) {
  /* SPSC */
  enq_deq_test<true, true, false>(1, 1);
  enq_deq_test<true, true, true>(1, 1);
  /* MPSC */
  enq_deq_test<false, true, false>(1, 1);
  enq_deq_test<false, true, true>(1, 1);
  enq_deq_test<false, true, false>(2, 1);
  enq_deq_test<false, true, true>(2, 1);
  enq_deq_test<false, true, false>(10, 1);
  enq_deq_test<false, true, true>(10, 1);
  /* SPMC */
  enq_deq_test<true, false, false>(1, 1);
  enq_deq_test<true, false, true>(1, 1);
  enq_deq_test<true, false, false>(1, 2);
  enq_deq_test<true, false, true>(1, 2);
  enq_deq_test<true, false, false>(1, 10);
  enq_deq_test<true, false, true>(1, 10);
  /* MPMC */
  enq_deq_test<false, false, false>(1, 1);
  enq_deq_test<false, false, true>(1, 1);
  enq_deq_test<false, false, false>(2, 1);
  enq_deq_test<false, false, true>(2, 1);
  enq_deq_test<false, false, false>(10, 1);
  enq_deq_test<false, false, true>(10, 1);
  enq_deq_test<false, false, false>(1, 2);
  enq_deq_test<false, false, true>(1, 2);
  enq_deq_test<false, false, false>(1, 10);
  enq_deq_test<false, false, true>(1, 10);
  enq_deq_test<false, false, false>(2, 2);
  enq_deq_test<false, false, true>(2, 2);
  enq_deq_test<false, false, false>(10, 10);
  enq_deq_test<false, false, true>(10, 10);
}
396 | |
397 | template <typename RepFunc> |
398 | uint64_t runBench(const std::string& name, int ops, const RepFunc& repFn) { |
399 | int reps = FLAGS_reps; |
400 | uint64_t min = UINTMAX_MAX; |
401 | uint64_t max = 0; |
402 | uint64_t sum = 0; |
403 | |
404 | std::vector<uint64_t> durs(reps); |
405 | |
406 | repFn(); // sometimes first run is outlier |
407 | for (int r = 0; r < reps; ++r) { |
408 | uint64_t dur = repFn(); |
409 | durs[r] = dur; |
410 | sum += dur; |
411 | min = std::min(min, dur); |
412 | max = std::max(max, dur); |
413 | // if each rep takes too long run at least 3 reps |
414 | const uint64_t minute = 60000000000UL; |
415 | if (sum > minute && r >= 2) { |
416 | reps = r + 1; |
417 | break; |
418 | } |
419 | } |
420 | |
421 | const std::string unit = " ns" ; |
422 | uint64_t avg = sum / reps; |
423 | uint64_t res = min; |
424 | uint64_t varsum = 0; |
425 | for (auto r = 0; r < reps; ++r) { |
426 | auto term = int64_t(reps * durs[r]) - int64_t(sum); |
427 | varsum += term * term; |
428 | } |
429 | uint64_t dev = uint64_t(std::sqrt(varsum) * std::pow(reps, -1.5)); |
430 | std::cout << name; |
431 | std::cout << " " << std::setw(4) << max / ops << unit; |
432 | std::cout << " " << std::setw(4) << avg / ops << unit; |
433 | std::cout << " " << std::setw(4) << dev / ops << unit; |
434 | std::cout << " " << std::setw(4) << res / ops << unit; |
435 | std::cout << std::endl; |
436 | return res; |
437 | } |
438 | |
439 | template <template <typename, bool> class Q, typename T, int Op> |
440 | uint64_t bench(const int nprod, const int ncons, const std::string& name) { |
441 | int ops = FLAGS_ops; |
442 | auto repFn = [&] { |
443 | Q<T, Op == 3 || Op == 4 || Op == 5> q; |
444 | std::atomic<uint64_t> sum(0); |
445 | auto prod = [&](int tid) { |
446 | for (int i = tid; i < ops; i += nprod) { |
447 | q.enqueue(i); |
448 | } |
449 | }; |
450 | auto cons = [&](int tid) { |
451 | uint64_t mysum = 0; |
452 | for (int i = tid; i < ops; i += ncons) { |
453 | T v; |
454 | if (Op == 0 || Op == 3) { |
455 | while (UNLIKELY(!q.try_dequeue(v))) { |
456 | /* keep trying */; |
457 | } |
458 | } else if (Op == 1 || Op == 4) { |
459 | auto duration = std::chrono::microseconds(1000); |
460 | while (UNLIKELY(!q.try_dequeue_for(v, duration))) { |
461 | /* keep trying */; |
462 | } |
463 | } else { |
464 | DCHECK(Op == 2 || Op == 5); |
465 | q.dequeue(v); |
466 | } |
467 | if (nprod == 1 && ncons == 1) { |
468 | DCHECK_EQ(int(v), i); |
469 | } |
470 | mysum += v; |
471 | } |
472 | sum.fetch_add(mysum); |
473 | }; |
474 | auto endfn = [&] { |
475 | uint64_t expected = (ops) * (ops - 1) / 2; |
476 | uint64_t actual = sum.load(); |
477 | DCHECK_EQ(expected, actual); |
478 | }; |
479 | return run_once(nprod, ncons, prod, cons, endfn); |
480 | }; |
481 | return runBench(name, ops, repFn); |
482 | } |
483 | |
484 | /* For performance comparison */ |
485 | template <typename T> |
486 | class MPMC { |
487 | folly::MPMCQueue<T> q_; |
488 | |
489 | public: |
490 | MPMC() : q_(FLAGS_capacity) {} |
491 | |
492 | template <typename... Args> |
493 | void enqueue(Args&&... args) { |
494 | q_.blockingWrite(std::forward<Args>(args)...); |
495 | } |
496 | |
497 | void dequeue(T& item) { |
498 | q_.blockingRead(item); |
499 | } |
500 | |
501 | bool try_dequeue(T& item) { |
502 | return q_.read(item); |
503 | } |
504 | |
505 | template <typename Rep, typename Period> |
506 | bool try_dequeue_for( |
507 | T& item, |
508 | const std::chrono::duration<Rep, Period>& duration) noexcept { |
509 | auto deadline = std::chrono::steady_clock::now() + duration; |
510 | return q_.tryReadUntil(deadline, item); |
511 | } |
512 | }; |
513 | |
514 | template <typename T, bool ignore> |
515 | using FMPMC = MPMC<T>; |
516 | |
517 | template <typename T> |
518 | class PCQ { |
519 | folly::ProducerConsumerQueue<T> q_; |
520 | |
521 | public: |
522 | PCQ() : q_(FLAGS_capacity) {} |
523 | |
524 | template <typename... Args> |
525 | void enqueue(Args&&... args) { |
526 | while (!q_.write(std::forward<Args>(args)...)) { |
527 | /* keep trying*/; |
528 | } |
529 | } |
530 | |
531 | void dequeue(T&) { |
532 | ASSERT_TRUE(false); |
533 | } |
534 | |
535 | bool try_dequeue(T& item) { |
536 | return q_.read(item); |
537 | } |
538 | |
539 | template <typename Rep, typename Period> |
540 | bool try_dequeue_for(T&, const std::chrono::duration<Rep, Period>&) noexcept { |
541 | return false; |
542 | } |
543 | }; |
544 | |
545 | template <typename T, bool ignore> |
546 | using FPCQ = PCQ<T>; |
547 | |
/**
 * Fixed-size payload of M ints that can stand in for an integer element
 * type: it is implicitly constructible from an int (every slot receives
 * that value) and implicitly convertible back to int (the first slot).
 */
template <size_t M>
struct IntArray {
  static_assert(M > 0, "IntArray needs at least one element");

  int a[M];

  // Leaves the elements uninitialized.
  IntArray() {}

  // Fills every slot with v.
  /* implicit */ IntArray(int v) {
    for (int& slot : a) {
      slot = v;
    }
  }

  // Yields the first slot. Const-qualified so that const objects and const
  // references can be converted as well.
  operator int() const {
    return a[0];
  }
};
561 | |
// Writes a separator row made of dots to std::cout, ends the line and
// flushes the stream.
void dottedLine() {
  static const char kSeparator[] =
      "........................................................................";
  std::cout << kSeparator << std::endl;
}
567 | |
// Runs every benchmark that applies to the given producer/consumer counts
// for element type T and prints one row per benchmark.
//
//   np   - number of producer threads.
//   nc   - number of consumer threads.
//   name - heading printed before the rows.
//
// Which groups run depends on the thread counts:
//   USPSC rows only when np == 1 and nc == 1,
//   UMPSC rows only when nc == 1,
//   USPMC rows only when np == 1,
//   UMPMC rows always,
//   folly::ProducerConsumerQueue row only when np == 1 and nc == 1,
//   folly::MPMCQueue rows always.
// For the unbounded queues the third template argument of bench selects the
// dequeue style: 0-2 are the spin-only rows, 3-5 the may-block rows.
template <typename T>
void type_benches(const int np, const int nc, const std::string& name) {
  std::cout << name << "====================================================="
            << std::endl;
  if (np == 1 && nc == 1) {
    bench<USPSC, T, 0>(1, 1, "Unbounded SPSC try spin only " );
    bench<USPSC, T, 1>(1, 1, "Unbounded SPSC timed spin only " );
    bench<USPSC, T, 2>(1, 1, "Unbounded SPSC wait spin only " );
    bench<USPSC, T, 3>(1, 1, "Unbounded SPSC try may block " );
    bench<USPSC, T, 4>(1, 1, "Unbounded SPSC timed may block " );
    bench<USPSC, T, 5>(1, 1, "Unbounded SPSC wait may block " );
    dottedLine();
  }
  if (nc == 1) {
    bench<UMPSC, T, 0>(np, 1, "Unbounded MPSC try spin only " );
    bench<UMPSC, T, 1>(np, 1, "Unbounded MPSC timed spin only " );
    bench<UMPSC, T, 2>(np, 1, "Unbounded MPSC wait spin only " );
    bench<UMPSC, T, 3>(np, 1, "Unbounded MPSC try may block " );
    bench<UMPSC, T, 4>(np, 1, "Unbounded MPSC timed may block " );
    bench<UMPSC, T, 5>(np, 1, "Unbounded MPSC wait may block " );
    dottedLine();
  }
  if (np == 1) {
    bench<USPMC, T, 0>(1, nc, "Unbounded SPMC try spin only " );
    bench<USPMC, T, 1>(1, nc, "Unbounded SPMC timed spin only " );
    bench<USPMC, T, 2>(1, nc, "Unbounded SPMC wait spin only " );
    bench<USPMC, T, 3>(1, nc, "Unbounded SPMC try may block " );
    bench<USPMC, T, 4>(1, nc, "Unbounded SPMC timed may block " );
    bench<USPMC, T, 5>(1, nc, "Unbounded SPMC wait may block " );
    dottedLine();
  }
  bench<UMPMC, T, 0>(np, nc, "Unbounded MPMC try spin only " );
  bench<UMPMC, T, 1>(np, nc, "Unbounded MPMC timed spin only " );
  bench<UMPMC, T, 2>(np, nc, "Unbounded MPMC wait spin only " );
  bench<UMPMC, T, 3>(np, nc, "Unbounded MPMC try may block " );
  bench<UMPMC, T, 4>(np, nc, "Unbounded MPMC timed may block " );
  bench<UMPMC, T, 5>(np, nc, "Unbounded MPMC wait may block " );
  dottedLine();
  if (np == 1 && nc == 1) {
    // The PCQ adapter only implements try_dequeue, hence Op 0 only.
    bench<FPCQ, T, 0>(1, 1, "folly::PCQ read " );
    dottedLine();
  }
  // FMPMC ignores its bool argument, so Op 3-5 here only select the
  // dequeue style (read / tryReadUntil / blockingRead).
  bench<FMPMC, T, 3>(np, nc, "folly::MPMC read " );
  bench<FMPMC, T, 4>(np, nc, "folly::MPMC tryReadUntil " );
  bench<FMPMC, T, 5>(np, nc, "folly::MPMC blockingRead " );
  std::cout
      << "========================================================================"
      << std::endl;
}
617 | |
618 | void benches(const int np, const int nc) { |
619 | std::cout << "====================== " << std::setw(2) << np << " prod" |
620 | << " " << std::setw(2) << nc << " cons" |
621 | << " ================================" << std::endl; |
622 | type_benches<uint32_t>(np, nc, "=== uint32_t ======" ); |
623 | // Benchmarks for other element sizes can be added as follows: |
624 | // type_benches<IntArray<4>>(np, nc, "=== IntArray<4> ==="); |
625 | } |
626 | |
627 | TEST(UnboundedQueue, bench) { |
628 | if (!FLAGS_bench) { |
629 | return; |
630 | } |
631 | std::cout |
632 | << "========================================================================" |
633 | << std::endl; |
634 | std::cout << std::setw(2) << FLAGS_reps << " reps of " << std::setw(8) |
635 | << FLAGS_ops << " handoffs\n" ; |
636 | dottedLine(); |
637 | std::cout << "$ numactl -N 1 $dir/unbounded_queue_test --bench\n" ; |
638 | dottedLine(); |
639 | std::cout << "Using capacity " << FLAGS_capacity |
640 | << " for folly::ProducerConsumerQueue and\n" |
641 | << "folly::MPMCQueue\n" ; |
642 | std::cout |
643 | << "========================================================================" |
644 | << std::endl; |
645 | std::cout |
646 | << "Test name Max time Avg time Dev time Min time" |
647 | << std::endl; |
648 | |
649 | for (int nc : {1, 2, 4, 8, 16, 32}) { |
650 | int np = 1; |
651 | benches(np, nc); |
652 | } |
653 | |
654 | for (int np : {1, 2, 4, 8, 16, 32}) { |
655 | int nc = 1; |
656 | benches(np, nc); |
657 | } |
658 | |
659 | for (int np : {2, 4, 8, 16, 32}) { |
660 | for (int nc : {2, 4, 8, 16, 32}) { |
661 | benches(np, nc); |
662 | } |
663 | } |
664 | } |
665 | |
666 | /* |
667 | ============================================================== |
668 | 10 reps of 1000000 handoffs |
669 | .............................................................. |
670 | $ numactl -N 1 $dir/unbounded_queue_test --bench |
671 | .............................................................. |
672 | Using capacity 262144 for folly::ProducerConsumerQueue and |
673 | folly::MPMCQueue |
674 | ============================================================== |
675 | Test name Max time Avg time Min time |
676 | ====================== 1 prod 1 cons ====================== |
677 | === uint32_t ================================================= |
678 | Unbounded SPSC try spin only 5 ns 5 ns 5 ns |
679 | Unbounded SPSC timed spin only 5 ns 5 ns 5 ns |
680 | Unbounded SPSC wait spin only 6 ns 6 ns 5 ns |
681 | Unbounded SPSC try may block 38 ns 37 ns 35 ns |
682 | Unbounded SPSC timed may block 38 ns 36 ns 34 ns |
683 | Unbounded SPSC wait may block 34 ns 34 ns 33 ns |
684 | .............................................................. |
685 | Unbounded MPSC try spin only 45 ns 43 ns 42 ns |
686 | Unbounded MPSC timed spin only 47 ns 43 ns 42 ns |
687 | Unbounded MPSC wait spin only 45 ns 43 ns 41 ns |
688 | Unbounded MPSC try may block 55 ns 52 ns 51 ns |
689 | Unbounded MPSC timed may block 54 ns 52 ns 51 ns |
690 | Unbounded MPSC wait may block 51 ns 50 ns 49 ns |
691 | .............................................................. |
692 | Unbounded SPMC try spin only 18 ns 17 ns 16 ns |
693 | Unbounded SPMC timed spin only 23 ns 21 ns 18 ns |
694 | Unbounded SPMC wait spin only 22 ns 19 ns 16 ns |
695 | Unbounded SPMC try may block 30 ns 26 ns 22 ns |
696 | Unbounded SPMC timed may block 32 ns 24 ns 20 ns |
697 | Unbounded SPMC wait may block 49 ns 35 ns 29 ns |
698 | .............................................................. |
699 | Unbounded MPMC try spin only 25 ns 24 ns 24 ns |
700 | Unbounded MPMC timed spin only 38 ns 35 ns 30 ns |
701 | Unbounded MPMC wait spin only 41 ns 39 ns 37 ns |
702 | Unbounded MPMC try may block 53 ns 52 ns 51 ns |
703 | Unbounded MPMC timed may block 52 ns 51 ns 49 ns |
704 | Unbounded MPMC wait may block 53 ns 51 ns 50 ns |
705 | .............................................................. |
706 | folly::PCQ read 16 ns 11 ns 7 ns |
707 | .............................................................. |
708 | folly::MPMC read 52 ns 52 ns 51 ns |
709 | folly::MPMC tryReadUntil 96 ns 90 ns 55 ns |
710 | folly::MPMC blockingRead 61 ns 56 ns 50 ns |
711 | ============================================================== |
712 | ====================== 1 prod 2 cons ====================== |
713 | === uint32_t ================================================= |
714 | Unbounded SPMC try spin only 76 ns 68 ns 53 ns |
715 | Unbounded SPMC timed spin only 79 ns 71 ns 65 ns |
716 | Unbounded SPMC wait spin only 39 ns 35 ns 32 ns |
717 | Unbounded SPMC try may block 83 ns 81 ns 76 ns |
718 | Unbounded SPMC timed may block 86 ns 63 ns 23 ns |
719 | Unbounded SPMC wait may block 38 ns 36 ns 34 ns |
720 | .............................................................. |
721 | Unbounded MPMC try spin only 86 ns 79 ns 64 ns |
722 | Unbounded MPMC timed spin only 84 ns 77 ns 74 ns |
723 | Unbounded MPMC wait spin only 36 ns 35 ns 34 ns |
724 | Unbounded MPMC try may block 83 ns 79 ns 75 ns |
725 | Unbounded MPMC timed may block 83 ns 76 ns 63 ns |
726 | Unbounded MPMC wait may block 56 ns 48 ns 36 ns |
727 | .............................................................. |
728 | folly::MPMC read 103 ns 93 ns 68 ns |
729 | folly::MPMC tryReadUntil 109 ns 102 ns 91 ns |
730 | folly::MPMC blockingRead 61 ns 58 ns 54 ns |
731 | ============================================================== |
732 | ====================== 1 prod 4 cons ====================== |
733 | === uint32_t ================================================= |
734 | Unbounded SPMC try spin only 116 ns 109 ns 97 ns |
735 | Unbounded SPMC timed spin only 117 ns 111 ns 108 ns |
736 | Unbounded SPMC wait spin only 43 ns 40 ns 37 ns |
737 | Unbounded SPMC try may block 127 ns 113 ns 98 ns |
738 | Unbounded SPMC timed may block 116 ns 109 ns 97 ns |
739 | Unbounded SPMC wait may block 45 ns 43 ns 40 ns |
740 | .............................................................. |
741 | Unbounded MPMC try spin only 121 ns 113 ns 102 ns |
742 | Unbounded MPMC timed spin only 118 ns 108 ns 88 ns |
743 | Unbounded MPMC wait spin only 45 ns 41 ns 34 ns |
744 | Unbounded MPMC try may block 117 ns 108 ns 96 ns |
745 | Unbounded MPMC timed may block 118 ns 109 ns 99 ns |
746 | Unbounded MPMC wait may block 62 ns 53 ns 43 ns |
747 | .............................................................. |
748 | folly::MPMC read 139 ns 130 ns 111 ns |
749 | folly::MPMC tryReadUntil 205 ns 135 ns 115 ns |
750 | folly::MPMC blockingRead 104 ns 74 ns 54 ns |
751 | ============================================================== |
752 | ====================== 1 prod 8 cons ====================== |
753 | === uint32_t ================================================= |
754 | Unbounded SPMC try spin only 169 ns 163 ns 157 ns |
755 | Unbounded SPMC timed spin only 167 ns 158 ns 133 ns |
756 | Unbounded SPMC wait spin only 44 ns 39 ns 36 ns |
757 | Unbounded SPMC try may block 170 ns 165 ns 156 ns |
758 | Unbounded SPMC timed may block 172 ns 163 ns 153 ns |
759 | Unbounded SPMC wait may block 49 ns 40 ns 35 ns |
760 | .............................................................. |
761 | Unbounded MPMC try spin only 166 ns 158 ns 149 ns |
762 | Unbounded MPMC timed spin only 171 ns 161 ns 145 ns |
763 | Unbounded MPMC wait spin only 62 ns 52 ns 42 ns |
764 | Unbounded MPMC try may block 169 ns 161 ns 149 ns |
765 | Unbounded MPMC timed may block 170 ns 160 ns 147 ns |
766 | Unbounded MPMC wait may block 70 ns 63 ns 61 ns |
767 | .............................................................. |
768 | folly::MPMC read 174 ns 167 ns 159 ns |
769 | folly::MPMC tryReadUntil 349 ns 171 ns 148 ns |
770 | folly::MPMC blockingRead 182 ns 138 ns 115 ns |
771 | ============================================================== |
772 | ====================== 1 prod 16 cons ====================== |
773 | === uint32_t ================================================= |
774 | Unbounded SPMC try spin only 219 ns 198 ns 190 ns |
775 | Unbounded SPMC timed spin only 202 ns 198 ns 193 ns |
776 | Unbounded SPMC wait spin only 36 ns 36 ns 35 ns |
777 | Unbounded SPMC try may block 202 ns 195 ns 190 ns |
778 | Unbounded SPMC timed may block 208 ns 197 ns 190 ns |
779 | Unbounded SPMC wait may block 96 ns 77 ns 64 ns |
780 | .............................................................. |
781 | Unbounded MPMC try spin only 204 ns 198 ns 194 ns |
782 | Unbounded MPMC timed spin only 202 ns 195 ns 190 ns |
783 | Unbounded MPMC wait spin only 61 ns 59 ns 57 ns |
784 | Unbounded MPMC try may block 206 ns 196 ns 191 ns |
785 | Unbounded MPMC timed may block 204 ns 198 ns 192 ns |
786 | Unbounded MPMC wait may block 100 ns 88 ns 84 ns |
787 | .............................................................. |
788 | folly::MPMC read 210 ns 191 ns 182 ns |
789 | folly::MPMC tryReadUntil 574 ns 248 ns 192 ns |
790 | folly::MPMC blockingRead 1400 ns 1319 ns 1227 ns |
791 | ============================================================== |
792 | ====================== 1 prod 32 cons ====================== |
793 | === uint32_t ================================================= |
794 | Unbounded SPMC try spin only 209 ns 205 ns 199 ns |
795 | Unbounded SPMC timed spin only 208 ns 205 ns 200 ns |
796 | Unbounded SPMC wait spin only 175 ns 51 ns 33 ns |
797 | Unbounded SPMC try may block 215 ns 203 ns 186 ns |
798 | Unbounded SPMC timed may block 453 ns 334 ns 204 ns |
799 | Unbounded SPMC wait may block 110 ns 87 ns 55 ns |
800 | .............................................................. |
801 | Unbounded MPMC try spin only 328 ns 218 ns 197 ns |
802 | Unbounded MPMC timed spin only 217 ns 206 ns 200 ns |
803 | Unbounded MPMC wait spin only 147 ns 85 ns 58 ns |
804 | Unbounded MPMC try may block 310 ns 223 ns 199 ns |
805 | Unbounded MPMC timed may block 461 ns 275 ns 196 ns |
806 | Unbounded MPMC wait may block 148 ns 111 ns 78 ns |
807 | .............................................................. |
808 | folly::MPMC read 280 ns 215 ns 194 ns |
809 | folly::MPMC tryReadUntil 28740 ns 13508 ns 212 ns |
810 | folly::MPMC blockingRead 1343 ns 1293 ns 1269 ns |
811 | ============================================================== |
812 | ====================== 1 prod 1 cons ====================== |
813 | === uint32_t ================================================= |
814 | Unbounded SPSC try spin only 5 ns 5 ns 5 ns |
815 | Unbounded SPSC timed spin only 8 ns 6 ns 6 ns |
816 | Unbounded SPSC wait spin only 6 ns 6 ns 5 ns |
817 | Unbounded SPSC try may block 37 ns 36 ns 35 ns |
818 | Unbounded SPSC timed may block 37 ns 36 ns 35 ns |
819 | Unbounded SPSC wait may block 35 ns 35 ns 34 ns |
820 | .............................................................. |
821 | Unbounded MPSC try spin only 43 ns 42 ns 41 ns |
822 | Unbounded MPSC timed spin only 45 ns 42 ns 42 ns |
823 | Unbounded MPSC wait spin only 44 ns 43 ns 42 ns |
824 | Unbounded MPSC try may block 55 ns 51 ns 50 ns |
825 | Unbounded MPSC timed may block 61 ns 52 ns 50 ns |
826 | Unbounded MPSC wait may block 54 ns 52 ns 50 ns |
827 | .............................................................. |
828 | Unbounded SPMC try spin only 18 ns 17 ns 17 ns |
829 | Unbounded SPMC timed spin only 23 ns 19 ns 17 ns |
830 | Unbounded SPMC wait spin only 20 ns 17 ns 15 ns |
831 | Unbounded SPMC try may block 30 ns 23 ns 19 ns |
832 | Unbounded SPMC timed may block 23 ns 19 ns 17 ns |
833 | Unbounded SPMC wait may block 36 ns 31 ns 26 ns |
834 | .............................................................. |
835 | Unbounded MPMC try spin only 25 ns 23 ns 17 ns |
836 | Unbounded MPMC timed spin only 37 ns 34 ns 25 ns |
837 | Unbounded MPMC wait spin only 40 ns 38 ns 36 ns |
838 | Unbounded MPMC try may block 51 ns 49 ns 48 ns |
839 | Unbounded MPMC timed may block 53 ns 50 ns 48 ns |
840 | Unbounded MPMC wait may block 53 ns 49 ns 34 ns |
841 | .............................................................. |
842 | folly::PCQ read 15 ns 12 ns 7 ns |
843 | .............................................................. |
844 | folly::MPMC read 53 ns 51 ns 50 ns |
845 | folly::MPMC tryReadUntil 100 ns 96 ns 90 ns |
846 | folly::MPMC blockingRead 75 ns 59 ns 52 ns |
847 | ============================================================== |
848 | ====================== 2 prod 1 cons ====================== |
849 | === uint32_t ================================================= |
850 | Unbounded MPSC try spin only 49 ns 49 ns 46 ns |
851 | Unbounded MPSC timed spin only 52 ns 50 ns 49 ns |
852 | Unbounded MPSC wait spin only 53 ns 52 ns 51 ns |
853 | Unbounded MPSC try may block 63 ns 60 ns 57 ns |
854 | Unbounded MPSC timed may block 64 ns 61 ns 54 ns |
855 | Unbounded MPSC wait may block 62 ns 59 ns 35 ns |
856 | .............................................................. |
857 | Unbounded MPMC try spin only 44 ns 41 ns 38 ns |
858 | Unbounded MPMC timed spin only 50 ns 49 ns 49 ns |
859 | Unbounded MPMC wait spin only 51 ns 49 ns 49 ns |
860 | Unbounded MPMC try may block 63 ns 60 ns 57 ns |
861 | Unbounded MPMC timed may block 62 ns 60 ns 57 ns |
862 | Unbounded MPMC wait may block 62 ns 60 ns 58 ns |
863 | .............................................................. |
864 | folly::MPMC read 78 ns 57 ns 52 ns |
865 | folly::MPMC tryReadUntil 78 ns 72 ns 70 ns |
866 | folly::MPMC blockingRead 56 ns 54 ns 52 ns |
867 | ============================================================== |
868 | ====================== 4 prod 1 cons ====================== |
869 | === uint32_t ================================================= |
870 | Unbounded MPSC try spin only 48 ns 47 ns 46 ns |
871 | Unbounded MPSC timed spin only 47 ns 47 ns 46 ns |
872 | Unbounded MPSC wait spin only 49 ns 47 ns 47 ns |
873 | Unbounded MPSC try may block 61 ns 59 ns 55 ns |
874 | Unbounded MPSC timed may block 62 ns 58 ns 46 ns |
875 | Unbounded MPSC wait may block 62 ns 61 ns 59 ns |
876 | .............................................................. |
877 | Unbounded MPMC try spin only 42 ns 42 ns 40 ns |
878 | Unbounded MPMC timed spin only 48 ns 47 ns 45 ns |
879 | Unbounded MPMC wait spin only 48 ns 47 ns 46 ns |
880 | Unbounded MPMC try may block 63 ns 62 ns 61 ns |
881 | Unbounded MPMC timed may block 63 ns 61 ns 51 ns |
882 | Unbounded MPMC wait may block 62 ns 61 ns 59 ns |
883 | .............................................................. |
884 | folly::MPMC read 56 ns 55 ns 54 ns |
885 | folly::MPMC tryReadUntil 112 ns 106 ns 97 ns |
886 | folly::MPMC blockingRead 47 ns 47 ns 45 ns |
887 | ============================================================== |
888 | ====================== 8 prod 1 cons ====================== |
889 | === uint32_t ================================================= |
890 | Unbounded MPSC try spin only 44 ns 43 ns 42 ns |
891 | Unbounded MPSC timed spin only 45 ns 44 ns 40 ns |
892 | Unbounded MPSC wait spin only 45 ns 44 ns 41 ns |
893 | Unbounded MPSC try may block 61 ns 60 ns 58 ns |
894 | Unbounded MPSC timed may block 61 ns 59 ns 56 ns |
895 | Unbounded MPSC wait may block 61 ns 59 ns 56 ns |
896 | .............................................................. |
897 | Unbounded MPMC try spin only 43 ns 40 ns 36 ns |
898 | Unbounded MPMC timed spin only 45 ns 44 ns 41 ns |
899 | Unbounded MPMC wait spin only 45 ns 43 ns 41 ns |
900 | Unbounded MPMC try may block 62 ns 60 ns 58 ns |
901 | Unbounded MPMC timed may block 62 ns 59 ns 56 ns |
902 | Unbounded MPMC wait may block 61 ns 58 ns 54 ns |
903 | .............................................................. |
904 | folly::MPMC read 147 ns 119 ns 63 ns |
905 | folly::MPMC tryReadUntil 152 ns 130 ns 97 ns |
906 | folly::MPMC blockingRead 135 ns 101 ns 48 ns |
907 | ============================================================== |
908 | ====================== 16 prod 1 cons ====================== |
909 | === uint32_t ================================================= |
910 | Unbounded MPSC try spin only 47 ns 38 ns 35 ns |
911 | Unbounded MPSC timed spin only 36 ns 36 ns 35 ns |
912 | Unbounded MPSC wait spin only 46 ns 37 ns 35 ns |
913 | Unbounded MPSC try may block 58 ns 47 ns 45 ns |
914 | Unbounded MPSC timed may block 46 ns 46 ns 45 ns |
915 | Unbounded MPSC wait may block 47 ns 45 ns 45 ns |
916 | .............................................................. |
917 | Unbounded MPMC try spin only 41 ns 39 ns 35 ns |
918 | Unbounded MPMC timed spin only 45 ns 41 ns 38 ns |
919 | Unbounded MPMC wait spin only 43 ns 40 ns 38 ns |
920 | Unbounded MPMC try may block 51 ns 49 ns 47 ns |
921 | Unbounded MPMC timed may block 52 ns 49 ns 47 ns |
922 | Unbounded MPMC wait may block 59 ns 50 ns 46 ns |
923 | .............................................................. |
924 | folly::MPMC read 924 ns 839 ns 664 ns |
925 | folly::MPMC tryReadUntil 968 ns 865 ns 678 ns |
926 | folly::MPMC blockingRead 929 ns 727 ns 487 ns |
927 | ============================================================== |
928 | ====================== 32 prod 1 cons ====================== |
929 | === uint32_t ================================================= |
930 | Unbounded MPSC try spin only 90 ns 44 ns 36 ns |
931 | Unbounded MPSC timed spin only 91 ns 43 ns 35 ns |
932 | Unbounded MPSC wait spin only 92 ns 55 ns 36 ns |
933 | Unbounded MPSC try may block 87 ns 52 ns 45 ns |
934 | Unbounded MPSC timed may block 70 ns 48 ns 45 ns |
935 | Unbounded MPSC wait may block 109 ns 60 ns 45 ns |
936 | .............................................................. |
937 | Unbounded MPMC try spin only 47 ns 42 ns 37 ns |
938 | Unbounded MPMC timed spin only 50 ns 46 ns 38 ns |
939 | Unbounded MPMC wait spin only 50 ns 42 ns 36 ns |
940 | Unbounded MPMC try may block 103 ns 59 ns 50 ns |
941 | Unbounded MPMC timed may block 56 ns 52 ns 47 ns |
942 | Unbounded MPMC wait may block 59 ns 51 ns 46 ns |
943 | .............................................................. |
944 | folly::MPMC read 1029 ns 911 ns 694 ns |
945 | folly::MPMC tryReadUntil 1023 ns 969 ns 907 ns |
946 | folly::MPMC blockingRead 1024 ns 921 ns 790 ns |
947 | ============================================================== |
948 | ====================== 2 prod 2 cons ====================== |
949 | === uint32_t ================================================= |
950 | Unbounded MPMC try spin only 83 ns 66 ns 24 ns |
951 | Unbounded MPMC timed spin only 84 ns 74 ns 49 ns |
952 | Unbounded MPMC wait spin only 50 ns 49 ns 47 ns |
953 | Unbounded MPMC try may block 86 ns 81 ns 77 ns |
954 | Unbounded MPMC timed may block 82 ns 74 ns 59 ns |
955 | Unbounded MPMC wait may block 62 ns 59 ns 56 ns |
956 | .............................................................. |
957 | folly::MPMC read 98 ns 85 ns 63 ns |
958 | folly::MPMC tryReadUntil 105 ns 94 ns 83 ns |
959 | folly::MPMC blockingRead 59 ns 56 ns 54 ns |
960 | ============================================================== |
961 | ====================== 2 prod 4 cons ====================== |
962 | === uint32_t ================================================= |
963 | Unbounded MPMC try spin only 114 ns 105 ns 91 ns |
964 | Unbounded MPMC timed spin only 119 ns 107 ns 102 ns |
965 | Unbounded MPMC wait spin only 54 ns 53 ns 52 ns |
966 | Unbounded MPMC try may block 114 ns 106 ns 93 ns |
967 | Unbounded MPMC timed may block 111 ns 100 ns 92 ns |
968 | Unbounded MPMC wait may block 70 ns 64 ns 60 ns |
969 | .............................................................. |
970 | folly::MPMC read 133 ns 125 ns 120 ns |
971 | folly::MPMC tryReadUntil 130 ns 125 ns 114 ns |
972 | folly::MPMC blockingRead 69 ns 68 ns 66 ns |
973 | ============================================================== |
974 | ====================== 2 prod 8 cons ====================== |
975 | === uint32_t ================================================= |
976 | Unbounded MPMC try spin only 169 ns 160 ns 152 ns |
977 | Unbounded MPMC timed spin only 165 ns 158 ns 149 ns |
978 | Unbounded MPMC wait spin only 59 ns 54 ns 45 ns |
979 | Unbounded MPMC try may block 166 ns 158 ns 131 ns |
980 | Unbounded MPMC timed may block 168 ns 163 ns 158 ns |
981 | Unbounded MPMC wait may block 73 ns 66 ns 60 ns |
982 | .............................................................. |
983 | folly::MPMC read 170 ns 167 ns 160 ns |
984 | folly::MPMC tryReadUntil 163 ns 154 ns 146 ns |
985 | folly::MPMC blockingRead 82 ns 73 ns 60 ns |
986 | ============================================================== |
987 | ====================== 2 prod 16 cons ====================== |
988 | === uint32_t ================================================= |
989 | Unbounded MPMC try spin only 207 ns 198 ns 191 ns |
990 | Unbounded MPMC timed spin only 211 ns 198 ns 192 ns |
991 | Unbounded MPMC wait spin only 57 ns 55 ns 54 ns |
992 | Unbounded MPMC try may block 197 ns 193 ns 188 ns |
993 | Unbounded MPMC timed may block 201 ns 195 ns 188 ns |
994 | Unbounded MPMC wait may block 89 ns 78 ns 70 ns |
995 | .............................................................. |
996 | folly::MPMC read 196 ns 189 ns 181 ns |
997 | folly::MPMC tryReadUntil 202 ns 184 ns 173 ns |
998 | folly::MPMC blockingRead 267 ns 100 ns 76 ns |
999 | ============================================================== |
1000 | ====================== 2 prod 32 cons ====================== |
1001 | === uint32_t ================================================= |
1002 | Unbounded MPMC try spin only 228 ns 207 ns 193 ns |
1003 | Unbounded MPMC timed spin only 210 ns 205 ns 198 ns |
1004 | Unbounded MPMC wait spin only 102 ns 71 ns 56 ns |
1005 | Unbounded MPMC try may block 268 ns 211 ns 198 ns |
1006 | Unbounded MPMC timed may block 226 ns 205 ns 183 ns |
1007 | Unbounded MPMC wait may block 502 ns 164 ns 67 ns |
1008 | .............................................................. |
1009 | folly::MPMC read 228 ns 205 ns 195 ns |
1010 | folly::MPMC tryReadUntil 207 ns 200 ns 192 ns |
1011 | folly::MPMC blockingRead 830 ns 612 ns 192 ns |
1012 | ============================================================== |
1013 | ====================== 4 prod 2 cons ====================== |
1014 | === uint32_t ================================================= |
1015 | Unbounded MPMC try spin only 87 ns 65 ns 33 ns |
1016 | Unbounded MPMC timed spin only 79 ns 60 ns 36 ns |
1017 | Unbounded MPMC wait spin only 47 ns 46 ns 44 ns |
1018 | Unbounded MPMC try may block 87 ns 77 ns 52 ns |
1019 | Unbounded MPMC timed may block 86 ns 79 ns 57 ns |
1020 | Unbounded MPMC wait may block 62 ns 61 ns 60 ns |
1021 | .............................................................. |
1022 | folly::MPMC read 110 ns 95 ns 60 ns |
1023 | folly::MPMC tryReadUntil 108 ns 104 ns 96 ns |
1024 | folly::MPMC blockingRead 60 ns 57 ns 47 ns |
1025 | ============================================================== |
1026 | ====================== 4 prod 4 cons ====================== |
1027 | === uint32_t ================================================= |
1028 | Unbounded MPMC try spin only 110 ns 100 ns 86 ns |
1029 | Unbounded MPMC timed spin only 113 ns 104 ns 93 ns |
1030 | Unbounded MPMC wait spin only 49 ns 46 ns 45 ns |
1031 | Unbounded MPMC try may block 115 ns 105 ns 84 ns |
1032 | Unbounded MPMC timed may block 119 ns 108 ns 89 ns |
1033 | Unbounded MPMC wait may block 63 ns 61 ns 54 ns |
1034 | .............................................................. |
1035 | folly::MPMC read 140 ns 131 ns 113 ns |
1036 | folly::MPMC tryReadUntil 132 ns 129 ns 121 ns |
1037 | folly::MPMC blockingRead 58 ns 53 ns 48 ns |
1038 | ============================================================== |
1039 | ====================== 4 prod 8 cons ====================== |
1040 | === uint32_t ================================================= |
1041 | Unbounded MPMC try spin only 170 ns 162 ns 151 ns |
1042 | Unbounded MPMC timed spin only 174 ns 158 ns 139 ns |
1043 | Unbounded MPMC wait spin only 51 ns 50 ns 48 ns |
1044 | Unbounded MPMC try may block 164 ns 160 ns 154 ns |
1045 | Unbounded MPMC timed may block 165 ns 158 ns 144 ns |
1046 | Unbounded MPMC wait may block 67 ns 62 ns 52 ns |
1047 | .............................................................. |
1048 | folly::MPMC read 174 ns 166 ns 156 ns |
1049 | folly::MPMC tryReadUntil 165 ns 160 ns 150 ns |
1050 | folly::MPMC blockingRead 58 ns 56 ns 49 ns |
1051 | ============================================================== |
1052 | ====================== 4 prod 16 cons ====================== |
1053 | === uint32_t ================================================= |
1054 | Unbounded MPMC try spin only 200 ns 195 ns 181 ns |
1055 | Unbounded MPMC timed spin only 200 ns 195 ns 191 ns |
1056 | Unbounded MPMC wait spin only 51 ns 49 ns 45 ns |
1057 | Unbounded MPMC try may block 198 ns 192 ns 188 ns |
1058 | Unbounded MPMC timed may block 199 ns 190 ns 182 ns |
1059 | Unbounded MPMC wait may block 77 ns 66 ns 60 ns |
1060 | .............................................................. |
1061 | folly::MPMC read 195 ns 186 ns 175 ns |
1062 | folly::MPMC tryReadUntil 204 ns 187 ns 167 ns |
1063 | folly::MPMC blockingRead 66 ns 60 ns 57 ns |
1064 | ============================================================== |
1065 | ====================== 4 prod 32 cons ====================== |
1066 | === uint32_t ================================================= |
1067 | Unbounded MPMC try spin only 246 ns 210 ns 195 ns |
1068 | Unbounded MPMC timed spin only 217 ns 207 ns 199 ns |
1069 | Unbounded MPMC wait spin only 66 ns 52 ns 46 ns |
1070 | Unbounded MPMC try may block 250 ns 207 ns 197 ns |
1071 | Unbounded MPMC timed may block 208 ns 202 ns 195 ns |
1072 | Unbounded MPMC wait may block 80 ns 66 ns 56 ns |
1073 | .............................................................. |
1074 | folly::MPMC read 231 ns 201 ns 190 ns |
1075 | folly::MPMC tryReadUntil 202 ns 199 ns 196 ns |
1076 | folly::MPMC blockingRead 65 ns 61 ns 57 ns |
1077 | ============================================================== |
1078 | ====================== 8 prod 2 cons ====================== |
1079 | === uint32_t ================================================= |
1080 | Unbounded MPMC try spin only 50 ns 41 ns 39 ns |
1081 | Unbounded MPMC timed spin only 73 ns 49 ns 40 ns |
1082 | Unbounded MPMC wait spin only 46 ns 43 ns 39 ns |
1083 | Unbounded MPMC try may block 81 ns 62 ns 56 ns |
1084 | Unbounded MPMC timed may block 75 ns 61 ns 53 ns |
1085 | Unbounded MPMC wait may block 61 ns 57 ns 50 ns |
1086 | .............................................................. |
1087 | folly::MPMC read 120 ns 102 ns 58 ns |
1088 | folly::MPMC tryReadUntil 119 ns 112 ns 103 ns |
1089 | folly::MPMC blockingRead 85 ns 71 ns 58 ns |
1090 | ============================================================== |
1091 | ====================== 8 prod 4 cons ====================== |
1092 | === uint32_t ================================================= |
1093 | Unbounded MPMC try spin only 104 ns 87 ns 39 ns |
1094 | Unbounded MPMC timed spin only 109 ns 89 ns 40 ns |
1095 | Unbounded MPMC wait spin only 46 ns 45 ns 43 ns |
1096 | Unbounded MPMC try may block 121 ns 101 ns 74 ns |
1097 | Unbounded MPMC timed may block 116 ns 103 ns 72 ns |
1098 | Unbounded MPMC wait may block 62 ns 57 ns 52 ns |
1099 | .............................................................. |
1100 | folly::MPMC read 136 ns 130 ns 118 ns |
1101 | folly::MPMC tryReadUntil 132 ns 127 ns 118 ns |
1102 | folly::MPMC blockingRead 68 ns 61 ns 51 ns |
1103 | ============================================================== |
1104 | ====================== 8 prod 8 cons ====================== |
1105 | === uint32_t ================================================= |
1106 | Unbounded MPMC try spin only 175 ns 171 ns 162 ns |
1107 | Unbounded MPMC timed spin only 177 ns 169 ns 159 ns |
1108 | Unbounded MPMC wait spin only 49 ns 47 ns 45 ns |
1109 | Unbounded MPMC try may block 175 ns 171 ns 156 ns |
1110 | Unbounded MPMC timed may block 180 ns 170 ns 162 ns |
1111 | Unbounded MPMC wait may block 63 ns 62 ns 59 ns |
1112 | .............................................................. |
1113 | folly::MPMC read 177 ns 162 ns 147 ns |
1114 | folly::MPMC tryReadUntil 170 ns 162 ns 148 ns |
1115 | folly::MPMC blockingRead 57 ns 53 ns 49 ns |
1116 | ============================================================== |
1117 | ====================== 8 prod 16 cons ====================== |
1118 | === uint32_t ================================================= |
1119 | Unbounded MPMC try spin only 203 ns 192 ns 185 ns |
1120 | Unbounded MPMC timed spin only 199 ns 193 ns 185 ns |
1121 | Unbounded MPMC wait spin only 48 ns 46 ns 44 ns |
1122 | Unbounded MPMC try may block 204 ns 194 ns 182 ns |
1123 | Unbounded MPMC timed may block 198 ns 187 ns 171 ns |
1124 | Unbounded MPMC wait may block 63 ns 61 ns 57 ns |
1125 | .............................................................. |
1126 | folly::MPMC read 193 ns 185 ns 167 ns |
1127 | folly::MPMC tryReadUntil 199 ns 188 ns 164 ns |
1128 | folly::MPMC blockingRead 57 ns 52 ns 49 ns |
1129 | ============================================================== |
1130 | ====================== 8 prod 32 cons ====================== |
1131 | === uint32_t ================================================= |
1132 | Unbounded MPMC try spin only 222 ns 208 ns 198 ns |
1133 | Unbounded MPMC timed spin only 234 ns 212 ns 203 ns |
1134 | Unbounded MPMC wait spin only 89 ns 58 ns 45 ns |
1135 | Unbounded MPMC try may block 234 ns 207 ns 196 ns |
1136 | Unbounded MPMC timed may block 205 ns 203 ns 197 ns |
1137 | Unbounded MPMC wait may block 65 ns 63 ns 61 ns |
1138 | .............................................................. |
1139 | folly::MPMC read 240 ns 204 ns 194 ns |
1140 | folly::MPMC tryReadUntil 205 ns 202 ns 199 ns |
1141 | folly::MPMC blockingRead 56 ns 52 ns 49 ns |
1142 | ============================================================== |
1143 | ====================== 16 prod 2 cons ====================== |
1144 | === uint32_t ================================================= |
1145 | Unbounded MPMC try spin only 52 ns 40 ns 34 ns |
1146 | Unbounded MPMC timed spin only 63 ns 47 ns 36 ns |
1147 | Unbounded MPMC wait spin only 45 ns 39 ns 36 ns |
1148 | Unbounded MPMC try may block 62 ns 51 ns 47 ns |
1149 | Unbounded MPMC timed may block 77 ns 52 ns 46 ns |
1150 | Unbounded MPMC wait may block 63 ns 50 ns 46 ns |
1151 | .............................................................. |
1152 | folly::MPMC read 114 ns 103 ns 77 ns |
1153 | folly::MPMC tryReadUntil 116 ns 106 ns 85 ns |
1154 | folly::MPMC blockingRead 85 ns 79 ns 63 ns |
1155 | ============================================================== |
1156 | ====================== 16 prod 4 cons ====================== |
1157 | === uint32_t ================================================= |
1158 | Unbounded MPMC try spin only 106 ns 68 ns 33 ns |
1159 | Unbounded MPMC timed spin only 88 ns 56 ns 36 ns |
1160 | Unbounded MPMC wait spin only 46 ns 39 ns 35 ns |
1161 | Unbounded MPMC try may block 95 ns 66 ns 47 ns |
1162 | Unbounded MPMC timed may block 80 ns 57 ns 46 ns |
1163 | Unbounded MPMC wait may block 52 ns 48 ns 45 ns |
1164 | .............................................................. |
1165 | folly::MPMC read 121 ns 113 ns 104 ns |
1166 | folly::MPMC tryReadUntil 119 ns 110 ns 101 ns |
1167 | folly::MPMC blockingRead 65 ns 62 ns 57 ns |
1168 | ============================================================== |
1169 | ====================== 16 prod 8 cons ====================== |
1170 | === uint32_t ================================================= |
1171 | Unbounded MPMC try spin only 153 ns 109 ns 46 ns |
1172 | Unbounded MPMC timed spin only 167 ns 110 ns 36 ns |
1173 | Unbounded MPMC wait spin only 43 ns 39 ns 36 ns |
1174 | Unbounded MPMC try may block 159 ns 125 ns 100 ns |
1175 | Unbounded MPMC timed may block 127 ns 82 ns 52 ns |
1176 | Unbounded MPMC wait may block 51 ns 50 ns 46 ns |
1177 | .............................................................. |
1178 | folly::MPMC read 149 ns 139 ns 129 ns |
1179 | folly::MPMC tryReadUntil 141 ns 134 ns 112 ns |
1180 | folly::MPMC blockingRead 59 ns 54 ns 49 ns |
1181 | ============================================================== |
1182 | ====================== 16 prod 16 cons ====================== |
1183 | === uint32_t ================================================= |
1184 | Unbounded MPMC try spin only 193 ns 169 ns 148 ns |
1185 | Unbounded MPMC timed spin only 221 ns 175 ns 106 ns |
1186 | Unbounded MPMC wait spin only 45 ns 41 ns 37 ns |
1187 | Unbounded MPMC try may block 204 ns 171 ns 133 ns |
1188 | Unbounded MPMC timed may block 184 ns 162 ns 104 ns |
1189 | Unbounded MPMC wait may block 61 ns 52 ns 49 ns |
1190 | .............................................................. |
1191 | folly::MPMC read 181 ns 164 ns 157 ns |
1192 | folly::MPMC tryReadUntil 185 ns 173 ns 157 ns |
1193 | folly::MPMC blockingRead 56 ns 50 ns 45 ns |
1194 | ============================================================== |
1195 | ====================== 16 prod 32 cons ====================== |
1196 | === uint32_t ================================================= |
1197 | Unbounded MPMC try spin only 255 ns 217 ns 181 ns |
1198 | Unbounded MPMC timed spin only 225 ns 205 ns 182 ns |
1199 | Unbounded MPMC wait spin only 115 ns 57 ns 40 ns |
1200 | Unbounded MPMC try may block 215 ns 199 ns 184 ns |
1201 | Unbounded MPMC timed may block 218 ns 196 ns 179 ns |
1202 | Unbounded MPMC wait may block 63 ns 54 ns 47 ns |
1203 | .............................................................. |
1204 | folly::MPMC read 260 ns 205 ns 185 ns |
1205 | folly::MPMC tryReadUntil 205 ns 200 ns 192 ns |
1206 | folly::MPMC blockingRead 53 ns 48 ns 43 ns |
1207 | ============================================================== |
1208 | ====================== 32 prod 2 cons ====================== |
1209 | === uint32_t ================================================= |
1210 | Unbounded MPMC try spin only 95 ns 66 ns 45 ns |
1211 | Unbounded MPMC timed spin only 95 ns 62 ns 45 ns |
1212 | Unbounded MPMC wait spin only 56 ns 44 ns 36 ns |
1213 | Unbounded MPMC try may block 123 ns 86 ns 50 ns |
1214 | Unbounded MPMC timed may block 109 ns 73 ns 47 ns |
1215 | Unbounded MPMC wait may block 95 ns 58 ns 47 ns |
1216 | .............................................................. |
1217 | folly::MPMC read 445 ns 380 ns 315 ns |
1218 | folly::MPMC tryReadUntil 459 ns 341 ns 153 ns |
1219 | folly::MPMC blockingRead 351 ns 286 ns 218 ns |
1220 | ============================================================== |
1221 | ====================== 32 prod 4 cons ====================== |
1222 | === uint32_t ================================================= |
1223 | Unbounded MPMC try spin only 114 ns 92 ns 59 ns |
1224 | Unbounded MPMC timed spin only 135 ns 99 ns 47 ns |
1225 | Unbounded MPMC wait spin only 139 ns 55 ns 38 ns |
1226 | Unbounded MPMC try may block 165 ns 113 ns 72 ns |
1227 | Unbounded MPMC timed may block 119 ns 94 ns 51 ns |
1228 | Unbounded MPMC wait may block 61 ns 52 ns 47 ns |
1229 | .............................................................. |
1230 | folly::MPMC read 127 ns 112 ns 93 ns |
1231 | folly::MPMC tryReadUntil 116 ns 107 ns 96 ns |
1232 | folly::MPMC blockingRead 67 ns 59 ns 51 ns |
1233 | ============================================================== |
1234 | ====================== 32 prod 8 cons ====================== |
1235 | === uint32_t ================================================= |
1236 | Unbounded MPMC try spin only 226 ns 140 ns 57 ns |
1237 | Unbounded MPMC timed spin only 176 ns 126 ns 61 ns |
1238 | Unbounded MPMC wait spin only 86 ns 50 ns 39 ns |
1239 | Unbounded MPMC try may block 170 ns 131 ns 76 ns |
1240 | Unbounded MPMC timed may block 201 ns 141 ns 110 ns |
1241 | Unbounded MPMC wait may block 94 ns 55 ns 47 ns |
1242 | .............................................................. |
1243 | folly::MPMC read 148 ns 131 ns 120 ns |
1244 | folly::MPMC tryReadUntil 132 ns 126 ns 121 ns |
1245 | folly::MPMC blockingRead 59 ns 54 ns 51 ns |
1246 | ============================================================== |
1247 | ====================== 32 prod 16 cons ====================== |
1248 | === uint32_t ================================================= |
1249 | Unbounded MPMC try spin only 209 ns 174 ns 146 ns |
1250 | Unbounded MPMC timed spin only 214 ns 189 ns 154 ns |
1251 | Unbounded MPMC wait spin only 138 ns 51 ns 38 ns |
1252 | Unbounded MPMC try may block 247 ns 191 ns 144 ns |
1253 | Unbounded MPMC timed may block 245 ns 180 ns 123 ns |
1254 | Unbounded MPMC wait may block 74 ns 51 ns 46 ns |
1255 | .............................................................. |
1256 | folly::MPMC read 164 ns 148 ns 135 ns |
1257 | folly::MPMC tryReadUntil 156 ns 149 ns 140 ns |
1258 | folly::MPMC blockingRead 55 ns 50 ns 47 ns |
1259 | ============================================================== |
1260 | ====================== 32 prod 32 cons ====================== |
1261 | === uint32_t ================================================= |
1262 | Unbounded MPMC try spin only 255 ns 212 ns 179 ns |
1263 | Unbounded MPMC timed spin only 391 ns 223 ns 147 ns |
1264 | Unbounded MPMC wait spin only 78 ns 44 ns 38 ns |
1265 | Unbounded MPMC try may block 516 ns 249 ns 195 ns |
1266 | Unbounded MPMC timed may block 293 ns 210 ns 171 ns |
1267 | Unbounded MPMC wait may block 54 ns 51 ns 48 ns |
1268 | .............................................................. |
1269 | folly::MPMC read 195 ns 183 ns 164 ns |
1270 | folly::MPMC tryReadUntil 191 ns 175 ns 159 ns |
1271 | folly::MPMC blockingRead 49 ns 45 ns 43 ns |
1272 | ============================================================== |
1273 | |
1274 | $ lscpu |
1275 | Architecture: x86_64 |
1276 | CPU op-mode(s): 32-bit, 64-bit |
1277 | Byte Order: Little Endian |
1278 | CPU(s): 32 |
1279 | On-line CPU(s) list: 0-31 |
1280 | Thread(s) per core: 2 |
1281 | Core(s) per socket: 8 |
1282 | Socket(s): 2 |
1283 | NUMA node(s): 2 |
1284 | Vendor ID: GenuineIntel |
1285 | CPU family: 6 |
1286 | Model: 45 |
1287 | Model name: Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz |
1288 | Stepping: 6 |
1289 | CPU MHz: 2200.000 |
1290 | CPU max MHz: 2200.0000 |
1291 | CPU min MHz: 1200.0000 |
1292 | BogoMIPS: 4399.92 |
1293 | Virtualization: VT-x |
1294 | L1d cache: 32K |
1295 | L1i cache: 32K |
1296 | L2 cache: 256K |
1297 | L3 cache: 20480K |
1298 | NUMA node0 CPU(s): 0-7,16-23 |
1299 | NUMA node1 CPU(s): 8-15,24-31 |
1300 | |
1301 | Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr |
1302 | pge mca cmov pat pse36 clflush dts acpi mmx fxsr |
1303 | sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp |
1304 | lm constant_tsc arch_perfmon pebs bts rep_good |
1305 | nopl xtopology nonstop_tsc aperfmperf eagerfpu |
1306 | pni pclmulqdq dtes64 monitor ds_cpl vmx smx est |
1307 | tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 |
1308 | x2apic popcnt tsc_deadline_timer aes xsave avx |
1309 | lahf_lm epb tpr_shadow vnmi flexpriority ept vpid |
1310 | xsaveopt dtherm arat pln pts |
1311 | */ |
1312 | |