1/*****************************************************************************
2
3Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free Software
7Foundation; version 2 of the License.
8
9This program is distributed in the hope that it will be useful, but WITHOUT
10ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13You should have received a copy of the GNU General Public License along with
14this program; if not, write to the Free Software Foundation, Inc.,
1551 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
16
17*****************************************************************************/
18
19/**************************************************//**
20@file os/os0event.cc
21The interface to the operating system condition variables.
22
23Created 2012-09-23 Sunny Bains
24*******************************************************/
25
26#include "os0event.h"
27#include "ut0mutex.h"
28#include "ha_prototypes.h"
29#include "ut0new.h"
30
31#ifdef _WIN32
32#include <windows.h>
33#include <synchapi.h>
34#endif /* _WIN32 */
35
36#include <list>
37
#ifdef _WIN32
/** Native condition variable type: the Windows CONDITION_VARIABLE. */
typedef CONDITION_VARIABLE os_cond_t;
#else
/** Native condition variable type: the POSIX pthread_cond_t. */
typedef pthread_cond_t os_cond_t;
#endif /* _WIN32 */

/** List of event handles whose nodes are allocated through ut_allocator. */
typedef std::list<os_event_t, ut_allocator<os_event_t> > os_event_list_t;
/** Iterator into os_event_list_t; struct os_event stores one of these in
its event_iter member. */
typedef os_event_list_t::iterator event_iter_t;
48
49/** InnoDB condition variable. */
50struct os_event {
51 os_event() UNIV_NOTHROW;
52
53 ~os_event() UNIV_NOTHROW;
54
55 /**
56 Destroys a condition variable */
57 void destroy() UNIV_NOTHROW
58 {
59#ifndef _WIN32
60 int ret = pthread_cond_destroy(&cond_var);
61 ut_a(ret == 0);
62#endif /* !_WIN32 */
63
64 mutex.destroy();
65 }
66
67 /** Set the event */
68 void set() UNIV_NOTHROW
69 {
70 mutex.enter();
71
72 if (!m_set) {
73 broadcast();
74 }
75
76 mutex.exit();
77 }
78
79 int64_t reset() UNIV_NOTHROW
80 {
81 mutex.enter();
82
83 if (m_set) {
84 m_set = false;
85 }
86
87 int64_t ret = signal_count;
88
89 mutex.exit();
90
91 return(ret);
92 }
93
94 /**
95 Waits for an event object until it is in the signaled state.
96
97 Typically, if the event has been signalled after the os_event_reset()
98 we'll return immediately because event->m_set == true.
99 There are, however, situations (e.g.: sync_array code) where we may
100 lose this information. For example:
101
102 thread A calls os_event_reset()
103 thread B calls os_event_set() [event->m_set == true]
104 thread C calls os_event_reset() [event->m_set == false]
105 thread A calls os_event_wait() [infinite wait!]
106 thread C calls os_event_wait() [infinite wait!]
107
108 Where such a scenario is possible, to avoid infinite wait, the
109 value returned by reset() should be passed in as
110 reset_sig_count. */
111 void wait_low(int64_t reset_sig_count) UNIV_NOTHROW;
112
113 /**
114 Waits for an event object until it is in the signaled state or
115 a timeout is exceeded.
116 @param time_in_usec - timeout in microseconds,
117 or OS_SYNC_INFINITE_TIME
118 @param reset_sig_count- zero or the value returned by
119 previous call of os_event_reset().
120 @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
121 ulint wait_time_low(
122 ulint time_in_usec,
123 int64_t reset_sig_count) UNIV_NOTHROW;
124
125 /** @return true if the event is in the signalled state. */
126 bool is_set() const UNIV_NOTHROW
127 {
128 return(m_set);
129 }
130
131private:
132 /**
133 Initialize a condition variable */
134 void init() UNIV_NOTHROW
135 {
136
137 mutex.init();
138
139#ifdef _WIN32
140 InitializeConditionVariable(&cond_var);
141#else
142 {
143 int ret;
144
145 ret = pthread_cond_init(&cond_var, NULL);
146 ut_a(ret == 0);
147 }
148#endif /* _WIN32 */
149 }
150
151 /**
152 Wait on condition variable */
153 void wait() UNIV_NOTHROW
154 {
155#ifdef _WIN32
156 if (!SleepConditionVariableCS(&cond_var, mutex, INFINITE)) {
157 ut_error;
158 }
159#else
160 {
161 int ret;
162
163 ret = pthread_cond_wait(&cond_var, mutex);
164 ut_a(ret == 0);
165 }
166#endif /* _WIN32 */
167 }
168
169 /**
170 Wakes all threads waiting for condition variable */
171 void broadcast() UNIV_NOTHROW
172 {
173 m_set = true;
174 ++signal_count;
175
176#ifdef _WIN32
177 WakeAllConditionVariable(&cond_var);
178#else
179 {
180 int ret;
181
182 ret = pthread_cond_broadcast(&cond_var);
183 ut_a(ret == 0);
184 }
185#endif /* _WIN32 */
186 }
187
188 /**
189 Wakes one thread waiting for condition variable */
190 void signal() UNIV_NOTHROW
191 {
192#ifdef _WIN32
193 WakeConditionVariable(&cond_var);
194#else
195 {
196 int ret;
197
198 ret = pthread_cond_signal(&cond_var);
199 ut_a(ret == 0);
200 }
201#endif /* _WIN32 */
202 }
203
204 /**
205 Do a timed wait on condition variable.
206 @param abstime - timeout
207 @param time_in_ms - timeout in milliseconds.
208 @return true if timed out, false otherwise */
209 bool timed_wait(
210#ifndef _WIN32
211 const timespec* abstime
212#else
213 DWORD time_in_ms
214#endif /* !_WIN32 */
215 );
216
217private:
218 bool m_set; /*!< this is true when the
219 event is in the signaled
220 state, i.e., a thread does
221 not stop if it tries to wait
222 for this event */
223 int64_t signal_count; /*!< this is incremented
224 each time the event becomes
225 signaled */
226 EventMutex mutex; /*!< this mutex protects
227 the next fields */
228
229
230 os_cond_t cond_var; /*!< condition variable is
231 used in waiting for the event */
232
233public:
234 event_iter_t event_iter; /*!< For O(1) removal from
235 list */
236protected:
237 // Disable copying
238 os_event(const os_event&);
239 os_event& operator=(const os_event&);
240};
241
242/**
243Do a timed wait on condition variable.
244@param abstime - absolute time to wait
245@param time_in_ms - timeout in milliseconds
246@return true if timed out */
247bool
248os_event::timed_wait(
249#ifndef _WIN32
250 const timespec* abstime
251#else
252 DWORD time_in_ms
253#endif /* !_WIN32 */
254)
255{
256#ifdef _WIN32
257 BOOL ret;
258
259 ret = SleepConditionVariableCS(&cond_var, mutex, time_in_ms);
260
261 if (!ret) {
262 DWORD err = GetLastError();
263
264 /* FQDN=msdn.microsoft.com
265 @see http://$FQDN/en-us/library/ms686301%28VS.85%29.aspx,
266
267 "Condition variables are subject to spurious wakeups
268 (those not associated with an explicit wake) and stolen wakeups
269 (another thread manages to run before the woken thread)."
270 Check for both types of timeouts.
271 Conditions are checked by the caller.*/
272 if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) {
273 return(true);
274 }
275 }
276
277 ut_a(ret);
278
279 return(false);
280#else
281 int ret;
282
283 ret = pthread_cond_timedwait(&cond_var, mutex, abstime);
284
285 switch (ret) {
286 case 0:
287 case ETIMEDOUT:
288 /* We play it safe by checking for EINTR even though
289 according to the POSIX documentation it can't return EINTR. */
290 case EINTR:
291 break;
292
293 default:
294 ib::error() << "pthread_cond_timedwait() returned: " << ret
295 << ": abstime={" << abstime->tv_sec << ","
296 << abstime->tv_nsec << "}";
297 ut_error;
298 }
299
300 return(ret == ETIMEDOUT);
301#endif /* _WIN32 */
302}
303
304/**
305Waits for an event object until it is in the signaled state.
306
307Typically, if the event has been signalled after the os_event_reset()
308we'll return immediately because event->m_set == true.
309There are, however, situations (e.g.: sync_array code) where we may
310lose this information. For example:
311
312thread A calls os_event_reset()
313thread B calls os_event_set() [event->m_set == true]
314thread C calls os_event_reset() [event->m_set == false]
315thread A calls os_event_wait() [infinite wait!]
316thread C calls os_event_wait() [infinite wait!]
317
318Where such a scenario is possible, to avoid infinite wait, the
319value returned by reset() should be passed in as
320reset_sig_count. */
321void
322os_event::wait_low(
323 int64_t reset_sig_count) UNIV_NOTHROW
324{
325 mutex.enter();
326
327 if (!reset_sig_count) {
328 reset_sig_count = signal_count;
329 }
330
331 while (!m_set && signal_count == reset_sig_count) {
332
333 wait();
334
335 /* Spurious wakeups may occur: we have to check if the
336 event really has been signaled after we came here to wait. */
337 }
338
339 mutex.exit();
340}
341
342/**
343Waits for an event object until it is in the signaled state or
344a timeout is exceeded.
345@param time_in_usec - timeout in microseconds, or OS_SYNC_INFINITE_TIME
346@param reset_sig_count - zero or the value returned by previous call
347 of os_event_reset().
348@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
349ulint
350os_event::wait_time_low(
351 ulint time_in_usec,
352 int64_t reset_sig_count) UNIV_NOTHROW
353{
354 bool timed_out = false;
355
356#ifdef _WIN32
357 DWORD time_in_ms;
358
359 if (time_in_usec != OS_SYNC_INFINITE_TIME) {
360 time_in_ms = DWORD(time_in_usec / 1000);
361 } else {
362 time_in_ms = INFINITE;
363 }
364#else
365 struct timespec abstime;
366
367 if (time_in_usec != OS_SYNC_INFINITE_TIME) {
368 struct timeval tv;
369 int ret;
370 ulint sec;
371 ulint usec;
372
373 ret = ut_usectime(&sec, &usec);
374 ut_a(ret == 0);
375
376 tv.tv_sec = sec;
377 tv.tv_usec = usec;
378
379 tv.tv_usec += time_in_usec;
380
381 abstime.tv_sec = tv.tv_sec + tv.tv_usec / 1000000;
382 abstime.tv_nsec = tv.tv_usec % 1000000 * 1000;
383 } else {
384 abstime.tv_nsec = 999999999;
385 abstime.tv_sec = (time_t) ULINT_MAX;
386 }
387
388 ut_a(abstime.tv_nsec <= 999999999);
389
390#endif /* _WIN32 */
391
392 mutex.enter();
393
394 if (!reset_sig_count) {
395 reset_sig_count = signal_count;
396 }
397
398 do {
399 if (m_set || signal_count != reset_sig_count) {
400
401 break;
402 }
403
404#ifndef _WIN32
405 timed_out = timed_wait(&abstime);
406#else
407 timed_out = timed_wait(time_in_ms);
408#endif /* !_WIN32 */
409
410 } while (!timed_out);
411
412 mutex.exit();
413
414 return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
415}
416
417/** Constructor */
418os_event::os_event() UNIV_NOTHROW
419{
420 init();
421
422 m_set = false;
423
424 /* We return this value in os_event_reset(),
425 which can then be be used to pass to the
426 os_event_wait_low(). The value of zero is
427 reserved in os_event_wait_low() for the case
428 when the caller does not want to pass any
429 signal_count value. To distinguish between
430 the two cases we initialize signal_count
431 to 1 here. */
432
433 signal_count = 1;
434}
435
436/** Destructor */
437os_event::~os_event() UNIV_NOTHROW
438{
439 destroy();
440}
441
442/**
443Creates an event semaphore, i.e., a semaphore which may just have two
444states: signaled and nonsignaled. The created event is manual reset: it
445must be reset explicitly by calling sync_os_reset_event.
446@return the event handle */
447os_event_t os_event_create(const char*)
448{
449 return(UT_NEW_NOKEY(os_event()));
450}
451
452/**
453Check if the event is set.
454@return true if set */
455bool
456os_event_is_set(
457/*============*/
458 const os_event_t event) /*!< in: event to test */
459{
460 return(event->is_set());
461}
462
463/**
464Sets an event semaphore to the signaled state: lets waiting threads
465proceed. */
466void
467os_event_set(
468/*=========*/
469 os_event_t event) /*!< in/out: event to set */
470{
471 event->set();
472}
473
474/**
475Resets an event semaphore to the nonsignaled state. Waiting threads will
476stop to wait for the event.
477The return value should be passed to os_even_wait_low() if it is desired
478that this thread should not wait in case of an intervening call to
479os_event_set() between this os_event_reset() and the
480os_event_wait_low() call. See comments for os_event_wait_low().
481@return current signal_count. */
482int64_t
483os_event_reset(
484/*===========*/
485 os_event_t event) /*!< in/out: event to reset */
486{
487 return(event->reset());
488}
489
490/**
491Waits for an event object until it is in the signaled state or
492a timeout is exceeded.
493@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
494ulint
495os_event_wait_time_low(
496/*===================*/
497 os_event_t event, /*!< in/out: event to wait */
498 ulint time_in_usec, /*!< in: timeout in
499 microseconds, or
500 OS_SYNC_INFINITE_TIME */
501 int64_t reset_sig_count) /*!< in: zero or the value
502 returned by previous call of
503 os_event_reset(). */
504{
505 return(event->wait_time_low(time_in_usec, reset_sig_count));
506}
507
508/**
509Waits for an event object until it is in the signaled state.
510
511Where such a scenario is possible, to avoid infinite wait, the
512value returned by os_event_reset() should be passed in as
513reset_sig_count. */
514void
515os_event_wait_low(
516/*==============*/
517 os_event_t event, /*!< in: event to wait */
518 int64_t reset_sig_count) /*!< in: zero or the value
519 returned by previous call of
520 os_event_reset(). */
521{
522 event->wait_low(reset_sig_count);
523}
524
525/**
526Frees an event object. */
527void
528os_event_destroy(
529/*=============*/
530 os_event_t& event) /*!< in/own: event to free */
531
532{
533 if (event != NULL) {
534 UT_DELETE(event);
535 event = NULL;
536 }
537}
538