1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. |
4 | |
5 | This program is free software; you can redistribute it and/or modify it under |
6 | the terms of the GNU General Public License as published by the Free Software |
7 | Foundation; version 2 of the License. |
8 | |
9 | This program is distributed in the hope that it will be useful, but WITHOUT |
10 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
11 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License along with |
14 | this program; if not, write to the Free Software Foundation, Inc., |
15 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
16 | |
17 | *****************************************************************************/ |
18 | |
19 | /**************************************************//** |
20 | @file os/os0event.cc |
21 | The interface to the operating system condition variables. |
22 | |
23 | Created 2012-09-23 Sunny Bains |
24 | *******************************************************/ |
25 | |
26 | #include "os0event.h" |
27 | #include "ut0mutex.h" |
28 | #include "ha_prototypes.h" |
29 | #include "ut0new.h" |
30 | |
31 | #ifdef _WIN32 |
32 | #include <windows.h> |
33 | #include <synchapi.h> |
34 | #endif /* _WIN32 */ |
35 | |
36 | #include <list> |
37 | |
38 | #ifdef _WIN32 |
39 | /** Native condition variable. */ |
40 | typedef CONDITION_VARIABLE os_cond_t; |
41 | #else |
42 | /** Native condition variable */ |
43 | typedef pthread_cond_t os_cond_t; |
44 | #endif /* _WIN32 */ |
45 | |
46 | typedef std::list<os_event_t, ut_allocator<os_event_t> > os_event_list_t; |
47 | typedef os_event_list_t::iterator event_iter_t; |
48 | |
49 | /** InnoDB condition variable. */ |
50 | struct os_event { |
51 | os_event() UNIV_NOTHROW; |
52 | |
53 | ~os_event() UNIV_NOTHROW; |
54 | |
55 | /** |
56 | Destroys a condition variable */ |
57 | void destroy() UNIV_NOTHROW |
58 | { |
59 | #ifndef _WIN32 |
60 | int ret = pthread_cond_destroy(&cond_var); |
61 | ut_a(ret == 0); |
62 | #endif /* !_WIN32 */ |
63 | |
64 | mutex.destroy(); |
65 | } |
66 | |
67 | /** Set the event */ |
68 | void set() UNIV_NOTHROW |
69 | { |
70 | mutex.enter(); |
71 | |
72 | if (!m_set) { |
73 | broadcast(); |
74 | } |
75 | |
76 | mutex.exit(); |
77 | } |
78 | |
79 | int64_t reset() UNIV_NOTHROW |
80 | { |
81 | mutex.enter(); |
82 | |
83 | if (m_set) { |
84 | m_set = false; |
85 | } |
86 | |
87 | int64_t ret = signal_count; |
88 | |
89 | mutex.exit(); |
90 | |
91 | return(ret); |
92 | } |
93 | |
94 | /** |
95 | Waits for an event object until it is in the signaled state. |
96 | |
97 | Typically, if the event has been signalled after the os_event_reset() |
98 | we'll return immediately because event->m_set == true. |
99 | There are, however, situations (e.g.: sync_array code) where we may |
100 | lose this information. For example: |
101 | |
102 | thread A calls os_event_reset() |
103 | thread B calls os_event_set() [event->m_set == true] |
104 | thread C calls os_event_reset() [event->m_set == false] |
105 | thread A calls os_event_wait() [infinite wait!] |
106 | thread C calls os_event_wait() [infinite wait!] |
107 | |
108 | Where such a scenario is possible, to avoid infinite wait, the |
109 | value returned by reset() should be passed in as |
110 | reset_sig_count. */ |
111 | void wait_low(int64_t reset_sig_count) UNIV_NOTHROW; |
112 | |
113 | /** |
114 | Waits for an event object until it is in the signaled state or |
115 | a timeout is exceeded. |
116 | @param time_in_usec - timeout in microseconds, |
117 | or OS_SYNC_INFINITE_TIME |
118 | @param reset_sig_count- zero or the value returned by |
119 | previous call of os_event_reset(). |
120 | @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ |
121 | ulint wait_time_low( |
122 | ulint time_in_usec, |
123 | int64_t reset_sig_count) UNIV_NOTHROW; |
124 | |
125 | /** @return true if the event is in the signalled state. */ |
126 | bool is_set() const UNIV_NOTHROW |
127 | { |
128 | return(m_set); |
129 | } |
130 | |
131 | private: |
132 | /** |
133 | Initialize a condition variable */ |
134 | void init() UNIV_NOTHROW |
135 | { |
136 | |
137 | mutex.init(); |
138 | |
139 | #ifdef _WIN32 |
140 | InitializeConditionVariable(&cond_var); |
141 | #else |
142 | { |
143 | int ret; |
144 | |
145 | ret = pthread_cond_init(&cond_var, NULL); |
146 | ut_a(ret == 0); |
147 | } |
148 | #endif /* _WIN32 */ |
149 | } |
150 | |
151 | /** |
152 | Wait on condition variable */ |
153 | void wait() UNIV_NOTHROW |
154 | { |
155 | #ifdef _WIN32 |
156 | if (!SleepConditionVariableCS(&cond_var, mutex, INFINITE)) { |
157 | ut_error; |
158 | } |
159 | #else |
160 | { |
161 | int ret; |
162 | |
163 | ret = pthread_cond_wait(&cond_var, mutex); |
164 | ut_a(ret == 0); |
165 | } |
166 | #endif /* _WIN32 */ |
167 | } |
168 | |
169 | /** |
170 | Wakes all threads waiting for condition variable */ |
171 | void broadcast() UNIV_NOTHROW |
172 | { |
173 | m_set = true; |
174 | ++signal_count; |
175 | |
176 | #ifdef _WIN32 |
177 | WakeAllConditionVariable(&cond_var); |
178 | #else |
179 | { |
180 | int ret; |
181 | |
182 | ret = pthread_cond_broadcast(&cond_var); |
183 | ut_a(ret == 0); |
184 | } |
185 | #endif /* _WIN32 */ |
186 | } |
187 | |
188 | /** |
189 | Wakes one thread waiting for condition variable */ |
190 | void signal() UNIV_NOTHROW |
191 | { |
192 | #ifdef _WIN32 |
193 | WakeConditionVariable(&cond_var); |
194 | #else |
195 | { |
196 | int ret; |
197 | |
198 | ret = pthread_cond_signal(&cond_var); |
199 | ut_a(ret == 0); |
200 | } |
201 | #endif /* _WIN32 */ |
202 | } |
203 | |
204 | /** |
205 | Do a timed wait on condition variable. |
206 | @param abstime - timeout |
207 | @param time_in_ms - timeout in milliseconds. |
208 | @return true if timed out, false otherwise */ |
209 | bool timed_wait( |
210 | #ifndef _WIN32 |
211 | const timespec* abstime |
212 | #else |
213 | DWORD time_in_ms |
214 | #endif /* !_WIN32 */ |
215 | ); |
216 | |
217 | private: |
218 | bool m_set; /*!< this is true when the |
219 | event is in the signaled |
220 | state, i.e., a thread does |
221 | not stop if it tries to wait |
222 | for this event */ |
223 | int64_t signal_count; /*!< this is incremented |
224 | each time the event becomes |
225 | signaled */ |
226 | EventMutex mutex; /*!< this mutex protects |
227 | the next fields */ |
228 | |
229 | |
230 | os_cond_t cond_var; /*!< condition variable is |
231 | used in waiting for the event */ |
232 | |
233 | public: |
234 | event_iter_t event_iter; /*!< For O(1) removal from |
235 | list */ |
236 | protected: |
237 | // Disable copying |
238 | os_event(const os_event&); |
239 | os_event& operator=(const os_event&); |
240 | }; |
241 | |
/**
Do a timed wait on condition variable.  The caller must hold mutex;
it is released during the wait and re-acquired before returning.
@param abstime - absolute deadline to wait until (POSIX builds)
@param time_in_ms - relative timeout in milliseconds (Windows builds)
@return true if timed out */
bool
os_event::timed_wait(
#ifndef _WIN32
	const timespec*	abstime
#else
	DWORD		time_in_ms
#endif /* !_WIN32 */
)
{
#ifdef _WIN32
	BOOL	ret;

	ret = SleepConditionVariableCS(&cond_var, mutex, time_in_ms);

	if (!ret) {
		DWORD	err = GetLastError();

		/* FQDN=msdn.microsoft.com
		@see http://$FQDN/en-us/library/ms686301%28VS.85%29.aspx,

		"Condition variables are subject to spurious wakeups
		(those not associated with an explicit wake) and stolen wakeups
		(another thread manages to run before the woken thread)."
		Check for both types of timeouts.
		Conditions are checked by the caller.*/
		if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) {
			return(true);
		}
	}

	/* Any failure other than a timeout is fatal. */
	ut_a(ret);

	return(false);
#else
	int	ret;

	ret = pthread_cond_timedwait(&cond_var, mutex, abstime);

	switch (ret) {
	case 0:
	case ETIMEDOUT:
	/* We play it safe by checking for EINTR even though
	according to the POSIX documentation it can't return EINTR. */
	case EINTR:
		break;

	default:
		/* Anything else (e.g. EINVAL from a malformed abstime)
		indicates a programming error: log details and crash. */
		ib::error() << "pthread_cond_timedwait() returned: " << ret
			<< ": abstime={" << abstime->tv_sec << ","
			<< abstime->tv_nsec << "}" ;
		ut_error;
	}

	return(ret == ETIMEDOUT);
#endif /* _WIN32 */
}
303 | |
304 | /** |
305 | Waits for an event object until it is in the signaled state. |
306 | |
307 | Typically, if the event has been signalled after the os_event_reset() |
308 | we'll return immediately because event->m_set == true. |
309 | There are, however, situations (e.g.: sync_array code) where we may |
310 | lose this information. For example: |
311 | |
312 | thread A calls os_event_reset() |
313 | thread B calls os_event_set() [event->m_set == true] |
314 | thread C calls os_event_reset() [event->m_set == false] |
315 | thread A calls os_event_wait() [infinite wait!] |
316 | thread C calls os_event_wait() [infinite wait!] |
317 | |
318 | Where such a scenario is possible, to avoid infinite wait, the |
319 | value returned by reset() should be passed in as |
320 | reset_sig_count. */ |
321 | void |
322 | os_event::wait_low( |
323 | int64_t reset_sig_count) UNIV_NOTHROW |
324 | { |
325 | mutex.enter(); |
326 | |
327 | if (!reset_sig_count) { |
328 | reset_sig_count = signal_count; |
329 | } |
330 | |
331 | while (!m_set && signal_count == reset_sig_count) { |
332 | |
333 | wait(); |
334 | |
335 | /* Spurious wakeups may occur: we have to check if the |
336 | event really has been signaled after we came here to wait. */ |
337 | } |
338 | |
339 | mutex.exit(); |
340 | } |
341 | |
/**
Waits for an event object until it is in the signaled state or
a timeout is exceeded.
@param time_in_usec - timeout in microseconds, or OS_SYNC_INFINITE_TIME
@param reset_sig_count - zero or the value returned by previous call
of os_event_reset().
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
ulint
os_event::wait_time_low(
	ulint		time_in_usec,
	int64_t		reset_sig_count) UNIV_NOTHROW
{
	bool	timed_out = false;

#ifdef _WIN32
	DWORD	time_in_ms;

	if (time_in_usec != OS_SYNC_INFINITE_TIME) {
		/* Windows takes a relative timeout in milliseconds;
		sub-millisecond precision is truncated away. */
		time_in_ms = DWORD(time_in_usec / 1000);
	} else {
		time_in_ms = INFINITE;
	}
#else
	struct timespec	abstime;

	if (time_in_usec != OS_SYNC_INFINITE_TIME) {
		struct timeval	tv;
		int		ret;
		ulint		sec;
		ulint		usec;

		/* pthread_cond_timedwait() needs an absolute deadline:
		start from the current wall-clock time... */
		ret = ut_usectime(&sec, &usec);
		ut_a(ret == 0);

		tv.tv_sec = sec;
		tv.tv_usec = usec;

		/* ...add the relative timeout... */
		tv.tv_usec += time_in_usec;

		/* ...then normalize overflowed microseconds into whole
		seconds and convert the remainder to nanoseconds. */
		abstime.tv_sec = tv.tv_sec + tv.tv_usec / 1000000;
		abstime.tv_nsec = tv.tv_usec % 1000000 * 1000;
	} else {
		/* "Infinite" wait: use the largest representable
		deadline rather than special-casing the wait loop. */
		abstime.tv_nsec = 999999999;
		abstime.tv_sec = (time_t) ULINT_MAX;
	}

	/* tv_nsec must stay in [0, 999999999] or
	pthread_cond_timedwait() returns EINVAL. */
	ut_a(abstime.tv_nsec <= 999999999);

#endif /* _WIN32 */

	mutex.enter();

	if (!reset_sig_count) {
		/* No count supplied: only a set() after this point
		counts as a wakeup. */
		reset_sig_count = signal_count;
	}

	do {
		if (m_set || signal_count != reset_sig_count) {

			break;
		}

		/* The mutex is released while inside timed_wait() and
		re-acquired before it returns; the predicate above is
		re-checked on every wakeup because wakeups may be
		spurious. */
#ifndef _WIN32
		timed_out = timed_wait(&abstime);
#else
		timed_out = timed_wait(time_in_ms);
#endif /* !_WIN32 */

	} while (!timed_out);

	mutex.exit();

	return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
}
416 | |
417 | /** Constructor */ |
418 | os_event::os_event() UNIV_NOTHROW |
419 | { |
420 | init(); |
421 | |
422 | m_set = false; |
423 | |
424 | /* We return this value in os_event_reset(), |
425 | which can then be be used to pass to the |
426 | os_event_wait_low(). The value of zero is |
427 | reserved in os_event_wait_low() for the case |
428 | when the caller does not want to pass any |
429 | signal_count value. To distinguish between |
430 | the two cases we initialize signal_count |
431 | to 1 here. */ |
432 | |
433 | signal_count = 1; |
434 | } |
435 | |
436 | /** Destructor */ |
437 | os_event::~os_event() UNIV_NOTHROW |
438 | { |
439 | destroy(); |
440 | } |
441 | |
442 | /** |
443 | Creates an event semaphore, i.e., a semaphore which may just have two |
444 | states: signaled and nonsignaled. The created event is manual reset: it |
445 | must be reset explicitly by calling sync_os_reset_event. |
446 | @return the event handle */ |
447 | os_event_t os_event_create(const char*) |
448 | { |
449 | return(UT_NEW_NOKEY(os_event())); |
450 | } |
451 | |
452 | /** |
453 | Check if the event is set. |
454 | @return true if set */ |
455 | bool |
456 | os_event_is_set( |
457 | /*============*/ |
458 | const os_event_t event) /*!< in: event to test */ |
459 | { |
460 | return(event->is_set()); |
461 | } |
462 | |
463 | /** |
464 | Sets an event semaphore to the signaled state: lets waiting threads |
465 | proceed. */ |
466 | void |
467 | os_event_set( |
468 | /*=========*/ |
469 | os_event_t event) /*!< in/out: event to set */ |
470 | { |
471 | event->set(); |
472 | } |
473 | |
474 | /** |
475 | Resets an event semaphore to the nonsignaled state. Waiting threads will |
476 | stop to wait for the event. |
477 | The return value should be passed to os_even_wait_low() if it is desired |
478 | that this thread should not wait in case of an intervening call to |
479 | os_event_set() between this os_event_reset() and the |
480 | os_event_wait_low() call. See comments for os_event_wait_low(). |
481 | @return current signal_count. */ |
482 | int64_t |
483 | os_event_reset( |
484 | /*===========*/ |
485 | os_event_t event) /*!< in/out: event to reset */ |
486 | { |
487 | return(event->reset()); |
488 | } |
489 | |
490 | /** |
491 | Waits for an event object until it is in the signaled state or |
492 | a timeout is exceeded. |
493 | @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ |
494 | ulint |
495 | os_event_wait_time_low( |
496 | /*===================*/ |
497 | os_event_t event, /*!< in/out: event to wait */ |
498 | ulint time_in_usec, /*!< in: timeout in |
499 | microseconds, or |
500 | OS_SYNC_INFINITE_TIME */ |
501 | int64_t reset_sig_count) /*!< in: zero or the value |
502 | returned by previous call of |
503 | os_event_reset(). */ |
504 | { |
505 | return(event->wait_time_low(time_in_usec, reset_sig_count)); |
506 | } |
507 | |
508 | /** |
509 | Waits for an event object until it is in the signaled state. |
510 | |
511 | Where such a scenario is possible, to avoid infinite wait, the |
512 | value returned by os_event_reset() should be passed in as |
513 | reset_sig_count. */ |
514 | void |
515 | os_event_wait_low( |
516 | /*==============*/ |
517 | os_event_t event, /*!< in: event to wait */ |
518 | int64_t reset_sig_count) /*!< in: zero or the value |
519 | returned by previous call of |
520 | os_event_reset(). */ |
521 | { |
522 | event->wait_low(reset_sig_count); |
523 | } |
524 | |
525 | /** |
526 | Frees an event object. */ |
527 | void |
528 | os_event_destroy( |
529 | /*=============*/ |
530 | os_event_t& event) /*!< in/own: event to free */ |
531 | |
532 | { |
533 | if (event != NULL) { |
534 | UT_DELETE(event); |
535 | event = NULL; |
536 | } |
537 | } |
538 | |