| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * posix_sema.c |
| 4 | * Implement PGSemaphores using POSIX semaphore facilities |
| 5 | * |
| 6 | * We prefer the unnamed style of POSIX semaphore (the kind made with |
| 7 | * sem_init). We can cope with the kind made with sem_open, however. |
| 8 | * |
| 9 | * In either implementation, typedef PGSemaphore is equivalent to "sem_t *". |
| 10 | * With unnamed semaphores, the sem_t structs live in an array in shared |
| 11 | * memory. With named semaphores, that's not true because we cannot persuade |
| 12 | * sem_open to do its allocation there. Therefore, the named-semaphore code |
| 13 | * *does not cope with EXEC_BACKEND*. The sem_t structs will just be in the |
| 14 | * postmaster's private memory, where they are successfully inherited by |
| 15 | * forked backends, but they could not be accessed by exec'd backends. |
| 16 | * |
| 17 | * |
| 18 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 19 | * Portions Copyright (c) 1994, Regents of the University of California |
| 20 | * |
| 21 | * IDENTIFICATION |
| 22 | * src/backend/port/posix_sema.c |
| 23 | * |
| 24 | *------------------------------------------------------------------------- |
| 25 | */ |
| 26 | #include "postgres.h" |
| 27 | |
| 28 | #include <fcntl.h> |
| 29 | #include <semaphore.h> |
| 30 | #include <signal.h> |
| 31 | #include <unistd.h> |
| 32 | |
| 33 | #include "miscadmin.h" |
| 34 | #include "storage/ipc.h" |
| 35 | #include "storage/pg_sema.h" |
| 36 | #include "storage/shmem.h" |
| 37 | |
| 38 | |
/*
 * Named semaphores are kept in process-private memory (see the file header
 * comment), so that mode cannot be combined with EXEC_BACKEND.  Refuse to
 * compile such a configuration.
 */
#ifdef USE_NAMED_POSIX_SEMAPHORES
#ifdef EXEC_BACKEND
#error cannot use named POSIX semaphores with EXEC_BACKEND
#endif
#endif
| 43 | |
| 44 | typedef union SemTPadded |
| 45 | { |
| 46 | sem_t pgsem; |
| 47 | char pad[PG_CACHE_LINE_SIZE]; |
| 48 | } SemTPadded; |
| 49 | |
| 50 | /* typedef PGSemaphore is equivalent to pointer to sem_t */ |
| 51 | typedef struct PGSemaphoreData |
| 52 | { |
| 53 | SemTPadded sem_padded; |
| 54 | } PGSemaphoreData; |
| 55 | |
/* Yield the address of the sem_t stored inside the object that x points to */
#define PG_SEM_REF(x)	(&((x)->sem_padded.pgsem))
| 57 | |
/* Permission bits passed to sem_open: read/write for the owning user only */
#define IPCProtection	(0600)
| 59 | |
| 60 | #ifdef USE_NAMED_POSIX_SEMAPHORES |
| 61 | static sem_t **mySemPointers; /* keep track of created semaphores */ |
| 62 | #else |
| 63 | static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */ |
| 64 | #endif |
| 65 | static int numSems; /* number of semas acquired so far */ |
| 66 | static int maxSems; /* allocated size of above arrays */ |
| 67 | static int nextSemKey; /* next name to try */ |
| 68 | |
| 69 | |
| 70 | static void ReleaseSemaphores(int status, Datum arg); |
| 71 | |
| 72 | |
| 73 | #ifdef USE_NAMED_POSIX_SEMAPHORES |
| 74 | |
| 75 | /* |
| 76 | * PosixSemaphoreCreate |
| 77 | * |
| 78 | * Attempt to create a new named semaphore. |
| 79 | * |
| 80 | * If we fail with a failure code other than collision-with-existing-sema, |
| 81 | * print out an error and abort. Other types of errors suggest nonrecoverable |
| 82 | * problems. |
| 83 | */ |
| 84 | static sem_t * |
| 85 | PosixSemaphoreCreate(void) |
| 86 | { |
| 87 | int semKey; |
| 88 | char semname[64]; |
| 89 | sem_t *mySem; |
| 90 | |
| 91 | for (;;) |
| 92 | { |
| 93 | semKey = nextSemKey++; |
| 94 | |
| 95 | snprintf(semname, sizeof(semname), "/pgsql-%d" , semKey); |
| 96 | |
| 97 | mySem = sem_open(semname, O_CREAT | O_EXCL, |
| 98 | (mode_t) IPCProtection, (unsigned) 1); |
| 99 | |
| 100 | #ifdef SEM_FAILED |
| 101 | if (mySem != (sem_t *) SEM_FAILED) |
| 102 | break; |
| 103 | #else |
| 104 | if (mySem != (sem_t *) (-1)) |
| 105 | break; |
| 106 | #endif |
| 107 | |
| 108 | /* Loop if error indicates a collision */ |
| 109 | if (errno == EEXIST || errno == EACCES || errno == EINTR) |
| 110 | continue; |
| 111 | |
| 112 | /* |
| 113 | * Else complain and abort |
| 114 | */ |
| 115 | elog(FATAL, "sem_open(\"%s\") failed: %m" , semname); |
| 116 | } |
| 117 | |
| 118 | /* |
| 119 | * Unlink the semaphore immediately, so it can't be accessed externally. |
| 120 | * This also ensures that it will go away if we crash. |
| 121 | */ |
| 122 | sem_unlink(semname); |
| 123 | |
| 124 | return mySem; |
| 125 | } |
| 126 | #else /* !USE_NAMED_POSIX_SEMAPHORES */ |
| 127 | |
| 128 | /* |
| 129 | * PosixSemaphoreCreate |
| 130 | * |
| 131 | * Attempt to create a new unnamed semaphore. |
| 132 | */ |
| 133 | static void |
| 134 | PosixSemaphoreCreate(sem_t *sem) |
| 135 | { |
| 136 | if (sem_init(sem, 1, 1) < 0) |
| 137 | elog(FATAL, "sem_init failed: %m" ); |
| 138 | } |
| 139 | #endif /* USE_NAMED_POSIX_SEMAPHORES */ |
| 140 | |
| 141 | |
| 142 | /* |
| 143 | * PosixSemaphoreKill - removes a semaphore |
| 144 | */ |
| 145 | static void |
| 146 | PosixSemaphoreKill(sem_t *sem) |
| 147 | { |
| 148 | #ifdef USE_NAMED_POSIX_SEMAPHORES |
| 149 | /* Got to use sem_close for named semaphores */ |
| 150 | if (sem_close(sem) < 0) |
| 151 | elog(LOG, "sem_close failed: %m" ); |
| 152 | #else |
| 153 | /* Got to use sem_destroy for unnamed semaphores */ |
| 154 | if (sem_destroy(sem) < 0) |
| 155 | elog(LOG, "sem_destroy failed: %m" ); |
| 156 | #endif |
| 157 | } |
| 158 | |
| 159 | |
| 160 | /* |
| 161 | * Report amount of shared memory needed for semaphores |
| 162 | */ |
| 163 | Size |
| 164 | PGSemaphoreShmemSize(int maxSemas) |
| 165 | { |
| 166 | #ifdef USE_NAMED_POSIX_SEMAPHORES |
| 167 | /* No shared memory needed in this case */ |
| 168 | return 0; |
| 169 | #else |
| 170 | /* Need a PGSemaphoreData per semaphore */ |
| 171 | return mul_size(maxSemas, sizeof(PGSemaphoreData)); |
| 172 | #endif |
| 173 | } |
| 174 | |
| 175 | /* |
| 176 | * PGReserveSemaphores --- initialize semaphore support |
| 177 | * |
| 178 | * This is called during postmaster start or shared memory reinitialization. |
| 179 | * It should do whatever is needed to be able to support up to maxSemas |
| 180 | * subsequent PGSemaphoreCreate calls. Also, if any system resources |
| 181 | * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit |
| 182 | * callback to release them. |
| 183 | * |
| 184 | * The port number is passed for possible use as a key (for Posix, we use |
| 185 | * it to generate the starting semaphore name). In a standalone backend, |
| 186 | * zero will be passed. |
| 187 | * |
| 188 | * In the Posix implementation, we acquire semaphores on-demand; the |
| 189 | * maxSemas parameter is just used to size the arrays. For unnamed |
| 190 | * semaphores, there is an array of PGSemaphoreData structs in shared memory. |
| 191 | * For named semaphores, we keep a postmaster-local array of sem_t pointers, |
| 192 | * which we use for releasing the semphores when done. |
| 193 | * (This design minimizes the dependency of postmaster shutdown on the |
| 194 | * contents of shared memory, which a failed backend might have clobbered. |
| 195 | * We can't do much about the possibility of sem_destroy() crashing, but |
| 196 | * we don't have to expose the counters to other processes.) |
| 197 | */ |
| 198 | void |
| 199 | PGReserveSemaphores(int maxSemas, int port) |
| 200 | { |
| 201 | #ifdef USE_NAMED_POSIX_SEMAPHORES |
| 202 | mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *)); |
| 203 | if (mySemPointers == NULL) |
| 204 | elog(PANIC, "out of memory" ); |
| 205 | #else |
| 206 | |
| 207 | /* |
| 208 | * We must use ShmemAllocUnlocked(), since the spinlock protecting |
| 209 | * ShmemAlloc() won't be ready yet. (This ordering is necessary when we |
| 210 | * are emulating spinlocks with semaphores.) |
| 211 | */ |
| 212 | sharedSemas = (PGSemaphore) |
| 213 | ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas)); |
| 214 | #endif |
| 215 | |
| 216 | numSems = 0; |
| 217 | maxSems = maxSemas; |
| 218 | nextSemKey = port * 1000; |
| 219 | |
| 220 | on_shmem_exit(ReleaseSemaphores, 0); |
| 221 | } |
| 222 | |
| 223 | /* |
| 224 | * Release semaphores at shutdown or shmem reinitialization |
| 225 | * |
| 226 | * (called as an on_shmem_exit callback, hence funny argument list) |
| 227 | */ |
| 228 | static void |
| 229 | ReleaseSemaphores(int status, Datum arg) |
| 230 | { |
| 231 | int i; |
| 232 | |
| 233 | #ifdef USE_NAMED_POSIX_SEMAPHORES |
| 234 | for (i = 0; i < numSems; i++) |
| 235 | PosixSemaphoreKill(mySemPointers[i]); |
| 236 | free(mySemPointers); |
| 237 | #endif |
| 238 | |
| 239 | #ifdef USE_UNNAMED_POSIX_SEMAPHORES |
| 240 | for (i = 0; i < numSems; i++) |
| 241 | PosixSemaphoreKill(PG_SEM_REF(sharedSemas + i)); |
| 242 | #endif |
| 243 | } |
| 244 | |
| 245 | /* |
| 246 | * PGSemaphoreCreate |
| 247 | * |
| 248 | * Allocate a PGSemaphore structure with initial count 1 |
| 249 | */ |
| 250 | PGSemaphore |
| 251 | PGSemaphoreCreate(void) |
| 252 | { |
| 253 | PGSemaphore sema; |
| 254 | sem_t *newsem; |
| 255 | |
| 256 | /* Can't do this in a backend, because static state is postmaster's */ |
| 257 | Assert(!IsUnderPostmaster); |
| 258 | |
| 259 | if (numSems >= maxSems) |
| 260 | elog(PANIC, "too many semaphores created" ); |
| 261 | |
| 262 | #ifdef USE_NAMED_POSIX_SEMAPHORES |
| 263 | newsem = PosixSemaphoreCreate(); |
| 264 | /* Remember new sema for ReleaseSemaphores */ |
| 265 | mySemPointers[numSems] = newsem; |
| 266 | sema = (PGSemaphore) newsem; |
| 267 | #else |
| 268 | sema = &sharedSemas[numSems]; |
| 269 | newsem = PG_SEM_REF(sema); |
| 270 | PosixSemaphoreCreate(newsem); |
| 271 | #endif |
| 272 | |
| 273 | numSems++; |
| 274 | |
| 275 | return sema; |
| 276 | } |
| 277 | |
| 278 | /* |
| 279 | * PGSemaphoreReset |
| 280 | * |
| 281 | * Reset a previously-initialized PGSemaphore to have count 0 |
| 282 | */ |
| 283 | void |
| 284 | PGSemaphoreReset(PGSemaphore sema) |
| 285 | { |
| 286 | /* |
| 287 | * There's no direct API for this in POSIX, so we have to ratchet the |
| 288 | * semaphore down to 0 with repeated trywait's. |
| 289 | */ |
| 290 | for (;;) |
| 291 | { |
| 292 | if (sem_trywait(PG_SEM_REF(sema)) < 0) |
| 293 | { |
| 294 | if (errno == EAGAIN || errno == EDEADLK) |
| 295 | break; /* got it down to 0 */ |
| 296 | if (errno == EINTR) |
| 297 | continue; /* can this happen? */ |
| 298 | elog(FATAL, "sem_trywait failed: %m" ); |
| 299 | } |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | /* |
| 304 | * PGSemaphoreLock |
| 305 | * |
| 306 | * Lock a semaphore (decrement count), blocking if count would be < 0 |
| 307 | */ |
| 308 | void |
| 309 | PGSemaphoreLock(PGSemaphore sema) |
| 310 | { |
| 311 | int errStatus; |
| 312 | |
| 313 | /* See notes in sysv_sema.c's implementation of PGSemaphoreLock. */ |
| 314 | do |
| 315 | { |
| 316 | errStatus = sem_wait(PG_SEM_REF(sema)); |
| 317 | } while (errStatus < 0 && errno == EINTR); |
| 318 | |
| 319 | if (errStatus < 0) |
| 320 | elog(FATAL, "sem_wait failed: %m" ); |
| 321 | } |
| 322 | |
| 323 | /* |
| 324 | * PGSemaphoreUnlock |
| 325 | * |
| 326 | * Unlock a semaphore (increment count) |
| 327 | */ |
| 328 | void |
| 329 | PGSemaphoreUnlock(PGSemaphore sema) |
| 330 | { |
| 331 | int errStatus; |
| 332 | |
| 333 | /* |
| 334 | * Note: if errStatus is -1 and errno == EINTR then it means we returned |
| 335 | * from the operation prematurely because we were sent a signal. So we |
| 336 | * try and unlock the semaphore again. Not clear this can really happen, |
| 337 | * but might as well cope. |
| 338 | */ |
| 339 | do |
| 340 | { |
| 341 | errStatus = sem_post(PG_SEM_REF(sema)); |
| 342 | } while (errStatus < 0 && errno == EINTR); |
| 343 | |
| 344 | if (errStatus < 0) |
| 345 | elog(FATAL, "sem_post failed: %m" ); |
| 346 | } |
| 347 | |
| 348 | /* |
| 349 | * PGSemaphoreTryLock |
| 350 | * |
| 351 | * Lock a semaphore only if able to do so without blocking |
| 352 | */ |
| 353 | bool |
| 354 | PGSemaphoreTryLock(PGSemaphore sema) |
| 355 | { |
| 356 | int errStatus; |
| 357 | |
| 358 | /* |
| 359 | * Note: if errStatus is -1 and errno == EINTR then it means we returned |
| 360 | * from the operation prematurely because we were sent a signal. So we |
| 361 | * try and lock the semaphore again. |
| 362 | */ |
| 363 | do |
| 364 | { |
| 365 | errStatus = sem_trywait(PG_SEM_REF(sema)); |
| 366 | } while (errStatus < 0 && errno == EINTR); |
| 367 | |
| 368 | if (errStatus < 0) |
| 369 | { |
| 370 | if (errno == EAGAIN || errno == EDEADLK) |
| 371 | return false; /* failed to lock it */ |
| 372 | /* Otherwise we got trouble */ |
| 373 | elog(FATAL, "sem_trywait failed: %m" ); |
| 374 | } |
| 375 | |
| 376 | return true; |
| 377 | } |
| 378 | |