github.com/containers/podman/v4@v4.9.4/libpod/lock/shm/shm_lock.c (about) 1 #include <errno.h> 2 #include <fcntl.h> 3 #include <pthread.h> 4 #include <stdbool.h> 5 #include <stdint.h> 6 #include <stdlib.h> 7 #include <sys/mman.h> 8 #include <sys/stat.h> 9 #include <sys/types.h> 10 #include <unistd.h> 11 12 #include "shm_lock.h" 13 14 // Compute the size of the SHM struct 15 static size_t compute_shm_size(uint32_t num_bitmaps) { 16 return sizeof(shm_struct_t) + (num_bitmaps * sizeof(lock_group_t)); 17 } 18 19 // Take the given mutex. 20 // Handles exceptional conditions, including a mutex locked by a process that 21 // died holding it. 22 // Returns 0 on success, or positive errno on failure. 23 static int take_mutex(pthread_mutex_t *mutex, bool trylock) { 24 int ret_code; 25 26 if (!trylock) { 27 do { 28 ret_code = pthread_mutex_lock(mutex); 29 } while(ret_code == EAGAIN); 30 } else { 31 do { 32 ret_code = pthread_mutex_trylock(mutex); 33 } while(ret_code == EAGAIN); 34 } 35 36 if (ret_code == EOWNERDEAD) { 37 // The previous owner of the mutex died while holding it 38 // Take it for ourselves 39 ret_code = pthread_mutex_consistent(mutex); 40 if (ret_code != 0) { 41 // Someone else may have gotten here first and marked the state consistent 42 // However, the mutex could also be invalid. 43 // Fail here instead of looping back to trying to lock the mutex. 44 return ret_code; 45 } 46 } else if (ret_code != 0) { 47 return ret_code; 48 } 49 50 return 0; 51 } 52 53 // Release the given mutex. 54 // Returns 0 on success, or positive errno on failure. 55 static int release_mutex(pthread_mutex_t *mutex) { 56 int ret_code; 57 58 do { 59 ret_code = pthread_mutex_unlock(mutex); 60 } while(ret_code == EAGAIN); 61 62 if (ret_code != 0) { 63 return ret_code; 64 } 65 66 return 0; 67 } 68 69 // Set up an SHM segment holding locks for libpod. 70 // num_locks must not be 0. 71 // Path is the path to the SHM segment. It must begin with a single / and 72 // container no other / characters, and be at most 255 characters including 73 // terminating NULL byte. 74 // Returns a valid pointer on success or NULL on error. 75 // If an error occurs, negative ERRNO values will be written to error_code. 76 shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code) { 77 int shm_fd, i, j, ret_code; 78 uint32_t num_bitmaps; 79 size_t shm_size; 80 shm_struct_t *shm; 81 pthread_mutexattr_t attr; 82 83 // If error_code doesn't point to anything, we can't reasonably return errors 84 // So fail immediately 85 if (error_code == NULL) { 86 return NULL; 87 } 88 89 // We need a nonzero number of locks 90 if (num_locks == 0) { 91 *error_code = -1 * EINVAL; 92 return NULL; 93 } 94 95 if (path == NULL) { 96 *error_code = -1 * EINVAL; 97 return NULL; 98 } 99 100 // Calculate the number of bitmaps required 101 num_bitmaps = num_locks / BITMAP_SIZE; 102 if (num_locks % BITMAP_SIZE != 0) { 103 // The actual number given is not an even multiple of our bitmap size 104 // So round up 105 num_bitmaps += 1; 106 } 107 108 // Calculate size of the shm segment 109 shm_size = compute_shm_size(num_bitmaps); 110 111 // Create a new SHM segment for us 112 shm_fd = shm_open(path, O_RDWR | O_CREAT | O_EXCL, 0600); 113 if (shm_fd < 0) { 114 *error_code = -1 * errno; 115 return NULL; 116 } 117 118 // Increase its size to what we need 119 ret_code = ftruncate(shm_fd, shm_size); 120 if (ret_code < 0) { 121 *error_code = -1 * errno; 122 goto CLEANUP_UNLINK; 123 } 124 125 // Map the shared memory in 126 shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); 127 if (shm == MAP_FAILED) { 128 *error_code = -1 * errno; 129 goto CLEANUP_UNLINK; 130 } 131 132 // We have successfully mapped the memory, now initialize the region 133 shm->magic = MAGIC; 134 shm->unused = 0; 135 shm->num_locks = num_bitmaps * BITMAP_SIZE; 136 shm->num_bitmaps = num_bitmaps; 137 138 // Create an initializer for our pthread mutexes 139 ret_code = pthread_mutexattr_init(&attr); 140 if (ret_code != 0) { 141 *error_code = -1 * ret_code; 142 goto CLEANUP_UNMAP; 143 } 144 145 // Ensure that recursive locking of a mutex by the same OS thread (which may 146 // refer to numerous goroutines) blocks. 147 ret_code = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL); 148 if (ret_code != 0) { 149 *error_code = -1 * ret_code; 150 goto CLEANUP_FREEATTR; 151 } 152 153 // Set mutexes to pshared - multiprocess-safe 154 ret_code = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); 155 if (ret_code != 0) { 156 *error_code = -1 * ret_code; 157 goto CLEANUP_FREEATTR; 158 } 159 160 // Set mutexes to robust - if a process dies while holding a mutex, we'll get 161 // a special error code on the next attempt to lock it. 162 // This should prevent panicking processes from leaving the state unusable. 163 ret_code = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST); 164 if (ret_code != 0) { 165 *error_code = -1 * ret_code; 166 goto CLEANUP_FREEATTR; 167 } 168 169 // Initialize the mutex that protects the bitmaps using the mutex attributes 170 ret_code = pthread_mutex_init(&(shm->segment_lock), &attr); 171 if (ret_code != 0) { 172 *error_code = -1 * ret_code; 173 goto CLEANUP_FREEATTR; 174 } 175 176 // Initialize all bitmaps to 0 initially 177 // And initialize all semaphores they use 178 for (i = 0; i < num_bitmaps; i++) { 179 shm->locks[i].bitmap = 0; 180 for (j = 0; j < BITMAP_SIZE; j++) { 181 // Initialize each mutex 182 ret_code = pthread_mutex_init(&(shm->locks[i].locks[j]), &attr); 183 if (ret_code != 0) { 184 *error_code = -1 * ret_code; 185 goto CLEANUP_FREEATTR; 186 } 187 } 188 } 189 190 // Close the file descriptor, we're done with it 191 // Ignore errors, it's ok if we leak a single FD and this should only run once 192 close(shm_fd); 193 194 // Destroy the pthread initializer attribute. 195 // Again, ignore errors, this will only run once and we might leak a tiny bit 196 // of memory at worst. 197 pthread_mutexattr_destroy(&attr); 198 199 return shm; 200 201 // Cleanup after an error 202 CLEANUP_FREEATTR: 203 pthread_mutexattr_destroy(&attr); 204 CLEANUP_UNMAP: 205 munmap(shm, shm_size); 206 CLEANUP_UNLINK: 207 close(shm_fd); 208 shm_unlink(path); 209 return NULL; 210 } 211 212 // Open an existing SHM segment holding libpod locks. 213 // num_locks is the number of locks that will be configured in the SHM segment. 214 // num_locks cannot be 0. 215 // Path is the path to the SHM segment. It must begin with a single / and 216 // container no other / characters, and be at most 255 characters including 217 // terminating NULL byte. 218 // Returns a valid pointer on success or NULL on error. 219 // If an error occurs, negative ERRNO values will be written to error_code. 220 // ERANGE is returned for a mismatch between num_locks and the number of locks 221 // available in the SHM lock struct. 222 shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code) { 223 int shm_fd; 224 shm_struct_t *shm; 225 size_t shm_size; 226 uint32_t num_bitmaps; 227 228 if (error_code == NULL) { 229 return NULL; 230 } 231 232 // We need a nonzero number of locks 233 if (num_locks == 0) { 234 *error_code = -1 * EINVAL; 235 return NULL; 236 } 237 238 if (path == NULL) { 239 *error_code = -1 * EINVAL; 240 return NULL; 241 } 242 243 // Calculate the number of bitmaps required 244 num_bitmaps = num_locks / BITMAP_SIZE; 245 if (num_locks % BITMAP_SIZE != 0) { 246 num_bitmaps += 1; 247 } 248 249 // Calculate size of the shm segment 250 shm_size = compute_shm_size(num_bitmaps); 251 252 shm_fd = shm_open(path, O_RDWR, 0600); 253 if (shm_fd < 0) { 254 *error_code = -1 * errno; 255 return NULL; 256 } 257 258 // Map the shared memory in 259 shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); 260 if (shm == MAP_FAILED) { 261 *error_code = -1 * errno; 262 } 263 264 // Ignore errors, it's ok if we leak a single FD since this only runs once 265 close(shm_fd); 266 267 // Check if we successfully mmap'd 268 if (shm == MAP_FAILED) { 269 return NULL; 270 } 271 272 // Need to check the SHM to see if it's actually our locks 273 if (shm->magic != MAGIC) { 274 *error_code = -1 * EBADF; 275 goto CLEANUP; 276 } 277 if (shm->num_locks != (num_bitmaps * BITMAP_SIZE)) { 278 *error_code = -1 * ERANGE; 279 goto CLEANUP; 280 } 281 282 return shm; 283 284 CLEANUP: 285 munmap(shm, shm_size); 286 return NULL; 287 } 288 289 // Close an open SHM lock struct, unmapping the backing memory. 290 // The given shm_struct_t will be rendered unusable as a result. 291 // On success, 0 is returned. On failure, negative ERRNO values are returned. 292 int32_t close_lock_shm(shm_struct_t *shm) { 293 int ret_code; 294 size_t shm_size; 295 296 // We can't unmap null... 297 if (shm == NULL) { 298 return -1 * EINVAL; 299 } 300 301 shm_size = compute_shm_size(shm->num_bitmaps); 302 303 ret_code = munmap(shm, shm_size); 304 305 if (ret_code != 0) { 306 return -1 * errno; 307 } 308 309 return 0; 310 } 311 312 // Allocate the first available semaphore 313 // Returns a positive integer guaranteed to be less than UINT32_MAX on success, 314 // or negative errno values on failure 315 // On success, the returned integer is the number of the semaphore allocated 316 int64_t allocate_semaphore(shm_struct_t *shm) { 317 int ret_code, i; 318 bitmap_t test_map; 319 int64_t sem_number, num_within_bitmap; 320 321 if (shm == NULL) { 322 return -1 * EINVAL; 323 } 324 325 // Lock the semaphore controlling access to our shared memory 326 ret_code = take_mutex(&(shm->segment_lock), false); 327 if (ret_code != 0) { 328 return -1 * ret_code; 329 } 330 331 // Loop through our bitmaps to search for one that is not full 332 for (i = 0; i < shm->num_bitmaps; i++) { 333 if (shm->locks[i].bitmap != 0xFFFFFFFF) { 334 test_map = 0x1; 335 num_within_bitmap = 0; 336 while (test_map != 0) { 337 if ((test_map & shm->locks[i].bitmap) == 0) { 338 // Compute the number of the semaphore we are allocating 339 sem_number = (BITMAP_SIZE * i) + num_within_bitmap; 340 // OR in the bitmap 341 shm->locks[i].bitmap = shm->locks[i].bitmap | test_map; 342 343 // Clear the mutex 344 ret_code = release_mutex(&(shm->segment_lock)); 345 if (ret_code != 0) { 346 return -1 * ret_code; 347 } 348 349 // Return the semaphore we've allocated 350 return sem_number; 351 } 352 test_map = test_map << 1; 353 num_within_bitmap++; 354 } 355 // We should never fall through this loop 356 // TODO maybe an assert() here to panic if we do? 357 } 358 } 359 360 // Clear the mutex 361 ret_code = release_mutex(&(shm->segment_lock)); 362 if (ret_code != 0) { 363 return -1 * ret_code; 364 } 365 366 // All bitmaps are full 367 // We have no available semaphores, report allocation failure 368 return -1 * ENOSPC; 369 } 370 371 // Allocate the semaphore with the given ID. 372 // Returns an error if the semaphore with this ID does not exist, or has already 373 // been allocated. 374 // Returns 0 on success, or negative errno values on failure. 375 int32_t allocate_given_semaphore(shm_struct_t *shm, uint32_t sem_index) { 376 int bitmap_index, index_in_bitmap, ret_code; 377 bitmap_t test_map; 378 379 if (shm == NULL) { 380 return -1 * EINVAL; 381 } 382 383 // Check if the lock index is valid 384 if (sem_index >= shm->num_locks) { 385 return -1 * EINVAL; 386 } 387 388 bitmap_index = sem_index / BITMAP_SIZE; 389 index_in_bitmap = sem_index % BITMAP_SIZE; 390 391 // This should never happen if the sem_index test above succeeded, but better 392 // safe than sorry 393 if (bitmap_index >= shm->num_bitmaps) { 394 return -1 * EFAULT; 395 } 396 397 test_map = 0x1 << index_in_bitmap; 398 399 // Lock the mutex controlling access to our shared memory 400 ret_code = take_mutex(&(shm->segment_lock), false); 401 if (ret_code != 0) { 402 return -1 * ret_code; 403 } 404 405 // Check if the semaphore is allocated 406 if ((test_map & shm->locks[bitmap_index].bitmap) != 0) { 407 ret_code = release_mutex(&(shm->segment_lock)); 408 if (ret_code != 0) { 409 return -1 * ret_code; 410 } 411 412 return -1 * EEXIST; 413 } 414 415 // The semaphore is not allocated, allocate it 416 shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap | test_map; 417 418 ret_code = release_mutex(&(shm->segment_lock)); 419 if (ret_code != 0) { 420 return -1 * ret_code; 421 } 422 423 return 0; 424 } 425 426 // Deallocate a given semaphore 427 // Returns 0 on success, negative ERRNO values on failure 428 int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) { 429 bitmap_t test_map; 430 int bitmap_index, index_in_bitmap, ret_code; 431 432 if (shm == NULL) { 433 return -1 * EINVAL; 434 } 435 436 // Check if the lock index is valid 437 if (sem_index >= shm->num_locks) { 438 return -1 * EINVAL; 439 } 440 441 bitmap_index = sem_index / BITMAP_SIZE; 442 index_in_bitmap = sem_index % BITMAP_SIZE; 443 444 // This should never happen if the sem_index test above succeeded, but better 445 // safe than sorry 446 if (bitmap_index >= shm->num_bitmaps) { 447 return -1 * EFAULT; 448 } 449 450 test_map = 0x1 << index_in_bitmap; 451 452 // Lock the mutex controlling access to our shared memory 453 ret_code = take_mutex(&(shm->segment_lock), false); 454 if (ret_code != 0) { 455 return -1 * ret_code; 456 } 457 458 // Check if the semaphore is allocated 459 if ((test_map & shm->locks[bitmap_index].bitmap) == 0) { 460 ret_code = release_mutex(&(shm->segment_lock)); 461 if (ret_code != 0) { 462 return -1 * ret_code; 463 } 464 465 return -1 * ENOENT; 466 } 467 468 // The semaphore is allocated, clear it 469 // Invert the bitmask we used to test to clear the bit 470 test_map = ~test_map; 471 shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap & test_map; 472 473 ret_code = release_mutex(&(shm->segment_lock)); 474 if (ret_code != 0) { 475 return -1 * ret_code; 476 } 477 478 return 0; 479 } 480 481 // Deallocate all semaphores unconditionally. 482 // Returns negative ERRNO values. 483 int32_t deallocate_all_semaphores(shm_struct_t *shm) { 484 int ret_code; 485 uint i; 486 487 if (shm == NULL) { 488 return -1 * EINVAL; 489 } 490 491 // Lock the mutex controlling access to our shared memory 492 ret_code = take_mutex(&(shm->segment_lock), false); 493 if (ret_code != 0) { 494 return -1 * ret_code; 495 } 496 497 // Iterate through all bitmaps and reset to unused 498 for (i = 0; i < shm->num_bitmaps; i++) { 499 shm->locks[i].bitmap = 0; 500 } 501 502 // Unlock the allocation control mutex 503 ret_code = release_mutex(&(shm->segment_lock)); 504 if (ret_code != 0) { 505 return -1 * ret_code; 506 } 507 508 return 0; 509 } 510 511 // Lock a given semaphore 512 // Does not check if the semaphore is allocated - this ensures that, even for 513 // removed containers, we can still successfully lock to check status (and 514 // subsequently realize they have been removed). 515 // Returns 0 on success, -1 on failure 516 int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index) { 517 int bitmap_index, index_in_bitmap; 518 519 if (shm == NULL) { 520 return -1 * EINVAL; 521 } 522 523 if (sem_index >= shm->num_locks) { 524 return -1 * EINVAL; 525 } 526 527 bitmap_index = sem_index / BITMAP_SIZE; 528 index_in_bitmap = sem_index % BITMAP_SIZE; 529 530 return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]), false); 531 } 532 533 // Unlock a given semaphore 534 // Does not check if the semaphore is allocated - this ensures that, even for 535 // removed containers, we can still successfully lock to check status (and 536 // subsequently realize they have been removed). 537 // Returns 0 on success, -1 on failure 538 int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index) { 539 int bitmap_index, index_in_bitmap; 540 541 if (shm == NULL) { 542 return -1 * EINVAL; 543 } 544 545 if (sem_index >= shm->num_locks) { 546 return -1 * EINVAL; 547 } 548 549 bitmap_index = sem_index / BITMAP_SIZE; 550 index_in_bitmap = sem_index % BITMAP_SIZE; 551 552 return -1 * release_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap])); 553 } 554 555 // Get the number of free locks. 556 // Returns a positive integer guaranteed to be less than UINT32_MAX on success, 557 // or negative errno values on failure. 558 // On success, the returned integer is the number of free semaphores. 559 int64_t available_locks(shm_struct_t *shm) { 560 int ret_code, i, count; 561 bitmap_t test_map; 562 int64_t free_locks = 0; 563 564 if (shm == NULL) { 565 return -1 * EINVAL; 566 } 567 568 // Lock the semaphore controlling access to the SHM segment. 569 // This isn't strictly necessary as we're only reading, but it seems safer. 570 ret_code = take_mutex(&(shm->segment_lock), false); 571 if (ret_code != 0) { 572 return -1 * ret_code; 573 } 574 575 // Loop through all bitmaps, counting number of allocated locks. 576 for (i = 0; i < shm->num_bitmaps; i++) { 577 // Short-circuit to catch fully-empty bitmaps quick. 578 if (shm->locks[i].bitmap == 0) { 579 free_locks += sizeof(bitmap_t) * 8; 580 continue; 581 } 582 583 // Use Kernighan's Algorithm to count bits set. Subtract from number of bits 584 // in the integer to get free bits, and thus free lock count. 585 test_map = shm->locks[i].bitmap; 586 count = 0; 587 while (test_map) { 588 test_map = test_map & (test_map - 1); 589 count++; 590 } 591 592 free_locks += (sizeof(bitmap_t) * 8) - count; 593 } 594 595 // Clear the mutex 596 ret_code = release_mutex(&(shm->segment_lock)); 597 if (ret_code != 0) { 598 return -1 * ret_code; 599 } 600 601 // Return free lock count. 602 return free_locks; 603 } 604 605 // Attempt to take a given semaphore. If successfully taken, it is immediately 606 // released before the function returns. 607 // Used to check if a semaphore is in use, to detect potential deadlocks where a 608 // lock has not been released for an extended period of time. 609 // Note that this is NOT POSIX trylock as the lock is immediately released if 610 // taken. 611 // Returns negative errno on failure. 612 // On success, returns 1 if the lock was successfully taken, and 0 if it was 613 // not. 614 int32_t try_lock(shm_struct_t *shm, uint32_t sem_index) { 615 int bitmap_index, index_in_bitmap, ret_code; 616 pthread_mutex_t *mutex; 617 618 if (shm == NULL) { 619 return -1 * EINVAL; 620 } 621 622 if (sem_index >= shm->num_locks) { 623 return -1 * EINVAL; 624 } 625 626 bitmap_index = sem_index / BITMAP_SIZE; 627 index_in_bitmap = sem_index % BITMAP_SIZE; 628 629 mutex = &(shm->locks[bitmap_index].locks[index_in_bitmap]); 630 631 ret_code = take_mutex(mutex, true); 632 633 if (ret_code == EBUSY) { 634 // Did not successfully take the lock 635 return 0; 636 } else if (ret_code != 0) { 637 // Another, unrelated error 638 return -1 * ret_code; 639 } 640 641 // Lock taken successfully, unlock and return. 642 ret_code = release_mutex(mutex); 643 if (ret_code != 0) { 644 return -1 * ret_code; 645 } 646 647 return 1; 648 }