github.com/containers/podman/v4@v4.9.4/libpod/lock/shm/shm_lock.c (about)

     1  #include <errno.h>
     2  #include <fcntl.h>
     3  #include <pthread.h>
     4  #include <stdbool.h>
     5  #include <stdint.h>
     6  #include <stdlib.h>
     7  #include <sys/mman.h>
     8  #include <sys/stat.h>
     9  #include <sys/types.h>
    10  #include <unistd.h>
    11  
    12  #include "shm_lock.h"
    13  
    14  // Compute the size of the SHM struct
    15  static size_t compute_shm_size(uint32_t num_bitmaps) {
    16    return sizeof(shm_struct_t) + (num_bitmaps * sizeof(lock_group_t));
    17  }
    18  
    19  // Take the given mutex.
    20  // Handles exceptional conditions, including a mutex locked by a process that
    21  // died holding it.
    22  // Returns 0 on success, or positive errno on failure.
    23  static int take_mutex(pthread_mutex_t *mutex, bool trylock) {
    24    int ret_code;
    25  
    26    if (!trylock) {
    27      do {
    28        ret_code = pthread_mutex_lock(mutex);
    29      } while(ret_code == EAGAIN);
    30    } else {
    31      do {
    32        ret_code = pthread_mutex_trylock(mutex);
    33      } while(ret_code == EAGAIN);
    34    }
    35  
    36    if (ret_code == EOWNERDEAD) {
    37      // The previous owner of the mutex died while holding it
    38      // Take it for ourselves
    39      ret_code = pthread_mutex_consistent(mutex);
    40      if (ret_code != 0) {
    41        // Someone else may have gotten here first and marked the state consistent
    42        // However, the mutex could also be invalid.
    43        // Fail here instead of looping back to trying to lock the mutex.
    44        return ret_code;
    45      }
    46    } else if (ret_code != 0) {
    47      return ret_code;
    48    }
    49  
    50    return 0;
    51  }
    52  
    53  // Release the given mutex.
    54  // Returns 0 on success, or positive errno on failure.
    55  static int release_mutex(pthread_mutex_t *mutex) {
    56    int ret_code;
    57  
    58    do {
    59      ret_code = pthread_mutex_unlock(mutex);
    60    } while(ret_code == EAGAIN);
    61  
    62    if (ret_code != 0) {
    63      return ret_code;
    64    }
    65  
    66    return 0;
    67  }
    68  
    69  // Set up an SHM segment holding locks for libpod.
    70  // num_locks must not be 0.
    71  // Path is the path to the SHM segment. It must begin with a single / and
    72  // container no other / characters, and be at most 255 characters including
    73  // terminating NULL byte.
    74  // Returns a valid pointer on success or NULL on error.
    75  // If an error occurs, negative ERRNO values will be written to error_code.
    76  shm_struct_t *setup_lock_shm(char *path, uint32_t num_locks, int *error_code) {
    77    int shm_fd, i, j, ret_code;
    78    uint32_t num_bitmaps;
    79    size_t shm_size;
    80    shm_struct_t *shm;
    81    pthread_mutexattr_t attr;
    82  
    83    // If error_code doesn't point to anything, we can't reasonably return errors
    84    // So fail immediately
    85    if (error_code == NULL) {
    86      return NULL;
    87    }
    88  
    89    // We need a nonzero number of locks
    90    if (num_locks == 0) {
    91      *error_code = -1 * EINVAL;
    92      return NULL;
    93    }
    94  
    95    if (path == NULL) {
    96      *error_code = -1 * EINVAL;
    97      return NULL;
    98    }
    99  
   100    // Calculate the number of bitmaps required
   101    num_bitmaps = num_locks / BITMAP_SIZE;
   102    if (num_locks % BITMAP_SIZE != 0) {
   103      // The actual number given is not an even multiple of our bitmap size
   104      // So round up
   105      num_bitmaps += 1;
   106    }
   107  
   108    // Calculate size of the shm segment
   109    shm_size = compute_shm_size(num_bitmaps);
   110  
   111    // Create a new SHM segment for us
   112    shm_fd = shm_open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
   113    if (shm_fd < 0) {
   114      *error_code = -1 * errno;
   115      return NULL;
   116    }
   117  
   118    // Increase its size to what we need
   119    ret_code = ftruncate(shm_fd, shm_size);
   120    if (ret_code < 0) {
   121      *error_code = -1 * errno;
   122      goto CLEANUP_UNLINK;
   123    }
   124  
   125    // Map the shared memory in
   126    shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
   127    if (shm == MAP_FAILED) {
   128      *error_code = -1 * errno;
   129      goto CLEANUP_UNLINK;
   130    }
   131  
   132    // We have successfully mapped the memory, now initialize the region
   133    shm->magic = MAGIC;
   134    shm->unused = 0;
   135    shm->num_locks = num_bitmaps * BITMAP_SIZE;
   136    shm->num_bitmaps = num_bitmaps;
   137  
   138    // Create an initializer for our pthread mutexes
   139    ret_code = pthread_mutexattr_init(&attr);
   140    if (ret_code != 0) {
   141      *error_code = -1 * ret_code;
   142      goto CLEANUP_UNMAP;
   143    }
   144  
   145    // Ensure that recursive locking of a mutex by the same OS thread (which may
   146    // refer to numerous goroutines) blocks.
   147    ret_code = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL);
   148    if (ret_code != 0) {
   149      *error_code = -1 * ret_code;
   150      goto CLEANUP_FREEATTR;
   151    }
   152  
   153    // Set mutexes to pshared - multiprocess-safe
   154    ret_code = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
   155    if (ret_code != 0) {
   156      *error_code = -1 * ret_code;
   157      goto CLEANUP_FREEATTR;
   158    }
   159  
   160    // Set mutexes to robust - if a process dies while holding a mutex, we'll get
   161    // a special error code on the next attempt to lock it.
   162    // This should prevent panicking processes from leaving the state unusable.
   163    ret_code = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
   164    if (ret_code != 0) {
   165      *error_code = -1 * ret_code;
   166      goto CLEANUP_FREEATTR;
   167    }
   168  
   169    // Initialize the mutex that protects the bitmaps using the mutex attributes
   170    ret_code = pthread_mutex_init(&(shm->segment_lock), &attr);
   171    if (ret_code != 0) {
   172      *error_code = -1 * ret_code;
   173      goto CLEANUP_FREEATTR;
   174    }
   175  
   176    // Initialize all bitmaps to 0 initially
   177    // And initialize all semaphores they use
   178    for (i = 0; i < num_bitmaps; i++) {
   179      shm->locks[i].bitmap = 0;
   180      for (j = 0; j < BITMAP_SIZE; j++) {
   181        // Initialize each mutex
   182        ret_code = pthread_mutex_init(&(shm->locks[i].locks[j]), &attr);
   183        if (ret_code != 0) {
   184  	*error_code = -1 * ret_code;
   185  	goto CLEANUP_FREEATTR;
   186        }
   187      }
   188    }
   189  
   190    // Close the file descriptor, we're done with it
   191    // Ignore errors, it's ok if we leak a single FD and this should only run once
   192    close(shm_fd);
   193  
   194    // Destroy the pthread initializer attribute.
   195    // Again, ignore errors, this will only run once and we might leak a tiny bit
   196    // of memory at worst.
   197    pthread_mutexattr_destroy(&attr);
   198  
   199    return shm;
   200  
   201    // Cleanup after an error
   202   CLEANUP_FREEATTR:
   203    pthread_mutexattr_destroy(&attr);
   204   CLEANUP_UNMAP:
   205    munmap(shm, shm_size);
   206   CLEANUP_UNLINK:
   207    close(shm_fd);
   208    shm_unlink(path);
   209    return NULL;
   210  }
   211  
   212  // Open an existing SHM segment holding libpod locks.
   213  // num_locks is the number of locks that will be configured in the SHM segment.
   214  // num_locks cannot be 0.
   215  // Path is the path to the SHM segment. It must begin with a single / and
   216  // container no other / characters, and be at most 255 characters including
   217  // terminating NULL byte.
   218  // Returns a valid pointer on success or NULL on error.
   219  // If an error occurs, negative ERRNO values will be written to error_code.
   220  // ERANGE is returned for a mismatch between num_locks and the number of locks
   221  // available in the SHM lock struct.
   222  shm_struct_t *open_lock_shm(char *path, uint32_t num_locks, int *error_code) {
   223    int shm_fd;
   224    shm_struct_t *shm;
   225    size_t shm_size;
   226    uint32_t num_bitmaps;
   227  
   228    if (error_code == NULL) {
   229      return NULL;
   230    }
   231  
   232    // We need a nonzero number of locks
   233    if (num_locks == 0) {
   234      *error_code = -1 * EINVAL;
   235      return NULL;
   236    }
   237  
   238    if (path == NULL) {
   239      *error_code = -1 * EINVAL;
   240      return NULL;
   241    }
   242  
   243    // Calculate the number of bitmaps required
   244    num_bitmaps = num_locks / BITMAP_SIZE;
   245    if (num_locks % BITMAP_SIZE != 0) {
   246      num_bitmaps += 1;
   247    }
   248  
   249    // Calculate size of the shm segment
   250    shm_size = compute_shm_size(num_bitmaps);
   251  
   252    shm_fd = shm_open(path, O_RDWR, 0600);
   253    if (shm_fd < 0) {
   254      *error_code = -1 * errno;
   255      return NULL;
   256    }
   257  
   258    // Map the shared memory in
   259    shm = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
   260    if (shm == MAP_FAILED) {
   261      *error_code = -1 * errno;
   262    }
   263  
   264    // Ignore errors, it's ok if we leak a single FD since this only runs once
   265    close(shm_fd);
   266  
   267    // Check if we successfully mmap'd
   268    if (shm == MAP_FAILED) {
   269      return NULL;
   270    }
   271  
   272    // Need to check the SHM to see if it's actually our locks
   273    if (shm->magic != MAGIC) {
   274      *error_code = -1 * EBADF;
   275      goto CLEANUP;
   276    }
   277    if (shm->num_locks != (num_bitmaps * BITMAP_SIZE)) {
   278      *error_code = -1 * ERANGE;
   279      goto CLEANUP;
   280    }
   281  
   282    return shm;
   283  
   284   CLEANUP:
   285    munmap(shm, shm_size);
   286    return NULL;
   287  }
   288  
   289  // Close an open SHM lock struct, unmapping the backing memory.
   290  // The given shm_struct_t will be rendered unusable as a result.
   291  // On success, 0 is returned. On failure, negative ERRNO values are returned.
   292  int32_t close_lock_shm(shm_struct_t *shm) {
   293    int ret_code;
   294    size_t shm_size;
   295  
   296    // We can't unmap null...
   297    if (shm == NULL) {
   298      return -1 * EINVAL;
   299    }
   300  
   301    shm_size = compute_shm_size(shm->num_bitmaps);
   302  
   303    ret_code = munmap(shm, shm_size);
   304  
   305    if (ret_code != 0) {
   306      return -1 * errno;
   307    }
   308  
   309    return 0;
   310  }
   311  
   312  // Allocate the first available semaphore
   313  // Returns a positive integer guaranteed to be less than UINT32_MAX on success,
   314  // or negative errno values on failure
   315  // On success, the returned integer is the number of the semaphore allocated
   316  int64_t allocate_semaphore(shm_struct_t *shm) {
   317    int ret_code, i;
   318    bitmap_t test_map;
   319    int64_t sem_number, num_within_bitmap;
   320  
   321    if (shm == NULL) {
   322      return -1 * EINVAL;
   323    }
   324  
   325    // Lock the semaphore controlling access to our shared memory
   326    ret_code = take_mutex(&(shm->segment_lock), false);
   327    if (ret_code != 0) {
   328      return -1 * ret_code;
   329    }
   330  
   331    // Loop through our bitmaps to search for one that is not full
   332    for (i = 0; i < shm->num_bitmaps; i++) {
   333      if (shm->locks[i].bitmap != 0xFFFFFFFF) {
   334        test_map = 0x1;
   335        num_within_bitmap = 0;
   336        while (test_map != 0) {
   337  	if ((test_map & shm->locks[i].bitmap) == 0) {
   338  	  // Compute the number of the semaphore we are allocating
   339  	  sem_number = (BITMAP_SIZE * i) + num_within_bitmap;
   340  	  // OR in the bitmap
   341  	  shm->locks[i].bitmap = shm->locks[i].bitmap | test_map;
   342  
   343  	  // Clear the mutex
   344  	  ret_code = release_mutex(&(shm->segment_lock));
   345  	  if (ret_code != 0) {
   346  	    return -1 * ret_code;
   347  	  }
   348  
   349  	  // Return the semaphore we've allocated
   350  	  return sem_number;
   351  	}
   352  	test_map = test_map << 1;
   353  	num_within_bitmap++;
   354        }
   355        // We should never fall through this loop
   356        // TODO maybe an assert() here to panic if we do?
   357      }
   358    }
   359  
   360    // Clear the mutex
   361    ret_code = release_mutex(&(shm->segment_lock));
   362    if (ret_code != 0) {
   363      return -1 * ret_code;
   364    }
   365  
   366    // All bitmaps are full
   367    // We have no available semaphores, report allocation failure
   368    return -1 * ENOSPC;
   369  }
   370  
   371  // Allocate the semaphore with the given ID.
   372  // Returns an error if the semaphore with this ID does not exist, or has already
   373  // been allocated.
   374  // Returns 0 on success, or negative errno values on failure.
   375  int32_t allocate_given_semaphore(shm_struct_t *shm, uint32_t sem_index) {
   376    int bitmap_index, index_in_bitmap, ret_code;
   377    bitmap_t test_map;
   378  
   379    if (shm == NULL) {
   380      return -1 * EINVAL;
   381    }
   382  
   383    // Check if the lock index is valid
   384    if (sem_index >= shm->num_locks) {
   385      return -1 * EINVAL;
   386    }
   387  
   388    bitmap_index = sem_index / BITMAP_SIZE;
   389    index_in_bitmap = sem_index % BITMAP_SIZE;
   390  
   391    // This should never happen if the sem_index test above succeeded, but better
   392    // safe than sorry
   393    if (bitmap_index >= shm->num_bitmaps) {
   394      return -1 * EFAULT;
   395    }
   396  
   397    test_map = 0x1 << index_in_bitmap;
   398  
   399    // Lock the mutex controlling access to our shared memory
   400    ret_code = take_mutex(&(shm->segment_lock), false);
   401    if (ret_code != 0) {
   402      return -1 * ret_code;
   403    }
   404  
   405    // Check if the semaphore is allocated
   406    if ((test_map & shm->locks[bitmap_index].bitmap) != 0) {
   407      ret_code = release_mutex(&(shm->segment_lock));
   408      if (ret_code != 0) {
   409        return -1 * ret_code;
   410      }
   411  
   412      return -1 * EEXIST;
   413    }
   414  
   415    // The semaphore is not allocated, allocate it
   416    shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap | test_map;
   417  
   418    ret_code = release_mutex(&(shm->segment_lock));
   419    if (ret_code != 0) {
   420      return -1 * ret_code;
   421    }
   422  
   423    return 0;
   424  }
   425  
   426  // Deallocate a given semaphore
   427  // Returns 0 on success, negative ERRNO values on failure
   428  int32_t deallocate_semaphore(shm_struct_t *shm, uint32_t sem_index) {
   429    bitmap_t test_map;
   430    int bitmap_index, index_in_bitmap, ret_code;
   431  
   432    if (shm == NULL) {
   433      return -1 * EINVAL;
   434    }
   435  
   436    // Check if the lock index is valid
   437    if (sem_index >= shm->num_locks) {
   438      return -1 * EINVAL;
   439    }
   440  
   441    bitmap_index = sem_index / BITMAP_SIZE;
   442    index_in_bitmap = sem_index % BITMAP_SIZE;
   443  
   444    // This should never happen if the sem_index test above succeeded, but better
   445    // safe than sorry
   446    if (bitmap_index >= shm->num_bitmaps) {
   447      return -1 * EFAULT;
   448    }
   449  
   450    test_map = 0x1 << index_in_bitmap;
   451  
   452    // Lock the mutex controlling access to our shared memory
   453    ret_code = take_mutex(&(shm->segment_lock), false);
   454    if (ret_code != 0) {
   455      return -1 * ret_code;
   456    }
   457  
   458    // Check if the semaphore is allocated
   459    if ((test_map & shm->locks[bitmap_index].bitmap) == 0) {
   460      ret_code = release_mutex(&(shm->segment_lock));
   461      if (ret_code != 0) {
   462        return -1 * ret_code;
   463      }
   464  
   465      return -1 * ENOENT;
   466    }
   467  
   468    // The semaphore is allocated, clear it
   469    // Invert the bitmask we used to test to clear the bit
   470    test_map = ~test_map;
   471    shm->locks[bitmap_index].bitmap = shm->locks[bitmap_index].bitmap & test_map;
   472  
   473    ret_code = release_mutex(&(shm->segment_lock));
   474    if (ret_code != 0) {
   475      return -1 * ret_code;
   476    }
   477  
   478    return 0;
   479  }
   480  
   481  // Deallocate all semaphores unconditionally.
   482  // Returns negative ERRNO values.
   483  int32_t deallocate_all_semaphores(shm_struct_t *shm) {
   484    int ret_code;
   485    uint i;
   486  
   487    if (shm == NULL) {
   488      return -1 * EINVAL;
   489    }
   490  
   491    // Lock the mutex controlling access to our shared memory
   492    ret_code = take_mutex(&(shm->segment_lock), false);
   493    if (ret_code != 0) {
   494      return -1 * ret_code;
   495    }
   496  
   497    // Iterate through all bitmaps and reset to unused
   498    for (i = 0; i < shm->num_bitmaps; i++) {
   499      shm->locks[i].bitmap = 0;
   500    }
   501  
   502    // Unlock the allocation control mutex
   503    ret_code = release_mutex(&(shm->segment_lock));
   504    if (ret_code != 0) {
   505      return -1 * ret_code;
   506    }
   507  
   508    return 0;
   509  }
   510  
   511  // Lock a given semaphore
   512  // Does not check if the semaphore is allocated - this ensures that, even for
   513  // removed containers, we can still successfully lock to check status (and
   514  // subsequently realize they have been removed).
   515  // Returns 0 on success, -1 on failure
   516  int32_t lock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
   517    int bitmap_index, index_in_bitmap;
   518  
   519    if (shm == NULL) {
   520      return -1 * EINVAL;
   521    }
   522  
   523    if (sem_index >= shm->num_locks) {
   524      return -1 * EINVAL;
   525    }
   526  
   527    bitmap_index = sem_index / BITMAP_SIZE;
   528    index_in_bitmap = sem_index % BITMAP_SIZE;
   529  
   530    return -1 * take_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]), false);
   531  }
   532  
   533  // Unlock a given semaphore
   534  // Does not check if the semaphore is allocated - this ensures that, even for
   535  // removed containers, we can still successfully lock to check status (and
   536  // subsequently realize they have been removed).
   537  // Returns 0 on success, -1 on failure
   538  int32_t unlock_semaphore(shm_struct_t *shm, uint32_t sem_index) {
   539    int bitmap_index, index_in_bitmap;
   540  
   541    if (shm == NULL) {
   542      return -1 * EINVAL;
   543    }
   544  
   545    if (sem_index >= shm->num_locks) {
   546      return -1 * EINVAL;
   547    }
   548  
   549    bitmap_index = sem_index / BITMAP_SIZE;
   550    index_in_bitmap = sem_index % BITMAP_SIZE;
   551  
   552    return -1 * release_mutex(&(shm->locks[bitmap_index].locks[index_in_bitmap]));
   553  }
   554  
   555  // Get the number of free locks.
   556  // Returns a positive integer guaranteed to be less than UINT32_MAX on success,
   557  // or negative errno values on failure.
   558  // On success, the returned integer is the number of free semaphores.
   559  int64_t available_locks(shm_struct_t *shm) {
   560    int ret_code, i, count;
   561    bitmap_t test_map;
   562    int64_t free_locks = 0;
   563  
   564    if (shm == NULL) {
   565      return -1 * EINVAL;
   566    }
   567  
   568    // Lock the semaphore controlling access to the SHM segment.
   569    // This isn't strictly necessary as we're only reading, but it seems safer.
   570    ret_code = take_mutex(&(shm->segment_lock), false);
   571    if (ret_code != 0) {
   572      return -1 * ret_code;
   573    }
   574  
   575    // Loop through all bitmaps, counting number of allocated locks.
   576    for (i = 0; i < shm->num_bitmaps; i++) {
   577      // Short-circuit to catch fully-empty bitmaps quick.
   578      if (shm->locks[i].bitmap == 0) {
   579        free_locks += sizeof(bitmap_t) * 8;
   580        continue;
   581      }
   582  
   583      // Use Kernighan's Algorithm to count bits set. Subtract from number of bits
   584      // in the integer to get free bits, and thus free lock count.
   585      test_map = shm->locks[i].bitmap;
   586      count = 0;
   587      while (test_map) {
   588        test_map = test_map & (test_map - 1);
   589        count++;
   590      }
   591  
   592      free_locks += (sizeof(bitmap_t) * 8) - count;
   593    }
   594  
   595    // Clear the mutex
   596    ret_code = release_mutex(&(shm->segment_lock));
   597    if (ret_code != 0) {
   598      return -1 * ret_code;
   599    }
   600  
   601    // Return free lock count.
   602    return free_locks;
   603  }
   604  
   605  // Attempt to take a given semaphore. If successfully taken, it is immediately
   606  // released before the function returns.
   607  // Used to check if a semaphore is in use, to detect potential deadlocks where a
   608  // lock has not been released for an extended period of time.
   609  // Note that this is NOT POSIX trylock as the lock is immediately released if
   610  // taken.
   611  // Returns negative errno on failure.
   612  // On success, returns 1 if the lock was successfully taken, and 0 if it was
   613  // not.
   614  int32_t try_lock(shm_struct_t *shm, uint32_t sem_index) {
   615    int bitmap_index, index_in_bitmap, ret_code;
   616    pthread_mutex_t *mutex;
   617  
   618    if (shm == NULL) {
   619      return -1 * EINVAL;
   620    }
   621  
   622    if (sem_index >= shm->num_locks) {
   623      return -1 * EINVAL;
   624    }
   625  
   626    bitmap_index = sem_index / BITMAP_SIZE;
   627    index_in_bitmap = sem_index % BITMAP_SIZE;
   628  
   629    mutex = &(shm->locks[bitmap_index].locks[index_in_bitmap]);
   630  
   631    ret_code = take_mutex(mutex, true);
   632  
   633    if (ret_code == EBUSY) {
   634      // Did not successfully take the lock
   635      return 0;
   636    } else if (ret_code != 0) {
   637      // Another, unrelated error
   638      return -1 * ret_code;
   639    }
   640  
   641    // Lock taken successfully, unlock and return.
   642    ret_code = release_mutex(mutex);
   643    if (ret_code != 0) {
   644      return -1 * ret_code;
   645    }
   646  
   647    return 1;
   648  }