github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/dlm/llm.go (about)

     1  // Copyright (c) 2015-2021, NVIDIA CORPORATION.
     2  // SPDX-License-Identifier: Apache-2.0
     3  
     4  package dlm
     5  
     6  import (
     7  	"container/list"
     8  	"errors"
     9  	"fmt"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/swiftstack/ProxyFS/blunder"
    14  	"github.com/swiftstack/ProxyFS/trackedlock"
    15  )
    16  
// This struct is used by LLM to track a lock.
//
// The embedded trackedlock.Mutex is the "tracking mutex" that the functions in
// this file take before reading or changing the other fields.
type localLockTrack struct {
	trackedlock.Mutex
	lockId  string // lock identity (must be unique)
	owners  uint64 // Count of threads which own lock
	waiters uint64 // Count of threads which want to own the lock (either shared or exclusive)
	// Current state of the lock (stale, shared or exclusive).
	state lockState
	// Caller holding the lock exclusively; set by grantAndSignal() and reset
	// to nil by unlock() when the exclusive owner releases the lock.
	exclOwner CallerID
	// One entry per current owner; appended to by grantAndSignal() and
	// trimmed by removeFromListOfOwners().
	listOfOwners []CallerID
	waitReqQ     *list.List               // List of requests waiting for lock
	rwMutexTrack trackedlock.RWMutexTrack // Track the lock to see how long its held
}
    29  
    30  var localLockTrackPool = sync.Pool{
    31  	New: func() interface{} {
    32  		var track localLockTrack
    33  
    34  		// every localLockTrack should have a waitReqQ
    35  		track.waitReqQ = list.New()
    36  
    37  		return &track
    38  	},
    39  }
    40  
// localLockRequest is one request for a lock.  commonLock() creates it, puts
// it on the track's waitReqQ and then waits on the embedded Cond until wakeUp
// has been set.
type localLockRequest struct {
	// State (shared or exclusive) the caller is asking for.
	requestedState lockState
	// Condition variable built on the track's mutex; grantAndSignal()
	// broadcasts on it once the request has been granted.
	*sync.Cond
	// Set to true by grantAndSignal() when the request is granted.
	wakeUp bool
	// Identity of the caller making the request.
	LockCallerID CallerID
}
    47  
// lockState describes the state of a tracked lock (localLockTrack.state) and
// the kind of ownership a request asks for (localLockRequest.requestedState).
type lockState int

const (
	// nilType is the zero value; nothing in the code visible here assigns it.
	nilType lockState = iota
	// shared: the lock is held by one or more shared owners.
	shared
	// exclusive: the lock is held by exactly one owner.
	exclusive
	// stale: the lock has no owners.  A new track starts out stale and
	// unlock() sets it again when the owner count drops to zero.
	stale
)
    56  
    57  // NOTE: This is a test-only interface used for unit tests.
    58  //
    59  // This function assumes that globals.Lock() is held.
    60  // TODO - can this be used in more cases without creating entry it if does not exist?
    61  func getTrack(lockId string) (track *localLockTrack, ok bool) {
    62  	track, ok = globals.localLockMap[lockId]
    63  	if !ok {
    64  		return track, ok
    65  	}
    66  	return track, ok
    67  }
    68  
    69  // NOTE: This is a test-only interface used for unit tests.
    70  func waitCountWaiters(lockId string, count uint64) {
    71  	for {
    72  		globals.Lock()
    73  		track, ok := getTrack(lockId)
    74  
    75  		// If the tracking object has not been created yet, sleep and retry.
    76  		if !ok {
    77  			// Sleep 5 milliseconds and test again
    78  			globals.Unlock()
    79  			time.Sleep(5 * time.Millisecond)
    80  			break
    81  		}
    82  
    83  		track.Mutex.Lock()
    84  
    85  		globals.Unlock()
    86  
    87  		waiters := track.waiters
    88  		track.Mutex.Unlock()
    89  
    90  		if waiters == count {
    91  			return
    92  		} else {
    93  			// Sleep 5 milliseconds and test again
    94  			time.Sleep(5 * time.Millisecond)
    95  		}
    96  	}
    97  }
    98  
    99  // NOTE: This is a test-only interface used for unit tests.
   100  func waitCountOwners(lockId string, count uint64) {
   101  	for {
   102  		globals.Lock()
   103  		track, ok := getTrack(lockId)
   104  
   105  		// If the tracking object has not been created yet, sleep and retry.
   106  		if !ok {
   107  			// Sleep 5 milliseconds and test again
   108  			globals.Unlock()
   109  			time.Sleep(5 * time.Millisecond)
   110  			break
   111  		}
   112  
   113  		track.Mutex.Lock()
   114  
   115  		globals.Unlock()
   116  
   117  		owners := track.owners
   118  		track.Mutex.Unlock()
   119  
   120  		if owners == count {
   121  			return
   122  		} else {
   123  			// Sleep 5 milliseconds and test again
   124  			time.Sleep(5 * time.Millisecond)
   125  		}
   126  	}
   127  }
   128  
   129  // This function assumes the mutex is held on the tracker structure
   130  func (t *localLockTrack) removeFromListOfOwners(callerID CallerID) {
   131  
   132  	// Find Position and delete entry (a map might be more efficient)
   133  	for i, id := range t.listOfOwners {
   134  		if id == callerID {
   135  			lastIdx := len(t.listOfOwners) - 1
   136  			t.listOfOwners[i] = t.listOfOwners[lastIdx]
   137  			t.listOfOwners = t.listOfOwners[:lastIdx]
   138  			return
   139  		}
   140  	}
   141  
   142  	panic(fmt.Sprintf("Can't find CallerID: %v in list of lock owners!", callerID))
   143  }
   144  
   145  // This function assumes the mutex is held on the tracker structure
   146  func callerInListOfOwners(listOfOwners []CallerID, callerID CallerID) (amOwner bool) {
   147  	// Find Position
   148  	for _, id := range listOfOwners {
   149  		if id == callerID {
   150  			return true
   151  		}
   152  	}
   153  
   154  	return false
   155  }
   156  
   157  func isLockHeld(lockID string, callerID CallerID, lockHeldType LockHeldType) (held bool) {
   158  	globals.Lock()
   159  	// NOTE: Not doing a defer globals.Unlock() here since grabbing another lock below.
   160  
   161  	track, ok := globals.localLockMap[lockID]
   162  	if !ok {
   163  
   164  		// Lock does not exist in map
   165  		globals.Unlock()
   166  		return false
   167  	}
   168  
   169  	track.Mutex.Lock()
   170  
   171  	globals.Unlock()
   172  
   173  	defer track.Mutex.Unlock()
   174  
   175  	switch lockHeldType {
   176  	case READLOCK:
   177  		return (track.state == shared) && (callerInListOfOwners(track.listOfOwners, callerID))
   178  	case WRITELOCK:
   179  		return (track.state == exclusive) && (callerInListOfOwners(track.listOfOwners, callerID))
   180  	case ANYLOCK:
   181  		return ((track.state == exclusive) || (track.state == shared)) && (callerInListOfOwners(track.listOfOwners, callerID))
   182  	}
   183  	return false
   184  }
   185  
   186  func grantAndSignal(track *localLockTrack, localQRequest *localLockRequest) {
   187  	track.state = localQRequest.requestedState
   188  	track.listOfOwners = append(track.listOfOwners, localQRequest.LockCallerID)
   189  	track.owners++
   190  
   191  	if track.state == exclusive {
   192  		if track.exclOwner != nil || track.owners != 1 {
   193  			panic(fmt.Sprintf("granted exclusive lock when (exclOwner != nil || track.owners != 1)! "+
   194  				"track lockId %v owners %d waiters %d lockState %v exclOwner %v listOfOwners %v",
   195  				track.lockId, track.owners, track.waiters, track.state,
   196  				*track.exclOwner, track.listOfOwners))
   197  		}
   198  		track.exclOwner = localQRequest.LockCallerID
   199  	}
   200  
   201  	localQRequest.wakeUp = true
   202  	localQRequest.Cond.Broadcast()
   203  }
   204  
   205  // Process the waitReqQ and see if any locks can be granted.
   206  //
   207  // This function assumes that the tracking mutex is held.
   208  func processLocalQ(track *localLockTrack) {
   209  
   210  	// If nothing on queue then return
   211  	if track.waitReqQ.Len() == 0 {
   212  		return
   213  	}
   214  
   215  	// If the lock is already held exclusively then nothing to do.
   216  	if track.state == exclusive {
   217  		return
   218  	}
   219  
   220  	// At this point, the lock is either stale or shared
   221  	//
   222  	// Loop through Q and see if a request can be granted.  If it can then pop it off the Q.
   223  	for track.waitReqQ.Len() > 0 {
   224  		elem := track.waitReqQ.Remove(track.waitReqQ.Front())
   225  		var localQRequest *localLockRequest
   226  		var ok bool
   227  		if localQRequest, ok = elem.(*localLockRequest); !ok {
   228  			panic("Remove of elem failed!!!")
   229  		}
   230  
   231  		// If the lock is already free and then want it exclusive
   232  		if (localQRequest.requestedState == exclusive) && (track.state == stale) {
   233  			grantAndSignal(track, localQRequest)
   234  			return
   235  		}
   236  
   237  		// If want exclusive and not free, we can't grant so push on front and break from loop.
   238  		if localQRequest.requestedState == exclusive {
   239  			track.waitReqQ.PushFront(localQRequest)
   240  			return
   241  		}
   242  
   243  		// At this point we know the Q entry is shared.  Grant it now.
   244  		grantAndSignal(track, localQRequest)
   245  	}
   246  }
   247  
   248  func (l *RWLockStruct) commonLock(requestedState lockState, try bool) (err error) {
   249  
   250  	globals.Lock()
   251  	track, ok := globals.localLockMap[l.LockID]
   252  	if !ok {
   253  		// TODO - handle blocking waiting for lock from DLM
   254  
   255  		// Lock does not exist in map, get one
   256  		track = localLockTrackPool.Get().(*localLockTrack)
   257  		if track.waitReqQ.Len() != 0 {
   258  			panic(fmt.Sprintf("localLockTrack object %p from pool does not have empty waitReqQ",
   259  				track))
   260  		}
   261  		if len(track.listOfOwners) != 0 {
   262  			panic(fmt.Sprintf("localLockTrack object %p  from pool does not have empty ListOfOwners",
   263  				track))
   264  		}
   265  		track.lockId = l.LockID
   266  		track.state = stale
   267  
   268  		globals.localLockMap[l.LockID] = track
   269  
   270  	}
   271  
   272  	track.Mutex.Lock()
   273  	defer track.Mutex.Unlock()
   274  
   275  	globals.Unlock()
   276  
   277  	// If we are doing a TryWriteLock or TryReadLock, see if we could
   278  	// grab the lock before putting on queue.
   279  	if try {
   280  		if (requestedState == exclusive) && (track.state != stale) {
   281  			err = errors.New("Lock is busy - try again!")
   282  			return blunder.AddError(err, blunder.TryAgainError)
   283  		} else {
   284  			if track.state == exclusive {
   285  				err = errors.New("Lock is busy - try again!")
   286  				return blunder.AddError(err, blunder.TryAgainError)
   287  			}
   288  		}
   289  	}
   290  	localRequest := localLockRequest{requestedState: requestedState, LockCallerID: l.LockCallerID, wakeUp: false}
   291  	localRequest.Cond = sync.NewCond(&track.Mutex)
   292  	track.waitReqQ.PushBack(&localRequest)
   293  
   294  	track.waiters++
   295  
   296  	// See if any locks can be granted
   297  	processLocalQ(track)
   298  
   299  	// wakeUp will already be true if processLocalQ() signaled this thread to wakeup.
   300  	for localRequest.wakeUp == false {
   301  		localRequest.Cond.Wait()
   302  	}
   303  
   304  	// sanity check request and lock state
   305  	if localRequest.wakeUp != true {
   306  		panic(fmt.Sprintf("commonLock(): thread awoke without being signalled; localRequest %v "+
   307  			"track lockId %v owners %d waiters %d lockState %v exclOwner %v listOfOwners %v",
   308  			localRequest, track.lockId, track.owners, track.waiters, track.state,
   309  			*track.exclOwner, track.listOfOwners))
   310  	}
   311  	if track.state == stale || track.owners == 0 || (track.owners > 1 && track.state != shared) {
   312  		panic(fmt.Sprintf("commonLock(): lock is in undefined state: localRequest %v "+
   313  			"track lockId %v owners %d waiters %d lockState %v exclOwner %v listOfOwners %v",
   314  			localRequest, track.lockId, track.owners, track.waiters, track.state,
   315  			*track.exclOwner, track.listOfOwners))
   316  	}
   317  
   318  	// let trackedlock package track how long we hold the lock
   319  	if track.state == exclusive {
   320  		track.rwMutexTrack.LockTrack(track)
   321  	} else {
   322  		track.rwMutexTrack.RLockTrack(track)
   323  	}
   324  
   325  	// At this point, we got the lock either by the call to processLocalQ() above
   326  	// or as a result of processLocalQ() being called from the unlock() path.
   327  
   328  	// We decrement waiters here instead of in processLocalQ() so that other threads do not
   329  	// assume there are no waiters between the time the Cond is signaled and we wakeup this thread.
   330  	track.waiters--
   331  
   332  	return nil
   333  }
   334  
   335  // unlock() releases the lock and signals any waiters that the lock is free.
   336  func (l *RWLockStruct) unlock() (err error) {
   337  
   338  	// TODO - assert not stale and if shared that count != 0
   339  	globals.Lock()
   340  	track, ok := globals.localLockMap[l.LockID]
   341  	if !ok {
   342  		panic(fmt.Sprintf("Trying to Unlock() inode: %v and lock not found in localLockMap()!", l.LockID))
   343  	}
   344  
   345  	track.Mutex.Lock()
   346  
   347  	// Remove lock from localLockMap if no other thread using.
   348  	//
   349  	// We have track structure for lock.  While holding mutex on localLockMap, remove
   350  	// lock from map if we are the last holder of the lock.
   351  	// TODO - does this handle revoke case and any others?
   352  	var deleted = false
   353  	if (track.owners == 1) && (track.waiters == 0) {
   354  		deleted = true
   355  		delete(globals.localLockMap, l.LockID)
   356  	}
   357  
   358  	globals.Unlock()
   359  
   360  	// TODO - handle release of lock back to DLM and delete from localLockMap
   361  	// Set stale and signal any waiters
   362  	track.owners--
   363  	track.removeFromListOfOwners(l.LockCallerID)
   364  	if track.state == exclusive {
   365  		if track.owners != 0 || track.exclOwner == nil {
   366  			panic(fmt.Sprintf("releasing exclusive lock when (exclOwner == nil || track.owners != 0)! "+
   367  				"track lockId %v owners %d waiters %d lockState %v exclOwner %v listOfOwners %v",
   368  				track.lockId, track.owners, track.waiters, track.state,
   369  				*track.exclOwner, track.listOfOwners))
   370  		}
   371  		track.exclOwner = nil
   372  	}
   373  
   374  	if track.owners == 0 {
   375  		track.state = stale
   376  	} else {
   377  		if track.owners < 0 {
   378  			panic("track.owners < 0!!!")
   379  		}
   380  	}
   381  	// record the release of the lock
   382  	track.rwMutexTrack.DLMUnlockTrack(track)
   383  
   384  	// See if any locks can be granted
   385  	processLocalQ(track)
   386  
   387  	track.Mutex.Unlock()
   388  
   389  	// can't return the
   390  	if deleted {
   391  		if track.waitReqQ.Len() != 0 || track.waiters != 0 || track.state != stale {
   392  			panic(fmt.Sprintf(
   393  				"localLockTrack object %p retrieved from pool does not have an empty waitReqQ",
   394  				track.waitReqQ))
   395  		}
   396  		localLockTrackPool.Put(track)
   397  	}
   398  
   399  	// TODO what error is possible?
   400  	return nil
   401  }