github.com/swiftstack/proxyfs@v0.0.0-20201223034610-5434d919416e/fs/api_internal.go (about)

     1  // Package fs, sitting on top of the inode manager, defines the filesystem exposed by ProxyFS.
     2  package fs
     3  
     4  import (
     5  	"bytes"
     6  	"container/list"
     7  	"fmt"
     8  	"math"
     9  	"path"
    10  	"strings"
    11  	"syscall"
    12  	"time"
    13  
    14  	"github.com/swiftstack/ProxyFS/blunder"
    15  	"github.com/swiftstack/ProxyFS/dlm"
    16  	"github.com/swiftstack/ProxyFS/inode"
    17  	"github.com/swiftstack/ProxyFS/logger"
    18  	"github.com/swiftstack/ProxyFS/utils"
    19  )
    20  
    21  // Shorthand for our internal API debug log id; global to the package
    22  var internalDebug = logger.DbgInternal
    23  
    24  type symlinkFollowState struct {
    25  	seen      map[inode.InodeNumber]bool
    26  	traversed int
    27  }
    28  
    29  // Let us sort an array of directory and file names
    30  type dirAndFileName struct {
    31  	dirName  string
    32  	fileName string
    33  }
    34  
    35  // this has to be a named type to be a method receiver
    36  type dirAndFileNameSlice []dirAndFileName
    37  
    38  func (coll dirAndFileNameSlice) Len() int {
    39  	return len(coll)
    40  }
    41  
    42  func (coll dirAndFileNameSlice) Less(i int, j int) bool {
    43  	return coll[i].dirName < coll[j].dirName
    44  }
    45  
    46  func (coll dirAndFileNameSlice) Swap(i int, j int) {
    47  	coll[i], coll[j] = coll[j], coll[i]
    48  }
    49  
    50  // trackInFlightFileInodeData is called to ensure a timely Flush occurs.
    51  //
    52  // Only Write() will call this while holding a WriteLock on the fileInode
    53  // either just before or just after its call to inode.Write().
    54  func (vS *volumeStruct) trackInFlightFileInodeData(inodeNumber inode.InodeNumber) {
    55  	var (
    56  		inFlightFileInodeData *inFlightFileInodeDataStruct
    57  		ok                    bool
    58  	)
    59  
    60  	globals.Lock()
    61  	vS.dataMutex.Lock()
    62  	inFlightFileInodeData, ok = vS.inFlightFileInodeDataMap[inodeNumber]
    63  	if !ok {
    64  		inFlightFileInodeData = &inFlightFileInodeDataStruct{
    65  			InodeNumber: inodeNumber,
    66  			volStruct:   vS,
    67  			control:     make(chan bool, inFlightFileInodeDataControlBuffering),
    68  		}
    69  		vS.inFlightFileInodeDataMap[inodeNumber] = inFlightFileInodeData
    70  		inFlightFileInodeData.globalsListElement = globals.inFlightFileInodeDataList.PushBack(inFlightFileInodeData)
    71  		inFlightFileInodeData.wg.Add(1)
    72  		go inFlightFileInodeData.inFlightFileInodeDataTracker()
    73  	}
    74  	vS.dataMutex.Unlock()
    75  	globals.Unlock()
    76  }
    77  
    78  // untrackInFlightInodeData is called once it is known a Flush() is no longer needed
    79  // or to actually request a Flush() [as would be the case during unmounting a volume].
    80  func (vS *volumeStruct) untrackInFlightFileInodeData(inodeNumber inode.InodeNumber, flushFirst bool) {
    81  	var (
    82  		inFlightFileInodeData *inFlightFileInodeDataStruct
    83  		ok                    bool
    84  	)
    85  
    86  	globals.Lock()
    87  	vS.dataMutex.Lock()
    88  	inFlightFileInodeData, ok = vS.inFlightFileInodeDataMap[inodeNumber]
    89  	if !ok {
    90  		vS.dataMutex.Unlock()
    91  		globals.Unlock()
    92  		return
    93  	}
    94  	delete(vS.inFlightFileInodeDataMap, inodeNumber)
    95  	if nil != inFlightFileInodeData.globalsListElement {
    96  		_ = globals.inFlightFileInodeDataList.Remove(inFlightFileInodeData.globalsListElement)
    97  		inFlightFileInodeData.globalsListElement = nil
    98  	}
    99  	inFlightFileInodeData.control <- flushFirst
   100  	vS.dataMutex.Unlock()
   101  	globals.Unlock()
   102  	if flushFirst {
   103  		inFlightFileInodeData.wg.Wait()
   104  	}
   105  }
   106  
   107  // untrackInFlightFileInodeDataAll is called to flush all current elements
   108  // of vS.inFlightFileInodeDataMap (if any) during SIGHUP or Down().
   109  func (vS *volumeStruct) untrackInFlightFileInodeDataAll() {
   110  	var (
   111  		inFlightFileInodeNumber          inode.InodeNumber
   112  		inFlightFileInodeNumbers         []inode.InodeNumber
   113  		inFlightFileInodeNumbersCapacity int
   114  	)
   115  
   116  	// Snapshot list of inode.InodeNumber's currently in vS.inFlightFileInodeDataMap
   117  
   118  	vS.dataMutex.Lock()
   119  	inFlightFileInodeNumbersCapacity = len(vS.inFlightFileInodeDataMap)
   120  	if 0 == inFlightFileInodeNumbersCapacity {
   121  		vS.dataMutex.Unlock()
   122  		return
   123  	}
   124  	inFlightFileInodeNumbers = make([]inode.InodeNumber, 0, inFlightFileInodeNumbersCapacity)
   125  	for inFlightFileInodeNumber, _ = range vS.inFlightFileInodeDataMap {
   126  		inFlightFileInodeNumbers = append(inFlightFileInodeNumbers, inFlightFileInodeNumber)
   127  	}
   128  	vS.dataMutex.Unlock()
   129  
   130  	// Now go flush each of those
   131  
   132  	for _, inFlightFileInodeNumber = range inFlightFileInodeNumbers {
   133  		vS.untrackInFlightFileInodeData(inFlightFileInodeNumber, true)
   134  	}
   135  }
   136  
   137  func (vS *volumeStruct) inFlightFileInodeDataFlusher(inodeNumber inode.InodeNumber) {
   138  	var (
   139  		err         error
   140  		inodeLock   *dlm.RWLockStruct
   141  		stillExists bool
   142  	)
   143  
   144  	// Act as if a package fs client called Flush()...
   145  
   146  	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
   147  	if nil != err {
   148  		logger.PanicfWithError(err, "InitInodeLock() for volume '%s' inode %v failed", vS.volumeName, inodeNumber)
   149  	}
   150  	err = inodeLock.WriteLock()
   151  	if nil != err {
   152  		logger.PanicfWithError(err, "dlm.Writelock() for volume '%s' inode %v failed", vS.volumeName, inodeNumber)
   153  	}
   154  
   155  	stillExists = vS.inodeVolumeHandle.Access(inodeNumber, inode.InodeRootUserID, inode.InodeGroupID(0), nil, inode.F_OK,
   156  		inode.NoOverride)
   157  	if stillExists {
   158  		err = vS.inodeVolumeHandle.Flush(inodeNumber, false)
   159  		if nil == err {
   160  			vS.untrackInFlightFileInodeData(inodeNumber, false)
   161  		} else {
   162  			logger.ErrorfWithError(err, "Flush of file data failed on volume '%s' inode %v", vS.volumeName, inodeNumber)
   163  		}
   164  	}
   165  
   166  	err = inodeLock.Unlock()
   167  	if nil != err {
   168  		logger.PanicfWithError(err, "dlm.Unlock() for volume '%s' inode %v failed", vS.volumeName, inodeNumber)
   169  	}
   170  }
   171  
   172  func (inFlightFileInodeData *inFlightFileInodeDataStruct) inFlightFileInodeDataTracker() {
   173  	var (
   174  		flushFirst bool
   175  	)
   176  
   177  	logger.Tracef("fs.inFlightFileInodeDataTracker(): waiting to flush volume '%s' inode %v",
   178  		inFlightFileInodeData.volStruct.volumeName, inFlightFileInodeData.InodeNumber)
   179  
   180  	select {
   181  	case flushFirst = <-inFlightFileInodeData.control:
   182  		// All we needed was the value of flushFirst from control chan
   183  	case <-time.After(inFlightFileInodeData.volStruct.maxFlushTime):
   184  		flushFirst = true
   185  	}
   186  
   187  	logger.Tracef("fs.inFlightFileInodeDataTracker(): flush starting for volume '%s' inode %v flushfirst %t",
   188  		inFlightFileInodeData.volStruct.volumeName, inFlightFileInodeData.InodeNumber, flushFirst)
   189  
   190  	if flushFirst {
   191  		inFlightFileInodeData.volStruct.inFlightFileInodeDataFlusher(inFlightFileInodeData.InodeNumber)
   192  	}
   193  
   194  	inFlightFileInodeData.wg.Done()
   195  }
   196  
   197  func fetchVolumeHandleByAccountName(accountName string) (volumeHandle VolumeHandle, err error) {
   198  	var (
   199  		ok         bool
   200  		vS         *volumeStruct
   201  		volumeName string
   202  	)
   203  
   204  	startTime := time.Now()
   205  	defer func() {
   206  		globals.FetchVolumeHandleUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   207  		if err != nil {
   208  			globals.FetchVolumeHandleErrors.Add(1)
   209  		}
   210  	}()
   211  
   212  	globals.Lock()
   213  
   214  	volumeName, ok = inode.AccountNameToVolumeName(accountName)
   215  	if !ok {
   216  		err = fmt.Errorf("Unknown accountName passed to mountByAccountName(): \"%s\"", accountName)
   217  		err = blunder.AddError(err, blunder.NotFoundError)
   218  		globals.Unlock()
   219  		return
   220  	}
   221  
   222  	vS, ok = globals.volumeMap[volumeName]
   223  	if !ok {
   224  		err = fmt.Errorf("Unknown volumeName computed by mountByAccountName(): \"%s\"", volumeName)
   225  		err = blunder.AddError(err, blunder.NotFoundError)
   226  		globals.Unlock()
   227  		return
   228  	}
   229  
   230  	globals.Unlock()
   231  
   232  	volumeHandle = vS
   233  	err = nil
   234  
   235  	return
   236  }
   237  
   238  func fetchVolumeHandleByVolumeName(volumeName string) (volumeHandle VolumeHandle, err error) {
   239  	var (
   240  		ok bool
   241  		vS *volumeStruct
   242  	)
   243  
   244  	startTime := time.Now()
   245  	defer func() {
   246  		globals.FetchVolumeHandleUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   247  		if err != nil {
   248  			globals.FetchVolumeHandleErrors.Add(1)
   249  		}
   250  	}()
   251  
   252  	globals.Lock()
   253  
   254  	vS, ok = globals.volumeMap[volumeName]
   255  	if !ok {
   256  		err = fmt.Errorf("Unknown volumeName passed to mountByVolumeName(): \"%s\"", volumeName)
   257  		err = blunder.AddError(err, blunder.NotFoundError)
   258  		globals.Unlock()
   259  		return
   260  	}
   261  
   262  	globals.Unlock()
   263  
   264  	volumeHandle = vS
   265  	err = nil
   266  
   267  	return
   268  }
   269  
   270  func (vS *volumeStruct) Access(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, accessMode inode.InodeMode) (accessReturn bool) {
   271  	startTime := time.Now()
   272  	defer func() {
   273  		globals.AccessUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   274  	}()
   275  
   276  	vS.jobRWMutex.RLock()
   277  	defer vS.jobRWMutex.RUnlock()
   278  
   279  	accessReturn = vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, accessMode,
   280  		inode.NoOverride)
   281  	return
   282  }
   283  
   284  func (vS *volumeStruct) CallInodeToProvisionObject() (pPath string, err error) {
   285  	startTime := time.Now()
   286  	defer func() {
   287  		globals.CallInodeToProvisionObjectUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   288  		if err != nil {
   289  			globals.CallInodeToProvisionObjectErrors.Add(1)
   290  		}
   291  	}()
   292  
   293  	vS.jobRWMutex.RLock()
   294  	defer vS.jobRWMutex.RUnlock()
   295  
   296  	pPath, err = vS.inodeVolumeHandle.ProvisionObject()
   297  	return
   298  }
   299  
   300  func (vS *volumeStruct) Create(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (fileInodeNumber inode.InodeNumber, err error) {
   301  	startTime := time.Now()
   302  	defer func() {
   303  		globals.CreateUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   304  		if err != nil {
   305  			globals.CreateErrors.Add(1)
   306  		}
   307  	}()
   308  
   309  	vS.jobRWMutex.RLock()
   310  	defer vS.jobRWMutex.RUnlock()
   311  
   312  	err = validateBaseName(basename)
   313  	if err != nil {
   314  		return 0, err
   315  	}
   316  
   317  	// Lock the directory inode before doing the link
   318  	dirInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(dirInodeNumber, nil)
   319  	if err != nil {
   320  		return 0, err
   321  	}
   322  	err = dirInodeLock.WriteLock()
   323  	if err != nil {
   324  		return 0, err
   325  	}
   326  	defer dirInodeLock.Unlock()
   327  
   328  	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
   329  		inode.NoOverride) {
   330  		return 0, blunder.NewError(blunder.NotFoundError, "ENOENT")
   331  	}
   332  	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
   333  		inode.NoOverride) {
   334  		return 0, blunder.NewError(blunder.PermDeniedError, "EACCES")
   335  	}
   336  
   337  	// create the file and add it to the directory
   338  	fileInodeNumber, err = vS.inodeVolumeHandle.CreateFile(filePerm, userID, groupID)
   339  	if err != nil {
   340  		return 0, err
   341  	}
   342  
   343  	err = vS.inodeVolumeHandle.Link(dirInodeNumber, basename, fileInodeNumber, false)
   344  	if err != nil {
   345  		destroyErr := vS.inodeVolumeHandle.Destroy(fileInodeNumber)
   346  		if destroyErr != nil {
   347  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Link() in fs.Create", fileInodeNumber)
   348  		}
   349  		return 0, err
   350  	}
   351  
   352  	return fileInodeNumber, nil
   353  }
   354  
   355  func (vS *volumeStruct) DefragmentFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber) (err error) {
   356  	var (
   357  		eofReached bool
   358  		fileOffset uint64
   359  		inodeLock  *dlm.RWLockStruct
   360  		inodeType  inode.InodeType
   361  	)
   362  
   363  	startTime := time.Now()
   364  	defer func() {
   365  		globals.DefragmentFileUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   366  		if err != nil {
   367  			globals.DefragmentFileErrors.Add(1)
   368  		}
   369  	}()
   370  
   371  	vS.jobRWMutex.RLock()
   372  
   373  	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(fileInodeNumber, nil)
   374  	if nil != err {
   375  		vS.jobRWMutex.RUnlock()
   376  		return
   377  	}
   378  	err = inodeLock.WriteLock()
   379  	if nil != err {
   380  		vS.jobRWMutex.RUnlock()
   381  		return
   382  	}
   383  
   384  	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
   385  		inode.NoOverride) {
   386  		_ = inodeLock.Unlock()
   387  		vS.jobRWMutex.RUnlock()
   388  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
   389  		return
   390  	}
   391  	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
   392  		inode.OwnerOverride) {
   393  		_ = inodeLock.Unlock()
   394  		vS.jobRWMutex.RUnlock()
   395  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
   396  		return
   397  	}
   398  
   399  	inodeType, err = vS.inodeVolumeHandle.GetType(fileInodeNumber)
   400  	if nil != err {
   401  		_ = inodeLock.Unlock()
   402  		vS.jobRWMutex.RUnlock()
   403  		logger.ErrorfWithError(err, "couldn't get type for inode %v", fileInodeNumber)
   404  		return
   405  	}
   406  	// Make sure the inode number is for a file inode
   407  	if inodeType != inode.FileType {
   408  		_ = inodeLock.Unlock()
   409  		vS.jobRWMutex.RUnlock()
   410  		err = fmt.Errorf("%s: expected inode %v to be a file inode, got %v", utils.GetFnName(), fileInodeNumber, inodeType)
   411  		logger.ErrorWithError(err)
   412  		err = blunder.AddError(err, blunder.NotFileError)
   413  		return
   414  	}
   415  
   416  	fileOffset = 0
   417  
   418  	for {
   419  		fileOffset, eofReached, err = vS.inodeVolumeHandle.DefragmentFile(fileInodeNumber, fileOffset, vS.fileDefragmentChunkSize)
   420  		_ = inodeLock.Unlock()
   421  		vS.jobRWMutex.RUnlock()
   422  		if nil != err {
   423  			return
   424  		}
   425  		if eofReached {
   426  			return
   427  		}
   428  		time.Sleep(vS.fileDefragmentChunkDelay)
   429  		vS.jobRWMutex.RLock()
   430  		err = inodeLock.WriteLock()
   431  		if nil != err {
   432  			vS.jobRWMutex.RUnlock()
   433  			return
   434  		}
   435  	}
   436  }
   437  
   438  func (vS *volumeStruct) FetchExtentMapChunk(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber, fileOffset uint64, maxEntriesFromFileOffset int64, maxEntriesBeforeFileOffset int64) (extentMapChunk *inode.ExtentMapChunkStruct, err error) {
   439  	var (
   440  		inodeLock *dlm.RWLockStruct
   441  		inodeType inode.InodeType
   442  	)
   443  
   444  	startTime := time.Now()
   445  	defer func() {
   446  		globals.FetchExtentMapChunkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   447  		if err != nil {
   448  			globals.FetchExtentMapChunkErrors.Add(1)
   449  		}
   450  	}()
   451  
   452  	vS.jobRWMutex.RLock()
   453  	defer vS.jobRWMutex.RUnlock()
   454  
   455  	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(fileInodeNumber, nil)
   456  	if nil != err {
   457  		return
   458  	}
   459  	err = inodeLock.ReadLock()
   460  	if nil != err {
   461  		return
   462  	}
   463  	defer inodeLock.Unlock()
   464  
   465  	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
   466  		inode.NoOverride) {
   467  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
   468  		return
   469  	}
   470  	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
   471  		inode.OwnerOverride) {
   472  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
   473  		return
   474  	}
   475  
   476  	inodeType, err = vS.inodeVolumeHandle.GetType(fileInodeNumber)
   477  	if nil != err {
   478  		logger.ErrorfWithError(err, "couldn't get type for inode %v", fileInodeNumber)
   479  		return
   480  	}
   481  	// Make sure the inode number is for a file inode
   482  	if inodeType != inode.FileType {
   483  		err = fmt.Errorf("%s: expected inode %v to be a file inode, got %v", utils.GetFnName(), fileInodeNumber, inodeType)
   484  		logger.ErrorWithError(err)
   485  		err = blunder.AddError(err, blunder.NotFileError)
   486  		return
   487  	}
   488  
   489  	extentMapChunk, err = vS.inodeVolumeHandle.FetchExtentMapChunk(fileInodeNumber, fileOffset, maxEntriesFromFileOffset, maxEntriesBeforeFileOffset)
   490  
   491  	return
   492  }
   493  
   494  // doInlineCheckpointIfEnabled is called whenever we must guarantee that reported state changes
   495  // are, indeed, persisted. Absent any sort of persistent transaction log, this means performing
   496  // a checkpoint unfortunately.
   497  //
   498  // Currently, only explicitly invoked Flushes trigger this. But, actually, any Swift/S3 API call
   499  // that modifies Objects or (what the client thinks are) Containers should also.
   500  //
   501  // TODO is to determine where else a call to this func should also be made.
   502  //
   503  func (vS *volumeStruct) doInlineCheckpointIfEnabled() {
   504  	var (
   505  		err error
   506  	)
   507  
   508  	if !vS.doCheckpointPerFlush {
   509  		return
   510  	}
   511  
   512  	err = vS.headhunterVolumeHandle.DoCheckpoint()
   513  	if nil != err {
   514  		logger.Fatalf("fs.doInlineCheckpoint() call to headhunter.DoCheckpoint() failed: %v", err)
   515  	}
   516  }
   517  
   518  func (vS *volumeStruct) Flush(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (err error) {
   519  	startTime := time.Now()
   520  	defer func() {
   521  		globals.FlushUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   522  		if err != nil {
   523  			globals.FlushErrors.Add(1)
   524  		}
   525  	}()
   526  
   527  	vS.jobRWMutex.RLock()
   528  	defer vS.jobRWMutex.RUnlock()
   529  
   530  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
   531  	if err != nil {
   532  		return
   533  	}
   534  	err = inodeLock.WriteLock()
   535  	if err != nil {
   536  		return
   537  	}
   538  	defer inodeLock.Unlock()
   539  
   540  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
   541  		inode.NoOverride) {
   542  		return blunder.NewError(blunder.NotFoundError, "ENOENT")
   543  	}
   544  
   545  	// Note: We'd normally check EACCES here...but there are paths in FUSE (e.g. when files are
   546  	//       closed) that end up calling Flush()...even though the file was "opened" ReadOnly.
   547  	//       This is presumably to support updated of ATime and such. In any event, an EACCESS
   548  	//       check would fail if the caller actually only had ReadOnly access to the Inode, so
   549  	//       we won't be doing the check here.
   550  
   551  	err = vS.inodeVolumeHandle.Flush(inodeNumber, false)
   552  	vS.untrackInFlightFileInodeData(inodeNumber, false)
   553  
   554  	vS.doInlineCheckpointIfEnabled()
   555  
   556  	return
   557  }
   558  
   559  func (vS *volumeStruct) getFileLockList(inodeNumber inode.InodeNumber) (flockList *list.List) {
   560  	vS.dataMutex.Lock()
   561  	defer vS.dataMutex.Unlock()
   562  
   563  	flockList, ok := vS.FLockMap[inodeNumber]
   564  	if !ok {
   565  		flockList = new(list.List)
   566  		vS.FLockMap[inodeNumber] = flockList
   567  	}
   568  
   569  	return
   570  }
   571  
   572  // Check for lock conflict with other Pids, if there is a conflict then it will return the first occurance of conflicting range.
   573  func checkConflict(elm *FlockStruct, flock *FlockStruct) bool {
   574  
   575  	if flock.Pid == elm.Pid {
   576  		return false
   577  	}
   578  
   579  	if (elm.Start+elm.Len) <= flock.Start || (flock.Start+flock.Len) <= elm.Start {
   580  		return false
   581  	}
   582  
   583  	if (flock.Type == syscall.F_WRLCK) || (elm.Type == syscall.F_WRLCK) {
   584  		return true
   585  	}
   586  
   587  	return false
   588  }
   589  
   590  func (vS *volumeStruct) verifyLock(inodeNumber inode.InodeNumber, flock *FlockStruct) (conflictLock *FlockStruct) {
   591  	flockList := vS.getFileLockList(inodeNumber)
   592  
   593  	for e := flockList.Front(); e != nil; e = e.Next() {
   594  		elm := e.Value.(*FlockStruct)
   595  
   596  		if checkConflict(elm, flock) == true {
   597  			return elm
   598  		}
   599  	}
   600  
   601  	return nil
   602  }
   603  
   604  // Insert a file lock range to corresponding lock list for the pid.
   605  // Assumption: There is no lock conflict and the range that is being inserted has no conflict and is free.
   606  func (vS *volumeStruct) fileLockInsert(inodeNumber inode.InodeNumber, inFlock *FlockStruct) (err error) {
   607  	err = nil
   608  	flockList := vS.getFileLockList(inodeNumber)
   609  
   610  	overlapList := new(list.List)
   611  	var beforeElm *list.Element // Refers to the immediate element that starts before the start of the range.
   612  	var afterElm *list.Element  // Refers to the immediate element that starts after the end of the range.
   613  
   614  	// flockList is sorted by starting offset of the range.
   615  	// Inserting a range happens in two steps. 1) Check if there is any conflict and also identify the
   616  	// point in the list where the entry will be added (before and after elements) 2) Then check if
   617  	// the range can extend the before element, if so adjust it. 3) Simillarly, check if the after
   618  	// element can be collapsed if it forms a contiguous range.
   619  
   620  	for e := flockList.Front(); e != nil; e = e.Next() {
   621  		elm := e.Value.(*FlockStruct)
   622  
   623  		if (elm.Start + elm.Len) <= inFlock.Start {
   624  			beforeElm = e
   625  			continue
   626  		}
   627  
   628  		if elm.Start > (inFlock.Start + inFlock.Len) {
   629  			afterElm = e
   630  			if overlapList.Len() == 0 {
   631  				flockList.InsertBefore(inFlock, e)
   632  				return
   633  			}
   634  
   635  			break
   636  		}
   637  
   638  		if checkConflict(elm, inFlock) {
   639  			err = blunder.AddError(nil, blunder.TryAgainError)
   640  			return
   641  		}
   642  
   643  		if elm.Pid == inFlock.Pid {
   644  			overlapList.PushBack(e)
   645  		}
   646  	}
   647  
   648  	if overlapList.Len() == 0 {
   649  		if beforeElm != nil {
   650  			elm := beforeElm.Value.(*FlockStruct)
   651  			if elm.Pid == inFlock.Pid && elm.Type == inFlock.Type && (elm.Start+elm.Len) == inFlock.Start {
   652  				elm.Len = inFlock.Start + inFlock.Len - elm.Len
   653  			} else {
   654  				flockList.InsertAfter(inFlock, beforeElm)
   655  			}
   656  		} else {
   657  			flockList.PushBack(inFlock)
   658  		}
   659  
   660  		return
   661  	}
   662  
   663  	// Look at the last element in the overlapping list
   664  	lastEnt := overlapList.Back()
   665  	e := lastEnt.Value.(*list.Element)
   666  	elm := e.Value.(*FlockStruct)
   667  	if (elm.Start + elm.Len) > (inFlock.Start + inFlock.Len) {
   668  		inFlock.Len = (elm.Start + elm.Len) - inFlock.Start
   669  	}
   670  
   671  	// We can delete all the entries in the overlapping list. These entries are replaced by
   672  	// the range we are inserting.
   673  	for e := overlapList.Front(); e != nil; e = e.Next() {
   674  		entry := e.Value.(*list.Element)
   675  		flockList.Remove(entry)
   676  	}
   677  
   678  	// Now adjust the before and after entries:
   679  	// First adjust the after:
   680  	if afterElm != nil {
   681  		elm := afterElm.Value.(*FlockStruct)
   682  		if elm.Pid == inFlock.Pid && elm.Type == inFlock.Type && (inFlock.Start+inFlock.Len) == elm.Start {
   683  			// We can collapse the entry:
   684  			elm.Len = elm.Start + elm.Len - inFlock.Start
   685  			elm.Start = inFlock.Start
   686  
   687  			if beforeElm != nil {
   688  				belm := beforeElm.Value.(*FlockStruct)
   689  				if belm.Pid == elm.Pid && belm.Type == elm.Type && (belm.Start+belm.Len) == elm.Start {
   690  					belm.Len = elm.Start + elm.Len - belm.Start
   691  					flockList.Remove(afterElm)
   692  				}
   693  			}
   694  
   695  			return
   696  		}
   697  	}
   698  
   699  	if beforeElm != nil {
   700  		belm := beforeElm.Value.(*FlockStruct)
   701  		if belm.Pid == inFlock.Pid && belm.Type == inFlock.Type && (belm.Start+belm.Len) == inFlock.Start {
   702  			belm.Len = inFlock.Start + inFlock.Len - belm.Start
   703  		}
   704  
   705  		flockList.InsertAfter(inFlock, beforeElm)
   706  		return
   707  	}
   708  
   709  	if afterElm != nil {
   710  		flockList.InsertBefore(inFlock, afterElm)
   711  	} else {
   712  		flockList.PushBack(inFlock)
   713  	}
   714  
   715  	return
   716  
   717  }
   718  
   719  // Unlock a given range. All locks held in this range by the process (identified by Pid) are removed.
   720  func (vS *volumeStruct) fileUnlock(inodeNumber inode.InodeNumber, inFlock *FlockStruct) (err error) {
   721  
   722  	flockList := vS.getFileLockList(inodeNumber)
   723  	if flockList == nil {
   724  		logger.Warnf("Unlock of a region not already locked - %+v", inFlock)
   725  		return
   726  	}
   727  
   728  	start := inFlock.Start
   729  	len := inFlock.Len
   730  
   731  	removeList := new(list.List)
   732  
   733  	for e := flockList.Front(); e != nil; e = e.Next() {
   734  		elm := e.Value.(*FlockStruct)
   735  
   736  		if elm.Pid != inFlock.Pid {
   737  			continue
   738  		}
   739  
   740  		if (elm.Start + elm.Len) < start {
   741  			continue
   742  		}
   743  
   744  		if elm.Start >= (start + len) {
   745  			break
   746  		}
   747  
   748  		// If the lock falls completely in the range, delete it.
   749  		if elm.Start >= start && (elm.Start+elm.Len) <= (start+len) {
   750  			removeList.PushBack(e)
   751  			continue
   752  		}
   753  
   754  		// This lock overlapps with the range - three possibalities 1) lock starts before the range, 2) end after range and 3) both.
   755  
   756  		elmLen := elm.Start + elm.Len // Save the original length, it is required in case of #3 (both)
   757  
   758  		if elm.Start < start { // Handle the first part - lock starts before the range.
   759  			elm.Len = start - elm.Start
   760  		}
   761  
   762  		if elmLen > (start + len) { // Lock extends beyond the unlock range.
   763  			if elm.Start > start { // case #2
   764  				// use the existing record
   765  				elm.Start = start + len
   766  				elm.Len = elmLen - elm.Start
   767  				break
   768  			}
   769  
   770  			// Create a new record - handle case #3 both (starts before the range and extends beyond the range)
   771  			elmTail := new(FlockStruct)
   772  			elmTail.Start = start + len
   773  			elmTail.Len = elmLen - elm.Start
   774  			elmTail.Pid = elm.Pid
   775  			elmTail.Type = elm.Type
   776  			elmTail.Whence = elm.Whence
   777  			flockList.InsertAfter(elmTail, e)
   778  			break
   779  		}
   780  	}
   781  
   782  	for e := removeList.Front(); e != nil; e = e.Next() {
   783  		elm := e.Value.(*list.Element)
   784  		flockList.Remove(elm)
   785  	}
   786  
   787  	return
   788  }
   789  
   790  // Implements file locking conforming to fcntl(2) locking description. F_SETLKW is not implemented. Supports F_SETLW and F_GETLW.
   791  // whence: FS supports only SEEK_SET - starting from 0, since it does not manage file handles, caller is expected to supply the start and length relative to offset ZERO.
   792  func (vS *volumeStruct) Flock(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, lockCmd int32, inFlock *FlockStruct) (outFlock *FlockStruct, err error) {
   793  	startTime := time.Now()
   794  	defer func() {
   795  		switch lockCmd {
   796  
   797  		case syscall.F_GETLK:
   798  			globals.FlockGetUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   799  			if err != nil {
   800  				globals.FlockGetErrors.Add(1)
   801  			}
   802  
   803  		case syscall.F_SETLK:
   804  			if inFlock.Type == syscall.F_UNLCK {
   805  				globals.FlockUnlockUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   806  				if err != nil {
   807  					globals.FlockUnlockErrors.Add(1)
   808  				}
   809  
   810  			} else if inFlock.Type == syscall.F_WRLCK || inFlock.Type == syscall.F_RDLCK {
   811  				globals.FlockLockUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   812  				if err != nil {
   813  					globals.FlockLockErrors.Add(1)
   814  				}
   815  			} else {
   816  				globals.FlockOtherErrors.Add(1)
   817  			}
   818  
   819  		default:
   820  			globals.FlockOtherErrors.Add(1)
   821  		}
   822  
   823  	}()
   824  
   825  	vS.jobRWMutex.RLock()
   826  	defer vS.jobRWMutex.RUnlock()
   827  
   828  	outFlock = inFlock
   829  
   830  	if lockCmd == syscall.F_SETLKW {
   831  		err = blunder.AddError(nil, blunder.NotSupportedError)
   832  		return
   833  	}
   834  
   835  	// Make sure the inode does not go away, while we are applying the flock.
   836  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
   837  	if err != nil {
   838  		return
   839  	}
   840  	err = inodeLock.ReadLock()
   841  	if err != nil {
   842  		return
   843  	}
   844  	defer inodeLock.Unlock()
   845  
   846  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, inode.NoOverride) {
   847  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
   848  		return
   849  	}
   850  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK, inode.OwnerOverride) {
   851  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
   852  		return
   853  	}
   854  
   855  	if inFlock.Len == 0 { // If length is ZERO means treat it as whole file.
   856  		inFlock.Len = ^uint64(0)
   857  	}
   858  
   859  	switch lockCmd {
   860  	case syscall.F_GETLK:
   861  		conflictLock := vS.verifyLock(inodeNumber, inFlock)
   862  		if conflictLock != nil {
   863  			outFlock = conflictLock
   864  			err = blunder.AddError(nil, blunder.TryAgainError)
   865  		} else {
   866  			outFlock = inFlock
   867  			outFlock.Type = syscall.F_UNLCK
   868  		}
   869  		break
   870  
   871  	case syscall.F_SETLK:
   872  		if inFlock.Type == syscall.F_UNLCK {
   873  			err = vS.fileUnlock(inodeNumber, inFlock)
   874  
   875  		} else if inFlock.Type == syscall.F_WRLCK || inFlock.Type == syscall.F_RDLCK {
   876  			err = vS.fileLockInsert(inodeNumber, inFlock)
   877  
   878  		} else {
   879  			err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
   880  			return
   881  		}
   882  		break
   883  
   884  	default:
   885  		err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
   886  		return
   887  	}
   888  
   889  	return
   890  }
   891  
   892  func (vS *volumeStruct) getstatHelper(inodeNumber inode.InodeNumber, callerID dlm.CallerID) (stat Stat, err error) {
   893  
   894  	lockID, err := vS.inodeVolumeHandle.MakeLockID(inodeNumber)
   895  	if err != nil {
   896  		return
   897  	}
   898  	if !dlm.IsLockHeld(lockID, callerID, dlm.ANYLOCK) {
   899  		err = fmt.Errorf("%s: inode %v lock must be held before calling", utils.GetFnName(), inodeNumber)
   900  		return nil, blunder.AddError(err, blunder.NotFoundError)
   901  	}
   902  
   903  	stat, err = vS.getstatHelperWhileLocked(inodeNumber)
   904  
   905  	return
   906  }
   907  
   908  func (vS *volumeStruct) getstatHelperWhileLocked(inodeNumber inode.InodeNumber) (stat Stat, err error) {
   909  	var (
   910  		metadata *inode.MetadataStruct
   911  	)
   912  
   913  	metadata, err = vS.inodeVolumeHandle.GetMetadata(inodeNumber)
   914  	if nil != err {
   915  		return
   916  	}
   917  
   918  	stat = make(map[StatKey]uint64)
   919  
   920  	stat[StatCRTime] = uint64(metadata.CreationTime.UnixNano())
   921  	stat[StatMTime] = uint64(metadata.ModificationTime.UnixNano())
   922  	stat[StatCTime] = uint64(metadata.AttrChangeTime.UnixNano())
   923  	stat[StatATime] = uint64(metadata.AccessTime.UnixNano())
   924  	stat[StatSize] = metadata.Size
   925  	stat[StatNLink] = metadata.LinkCount
   926  	stat[StatFType] = uint64(metadata.InodeType)
   927  	stat[StatINum] = uint64(inodeNumber)
   928  	stat[StatMode] = uint64(metadata.Mode)
   929  	stat[StatUserID] = uint64(metadata.UserID)
   930  	stat[StatGroupID] = uint64(metadata.GroupID)
   931  	stat[StatNumWrites] = metadata.NumWrites
   932  
   933  	return
   934  }
   935  
   936  func (vS *volumeStruct) Getstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (stat Stat, err error) {
   937  	startTime := time.Now()
   938  	defer func() {
   939  		globals.GetstatUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   940  		if err != nil {
   941  			globals.GetstatErrors.Add(1)
   942  		}
   943  	}()
   944  
   945  	vS.jobRWMutex.RLock()
   946  	defer vS.jobRWMutex.RUnlock()
   947  
   948  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
   949  	if err != nil {
   950  		return
   951  	}
   952  	err = inodeLock.ReadLock()
   953  	if err != nil {
   954  		return
   955  	}
   956  	defer inodeLock.Unlock()
   957  
   958  	// Call getstat helper function to do the work
   959  	return vS.getstatHelper(inodeNumber, inodeLock.GetCallerID())
   960  }
   961  
   962  func (vS *volumeStruct) getTypeHelper(inodeNumber inode.InodeNumber, callerID dlm.CallerID) (inodeType inode.InodeType, err error) {
   963  
   964  	lockID, err := vS.inodeVolumeHandle.MakeLockID(inodeNumber)
   965  	if err != nil {
   966  		return
   967  	}
   968  	if !dlm.IsLockHeld(lockID, callerID, dlm.ANYLOCK) {
   969  		err = fmt.Errorf("%s: inode %v lock must be held before calling.", utils.GetFnName(), inodeNumber)
   970  		err = blunder.AddError(err, blunder.NotFoundError)
   971  		return
   972  	}
   973  
   974  	inodeType, err = vS.inodeVolumeHandle.GetType(inodeNumber)
   975  	if err != nil {
   976  		logger.ErrorWithError(err, "couldn't get inode type")
   977  		return inodeType, err
   978  	}
   979  	return inodeType, nil
   980  }
   981  
   982  func (vS *volumeStruct) GetType(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeType inode.InodeType, err error) {
   983  	startTime := time.Now()
   984  	defer func() {
   985  		globals.GetTypeUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
   986  		if err != nil {
   987  			globals.GetTypeErrors.Add(1)
   988  		}
   989  	}()
   990  
   991  	vS.jobRWMutex.RLock()
   992  	defer vS.jobRWMutex.RUnlock()
   993  
   994  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
   995  	if err != nil {
   996  		return
   997  	}
   998  	err = inodeLock.ReadLock()
   999  	if err != nil {
  1000  		return
  1001  	}
  1002  	defer inodeLock.Unlock()
  1003  
  1004  	return vS.getTypeHelper(inodeNumber, inodeLock.GetCallerID())
  1005  }
  1006  
  1007  func (vS *volumeStruct) GetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (value []byte, err error) {
  1008  	startTime := time.Now()
  1009  	defer func() {
  1010  		globals.GetXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1011  		if err != nil {
  1012  			globals.GetXAttrErrors.Add(1)
  1013  		}
  1014  	}()
  1015  
  1016  	vS.jobRWMutex.RLock()
  1017  	defer vS.jobRWMutex.RUnlock()
  1018  
  1019  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  1020  	if err != nil {
  1021  		return
  1022  	}
  1023  	err = inodeLock.ReadLock()
  1024  	if err != nil {
  1025  		return
  1026  	}
  1027  	defer inodeLock.Unlock()
  1028  
  1029  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  1030  		inode.NoOverride) {
  1031  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  1032  		return
  1033  	}
  1034  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
  1035  		inode.OwnerOverride) {
  1036  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  1037  		return
  1038  	}
  1039  
  1040  	value, err = vS.inodeVolumeHandle.GetStream(inodeNumber, streamName)
  1041  	if err != nil {
  1042  		// Did not find the requested stream. However this isn't really an error since
  1043  		// samba will ask for acl-related streams and is fine with not finding them.
  1044  		logger.TracefWithError(err, "Failed to get XAttr %v of inode %v", streamName, inodeNumber)
  1045  	}
  1046  
  1047  	return
  1048  }
  1049  
  1050  func (vS *volumeStruct) IsDir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsDir bool, err error) {
  1051  	startTime := time.Now()
  1052  	defer func() {
  1053  		globals.IsDirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1054  		if err != nil {
  1055  			globals.IsDirErrors.Add(1)
  1056  		}
  1057  	}()
  1058  
  1059  	vS.jobRWMutex.RLock()
  1060  	defer vS.jobRWMutex.RUnlock()
  1061  
  1062  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  1063  	if err != nil {
  1064  		return
  1065  	}
  1066  	err = inodeLock.ReadLock()
  1067  	if err != nil {
  1068  		return
  1069  	}
  1070  	defer inodeLock.Unlock()
  1071  
  1072  	lockID, err := vS.inodeVolumeHandle.MakeLockID(inodeNumber)
  1073  	if err != nil {
  1074  		return
  1075  	}
  1076  	if !dlm.IsLockHeld(lockID, inodeLock.GetCallerID(), dlm.ANYLOCK) {
  1077  		err = fmt.Errorf("%s: inode %v lock must be held before calling", utils.GetFnName(), inodeNumber)
  1078  		return false, blunder.AddError(err, blunder.NotFoundError)
  1079  	}
  1080  
  1081  	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
  1082  	if err != nil {
  1083  		return false, err
  1084  	}
  1085  	return inodeType == inode.DirType, nil
  1086  }
  1087  
  1088  func (vS *volumeStruct) IsFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsFile bool, err error) {
  1089  	startTime := time.Now()
  1090  	defer func() {
  1091  		globals.IsFileUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1092  		if err != nil {
  1093  			globals.IsFileErrors.Add(1)
  1094  		}
  1095  	}()
  1096  
  1097  	vS.jobRWMutex.RLock()
  1098  	defer vS.jobRWMutex.RUnlock()
  1099  
  1100  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  1101  	if err != nil {
  1102  		return
  1103  	}
  1104  	err = inodeLock.ReadLock()
  1105  	if err != nil {
  1106  		return
  1107  	}
  1108  	defer inodeLock.Unlock()
  1109  
  1110  	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
  1111  	if err != nil {
  1112  		return false, err
  1113  	}
  1114  
  1115  	return inodeType == inode.FileType, nil
  1116  }
  1117  
  1118  func (vS *volumeStruct) IsSymlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsSymlink bool, err error) {
  1119  	startTime := time.Now()
  1120  	defer func() {
  1121  		globals.IsSymlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1122  		if err != nil {
  1123  			globals.IsSymlinkErrors.Add(1)
  1124  		}
  1125  	}()
  1126  
  1127  	vS.jobRWMutex.RLock()
  1128  	defer vS.jobRWMutex.RUnlock()
  1129  
  1130  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  1131  	if err != nil {
  1132  		return
  1133  	}
  1134  	err = inodeLock.ReadLock()
  1135  	if err != nil {
  1136  		return
  1137  	}
  1138  	defer inodeLock.Unlock()
  1139  
  1140  	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
  1141  	if err != nil {
  1142  		return false, err
  1143  	}
  1144  
  1145  	return inodeType == inode.SymlinkType, nil
  1146  }
  1147  
  1148  func (vS *volumeStruct) Link(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, targetInodeNumber inode.InodeNumber) (err error) {
  1149  	startTime := time.Now()
  1150  	defer func() {
  1151  		globals.LinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1152  		if err != nil {
  1153  			globals.LinkErrors.Add(1)
  1154  		}
  1155  	}()
  1156  
  1157  	vS.jobRWMutex.RLock()
  1158  	defer vS.jobRWMutex.RUnlock()
  1159  
  1160  	var (
  1161  		inodeType inode.InodeType
  1162  	)
  1163  
  1164  	err = validateBaseName(basename)
  1165  	if err != nil {
  1166  		return
  1167  	}
  1168  
  1169  	// We need both dirInodelock and the targetInode lock to make sure they
  1170  	// don't go away and linkCount is updated correctly.
  1171  	callerID := dlm.GenerateCallerID()
  1172  	dirInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(dirInodeNumber, callerID)
  1173  	if err != nil {
  1174  		return
  1175  	}
  1176  
  1177  	targetInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(targetInodeNumber, callerID)
  1178  	if err != nil {
  1179  		return
  1180  	}
  1181  
  1182  	// Lock the target inode to check its type and insure its not a directory (if it is a
  1183  	// directory then locking it after the target directory could result in deadlock).
  1184  	err = targetInodeLock.WriteLock()
  1185  	if err != nil {
  1186  		return
  1187  	}
  1188  
  1189  	// make sure target inode is not a directory
  1190  	inodeType, err = vS.inodeVolumeHandle.GetType(targetInodeNumber)
  1191  	if err != nil {
  1192  		targetInodeLock.Unlock()
  1193  		// Because we know that GetType() has already "blunderized" the error, we just pass it on
  1194  		logger.ErrorfWithError(err, "%s: couldn't get type for inode %v", utils.GetFnName(), targetInodeNumber)
  1195  		return err
  1196  	}
  1197  	if inodeType == inode.DirType {
  1198  		targetInodeLock.Unlock()
  1199  		// no need to print an error when its a mistake by the client
  1200  		err = fmt.Errorf("%s: inode %v cannot be a dir inode", utils.GetFnName(), targetInodeNumber)
  1201  		return blunder.AddError(err, blunder.LinkDirError)
  1202  	}
  1203  
  1204  	// drop the target inode lock so we can get the directory lock then
  1205  	// reget the target inode lock
  1206  	targetInodeLock.Unlock()
  1207  
  1208  	err = dirInodeLock.WriteLock()
  1209  	if err != nil {
  1210  		return
  1211  	}
  1212  	defer dirInodeLock.Unlock()
  1213  
  1214  	err = targetInodeLock.WriteLock()
  1215  	if err != nil {
  1216  		return
  1217  	}
  1218  	defer targetInodeLock.Unlock()
  1219  
  1220  	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  1221  		inode.NoOverride) {
  1222  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  1223  		return
  1224  	}
  1225  	if !vS.inodeVolumeHandle.Access(targetInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  1226  		inode.NoOverride) {
  1227  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  1228  		return
  1229  	}
  1230  	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
  1231  		inode.NoOverride) {
  1232  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  1233  		return
  1234  	}
  1235  
  1236  	err = vS.inodeVolumeHandle.Link(dirInodeNumber, basename, targetInodeNumber, false)
  1237  
  1238  	// if the link was successful and this is a regular file then any
  1239  	// pending data was flushed
  1240  	if err == nil && inodeType == inode.FileType {
  1241  		vS.untrackInFlightFileInodeData(targetInodeNumber, false)
  1242  	}
  1243  
  1244  	return err
  1245  }
  1246  
  1247  func (vS *volumeStruct) ListXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (streamNames []string, err error) {
  1248  	startTime := time.Now()
  1249  	defer func() {
  1250  		globals.ListXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1251  		if err != nil {
  1252  			globals.ListXAttrErrors.Add(1)
  1253  		}
  1254  	}()
  1255  
  1256  	vS.jobRWMutex.RLock()
  1257  	defer vS.jobRWMutex.RUnlock()
  1258  
  1259  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  1260  	if err != nil {
  1261  		return
  1262  	}
  1263  	err = inodeLock.ReadLock()
  1264  	if err != nil {
  1265  		return
  1266  	}
  1267  	defer inodeLock.Unlock()
  1268  
  1269  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  1270  		inode.NoOverride) {
  1271  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  1272  		return
  1273  	}
  1274  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
  1275  		inode.OwnerOverride) {
  1276  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  1277  		return
  1278  	}
  1279  
  1280  	metadata, err := vS.inodeVolumeHandle.GetMetadata(inodeNumber)
  1281  	if err != nil {
  1282  		// Did not find the requested stream. However this isn't really an error since
  1283  		// samba will ask for acl-related streams and is fine with not finding them.
  1284  		logger.TracefWithError(err, "Failed to list XAttrs of inode %v", inodeNumber)
  1285  		return
  1286  	}
  1287  
  1288  	streamNames = make([]string, len(metadata.InodeStreamNameSlice))
  1289  	copy(streamNames, metadata.InodeStreamNameSlice)
  1290  	return
  1291  }
  1292  
  1293  func (vS *volumeStruct) Lookup(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string) (inodeNumber inode.InodeNumber, err error) {
  1294  	startTime := time.Now()
  1295  	defer func() {
  1296  		globals.LookupUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1297  		if err != nil {
  1298  			globals.LookupErrors.Add(1)
  1299  		}
  1300  	}()
  1301  
  1302  	vS.jobRWMutex.RLock()
  1303  	defer vS.jobRWMutex.RUnlock()
  1304  
  1305  	dirInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(dirInodeNumber, nil)
  1306  	if err != nil {
  1307  		return
  1308  	}
  1309  	dirInodeLock.ReadLock()
  1310  	defer dirInodeLock.Unlock()
  1311  
  1312  	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  1313  		inode.NoOverride) {
  1314  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  1315  		return
  1316  	}
  1317  	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.X_OK,
  1318  		inode.NoOverride) {
  1319  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  1320  		return
  1321  	}
  1322  
  1323  	inodeNumber, err = vS.inodeVolumeHandle.Lookup(dirInodeNumber, basename)
  1324  	return inodeNumber, err
  1325  }
  1326  
  1327  func (vS *volumeStruct) LookupPath(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fullpath string) (inodeNumber inode.InodeNumber, err error) {
  1328  	startTime := time.Now()
  1329  	defer func() {
  1330  		globals.LookupPathUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1331  		if err != nil {
  1332  			globals.LookupPathErrors.Add(1)
  1333  		}
  1334  	}()
  1335  
  1336  	vS.jobRWMutex.RLock()
  1337  	defer vS.jobRWMutex.RUnlock()
  1338  
  1339  	// In the special case of a fullpath starting with "/", the path segment splitting above
  1340  	// results in a first segment that still begins with "/". Because this is not recognized
  1341  	// as a real path segment, by the underlying code, we have trouble looking it up.
  1342  	//
  1343  	// This is a hack to work around this case until I figure out a better way.
  1344  	newfullpath := strings.TrimPrefix(fullpath, "/")
  1345  	if strings.Compare(fullpath, newfullpath) != 0 {
  1346  		fullpath = newfullpath
  1347  	}
  1348  
  1349  	pathSegments := strings.Split(path.Clean(fullpath), "/")
  1350  
  1351  	cursorInodeNumber := inode.RootDirInodeNumber
  1352  	for _, segment := range pathSegments {
  1353  		cursorInodeLock, err1 := vS.inodeVolumeHandle.InitInodeLock(cursorInodeNumber, nil)
  1354  		if err = err1; err != nil {
  1355  			return
  1356  		}
  1357  		err = cursorInodeLock.ReadLock()
  1358  		if err != nil {
  1359  			return
  1360  		}
  1361  
  1362  		if !vS.inodeVolumeHandle.Access(cursorInodeNumber, userID, groupID, otherGroupIDs, inode.X_OK,
  1363  			inode.NoOverride) {
  1364  			cursorInodeLock.Unlock()
  1365  			err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  1366  			return
  1367  		}
  1368  
  1369  		cursorInodeNumber, err = vS.inodeVolumeHandle.Lookup(cursorInodeNumber, segment)
  1370  		cursorInodeLock.Unlock()
  1371  
  1372  		if err != nil {
  1373  			return cursorInodeNumber, err
  1374  		}
  1375  	}
  1376  
  1377  	return cursorInodeNumber, nil
  1378  }
  1379  
  1380  func (vS *volumeStruct) MiddlewareCoalesce(destPath string, metaData []byte, elementPaths []string) (
  1381  	ino uint64, numWrites uint64, attrChangeTime uint64, modificationTime uint64, err error) {
  1382  
  1383  	var (
  1384  		coalesceElementList          []*inode.CoalesceElement
  1385  		coalesceSize                 uint64
  1386  		ctime                        time.Time
  1387  		destFileInodeNumber          inode.InodeNumber
  1388  		dirEntryBasename             string
  1389  		dirEntryInodeNumber          inode.InodeNumber
  1390  		dirInodeNumber               inode.InodeNumber
  1391  		elementPathIndex             int
  1392  		elementPathIndexAtChunkEnd   int
  1393  		elementPathIndexAtChunkStart int
  1394  		heldLocks                    *heldLocksStruct
  1395  		mtime                        time.Time
  1396  		retryRequired                bool
  1397  		tryLockBackoffContext        *tryLockBackoffContextStruct
  1398  	)
  1399  
  1400  	startTime := time.Now()
  1401  	defer func() {
  1402  		globals.MiddlewareCoalesceUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1403  		globals.MiddlewareCoalesceBytes.Add(coalesceSize)
  1404  		if err != nil {
  1405  			globals.MiddlewareCoalesceErrors.Add(1)
  1406  		}
  1407  	}()
  1408  
  1409  	vS.jobRWMutex.RLock()
  1410  	defer vS.jobRWMutex.RUnlock()
  1411  
  1412  	// First create the destination file if necessary and ensure that it is empty
  1413  
  1414  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  1415  
  1416  RestartDestinationFileCreation:
  1417  
  1418  	tryLockBackoffContext.backoff()
  1419  
  1420  	heldLocks = newHeldLocks()
  1421  
  1422  	_, destFileInodeNumber, _, _, retryRequired, err =
  1423  		vS.resolvePath(
  1424  			inode.RootDirInodeNumber,
  1425  			destPath,
  1426  			heldLocks,
  1427  			resolvePathFollowDirEntrySymlinks|
  1428  				resolvePathFollowDirSymlinks|
  1429  				resolvePathCreateMissingPathElements|
  1430  				resolvePathRequireExclusiveLockOnDirEntryInode)
  1431  
  1432  	if nil != err {
  1433  		heldLocks.free()
  1434  		return
  1435  	}
  1436  
  1437  	if retryRequired {
  1438  		heldLocks.free()
  1439  		goto RestartDestinationFileCreation
  1440  	}
  1441  
  1442  	vS.inodeVolumeHandle.SetSize(destFileInodeNumber, 0)
  1443  
  1444  	heldLocks.free()
  1445  
  1446  	// Now setup for looping through elementPaths with fresh locks
  1447  	// every globals.coalesceElementChunkSize elements holding an
  1448  	// Exclusive Lock on each FileInode and their containing DirInode
  1449  
  1450  	elementPathIndexAtChunkStart = 0
  1451  
  1452  	for elementPathIndexAtChunkStart < len(elementPaths) {
  1453  		elementPathIndexAtChunkEnd = elementPathIndexAtChunkStart + int(globals.coalesceElementChunkSize)
  1454  		if elementPathIndexAtChunkEnd > len(elementPaths) {
  1455  			elementPathIndexAtChunkEnd = len(elementPaths)
  1456  		}
  1457  
  1458  		// Coalesce elementPaths[elementPathIndexAtChunkStart:elementPathIndexAtChunkEnd)
  1459  
  1460  		tryLockBackoffContext = &tryLockBackoffContextStruct{}
  1461  
  1462  	RestartCoalesceChunk:
  1463  
  1464  		tryLockBackoffContext.backoff()
  1465  
  1466  		heldLocks = newHeldLocks()
  1467  
  1468  		coalesceElementList = make([]*inode.CoalesceElement, 0, (elementPathIndexAtChunkEnd - elementPathIndexAtChunkStart))
  1469  
  1470  		for elementPathIndex = elementPathIndexAtChunkStart; elementPathIndex < elementPathIndexAtChunkEnd; elementPathIndex++ {
  1471  			dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, _, retryRequired, err =
  1472  				vS.resolvePath(
  1473  					inode.RootDirInodeNumber,
  1474  					elementPaths[elementPathIndex],
  1475  					heldLocks,
  1476  					resolvePathFollowDirSymlinks|
  1477  						resolvePathRequireExclusiveLockOnDirEntryInode|
  1478  						resolvePathRequireExclusiveLockOnDirInode)
  1479  
  1480  			if nil != err {
  1481  				heldLocks.free()
  1482  				return
  1483  			}
  1484  
  1485  			if retryRequired {
  1486  				heldLocks.free()
  1487  				goto RestartCoalesceChunk
  1488  			}
  1489  
  1490  			coalesceElementList = append(coalesceElementList, &inode.CoalesceElement{
  1491  				ContainingDirectoryInodeNumber: dirInodeNumber,
  1492  				ElementInodeNumber:             dirEntryInodeNumber,
  1493  				ElementName:                    dirEntryBasename,
  1494  			})
  1495  		}
  1496  
  1497  		_, destFileInodeNumber, _, _, retryRequired, err =
  1498  			vS.resolvePath(
  1499  				inode.RootDirInodeNumber,
  1500  				destPath,
  1501  				heldLocks,
  1502  				resolvePathFollowDirEntrySymlinks|
  1503  					resolvePathFollowDirSymlinks|
  1504  					resolvePathRequireExclusiveLockOnDirEntryInode)
  1505  
  1506  		if nil != err {
  1507  			heldLocks.free()
  1508  			return
  1509  		}
  1510  
  1511  		if retryRequired {
  1512  			heldLocks.free()
  1513  			goto RestartCoalesceChunk
  1514  		}
  1515  
  1516  		ctime, mtime, numWrites, coalesceSize, err = vS.inodeVolumeHandle.Coalesce(
  1517  			destFileInodeNumber, MiddlewareStream, metaData, coalesceElementList)
  1518  
  1519  		heldLocks.free()
  1520  
  1521  		if nil != err {
  1522  			return
  1523  		}
  1524  
  1525  		elementPathIndexAtChunkStart = elementPathIndexAtChunkEnd
  1526  	}
  1527  
  1528  	// Regardless of err return, fill in other return values
  1529  
  1530  	ino = uint64(destFileInodeNumber)
  1531  	attrChangeTime = uint64(ctime.UnixNano())
  1532  	modificationTime = uint64(mtime.UnixNano())
  1533  
  1534  	return
  1535  }
  1536  
  1537  func (vS *volumeStruct) MiddlewareDelete(parentDir string, basename string) (err error) {
  1538  	var (
  1539  		dirEntryBasename      string
  1540  		dirEntryInodeNumber   inode.InodeNumber
  1541  		dirInodeNumber        inode.InodeNumber
  1542  		doDestroy             bool
  1543  		heldLocks             *heldLocksStruct
  1544  		inodeType             inode.InodeType
  1545  		inodeVolumeHandle     inode.VolumeHandle
  1546  		linkCount             uint64
  1547  		numDirEntries         uint64
  1548  		retryRequired         bool
  1549  		toDestroyInodeNumber  inode.InodeNumber
  1550  		tryLockBackoffContext *tryLockBackoffContextStruct
  1551  	)
  1552  
  1553  	startTime := time.Now()
  1554  	defer func() {
  1555  		globals.MiddlewareDeleteUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1556  		if err != nil {
  1557  			globals.MiddlewareDeleteErrors.Add(1)
  1558  		}
  1559  	}()
  1560  
  1561  	// Retry until done or failure (starting with ZERO backoff)
  1562  
  1563  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  1564  
  1565  Restart:
  1566  
  1567  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  1568  
  1569  	tryLockBackoffContext.backoff()
  1570  
  1571  	// Construct fresh heldLocks for this restart
  1572  
  1573  	heldLocks = newHeldLocks()
  1574  
  1575  	dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, _, retryRequired, err =
  1576  		vS.resolvePath(
  1577  			inode.RootDirInodeNumber,
  1578  			parentDir+"/"+basename,
  1579  			heldLocks,
  1580  			resolvePathFollowDirSymlinks|
  1581  				resolvePathRequireExclusiveLockOnDirEntryInode|
  1582  				resolvePathRequireExclusiveLockOnDirInode)
  1583  
  1584  	if nil != err {
  1585  		heldLocks.free()
  1586  		return
  1587  	}
  1588  
  1589  	if retryRequired {
  1590  		heldLocks.free()
  1591  		goto Restart
  1592  	}
  1593  
  1594  	// Check if Unlink() and Destroy() are doable
  1595  
  1596  	inodeVolumeHandle = vS.inodeVolumeHandle
  1597  
  1598  	inodeType, err = inodeVolumeHandle.GetType(dirEntryInodeNumber)
  1599  	if nil != err {
  1600  		heldLocks.free()
  1601  		return
  1602  	}
  1603  
  1604  	if inode.DirType == inodeType {
  1605  		numDirEntries, err = inodeVolumeHandle.NumDirEntries(dirEntryInodeNumber)
  1606  		if nil != err {
  1607  			heldLocks.free()
  1608  			return
  1609  		}
  1610  
  1611  		if 2 != numDirEntries {
  1612  			heldLocks.free()
  1613  			err = blunder.NewError(blunder.NotEmptyError, "%s/%s not empty", parentDir, basename)
  1614  			return
  1615  		}
  1616  
  1617  		doDestroy = true
  1618  	} else {
  1619  		linkCount, err = inodeVolumeHandle.GetLinkCount(dirEntryInodeNumber)
  1620  		if nil != err {
  1621  			heldLocks.free()
  1622  			return
  1623  		}
  1624  
  1625  		doDestroy = (1 == linkCount)
  1626  	}
  1627  
  1628  	// Now perform the Unlink() and (potentially) Destroy()
  1629  
  1630  	toDestroyInodeNumber, err = inodeVolumeHandle.Unlink(dirInodeNumber, dirEntryBasename, false)
  1631  	if nil != err {
  1632  		heldLocks.free()
  1633  		return
  1634  	}
  1635  
  1636  	if doDestroy && (inode.InodeNumber(0) != toDestroyInodeNumber) {
  1637  		err = inodeVolumeHandle.Destroy(toDestroyInodeNumber)
  1638  		if nil != err {
  1639  			logger.Errorf("fs.MiddlewareDelete() failed to Destroy toDestroyInodeNumber 0x%016X: %v", toDestroyInodeNumber, err)
  1640  		}
  1641  	}
  1642  
  1643  	// Release heldLocks and exit with success (even if Destroy() failed earlier)
  1644  
  1645  	heldLocks.free()
  1646  
  1647  	err = nil
  1648  	return
  1649  }
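
// Illustrative usage sketch (not from the original source): callers remove a
// file or an empty directory in a single call, e.g.:
//
//	if err := vS.MiddlewareDelete("container/dir", "obj"); nil != err {
//		// blunder.NotEmptyError means a non-empty directory was targeted
//	}
//
// Note that a FileInode is only Destroy()ed here when its link count is exactly 1.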
  1650  
  1651  func (vS *volumeStruct) middlewareReadDirHelper(path string, maxEntries uint64, prevBasename string) (pathDirInodeNumber inode.InodeNumber, dirEntrySlice []inode.DirEntry, moreEntries bool, err error) {
  1652  	var (
  1653  		dirEntrySliceElement  inode.DirEntry
  1654  		heldLocks             *heldLocksStruct
  1655  		internalDirEntrySlice []inode.DirEntry
  1656  		retryRequired         bool
  1657  		tryLockBackoffContext *tryLockBackoffContextStruct
  1658  	)
  1659  
  1660  	// Retry until done or failure (starting with ZERO backoff)
  1661  
  1662  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  1663  
  1664  Restart:
  1665  
  1666  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  1667  
  1668  	tryLockBackoffContext.backoff()
  1669  
  1670  	// Construct fresh heldLocks for this restart
  1671  
  1672  	heldLocks = newHeldLocks()
  1673  
  1674  	_, pathDirInodeNumber, _, _, retryRequired, err =
  1675  		vS.resolvePath(
  1676  			inode.RootDirInodeNumber,
  1677  			path,
  1678  			heldLocks,
  1679  			resolvePathFollowDirSymlinks)
  1680  
  1681  	if nil != err {
  1682  		heldLocks.free()
  1683  		return
  1684  	}
  1685  
  1686  	if retryRequired {
  1687  		heldLocks.free()
  1688  		goto Restart
  1689  	}
  1690  
  1691  	// Now assemble response
  1692  
  1693  	internalDirEntrySlice, moreEntries, err = vS.inodeVolumeHandle.ReadDir(pathDirInodeNumber, maxEntries, 0, prevBasename)
  1694  	if nil != err {
  1695  		heldLocks.free()
  1696  		return
  1697  	}
  1698  
  1699  	// No need to hold any locks now... directory contents should be allowed to change while enumerating
  1700  	heldLocks.free()
  1701  
  1702  	dirEntrySlice = make([]inode.DirEntry, 0, len(internalDirEntrySlice))
  1703  
  1704  	for _, dirEntrySliceElement = range internalDirEntrySlice {
  1705  		if ("." == dirEntrySliceElement.Basename) || (".." == dirEntrySliceElement.Basename) {
  1706  			dirEntrySliceElement.Type = inode.DirType
  1707  		} else {
  1708  			dirEntrySliceElement.Type, err = vS.GetType(inode.InodeRootUserID, inode.InodeGroupID(0), nil, dirEntrySliceElement.InodeNumber)
  1709  			if nil != err {
  1710  				// It's ok to have an error here... it just means the directory we are iterating is changing
  1711  				continue
  1712  			}
  1713  		}
  1714  		dirEntrySlice = append(dirEntrySlice, dirEntrySliceElement)
  1715  	}
  1716  
  1717  	dirEntrySlice = dirEntrySlice[:len(dirEntrySlice)]
  1718  
  1719  	err = nil
  1720  	return
  1721  }
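
// Illustrative sketch (assumes an existing directory): middlewareReadDirHelper
// paginates via (maxEntries, prevBasename), so a complete listing loops as:
//
//	prev := ""
//	for {
//		_, slice, more, err := vS.middlewareReadDirHelper("container/dir", 100, prev)
//		if (nil != err) || (0 == len(slice)) {
//			break
//		}
//		// ... consume slice ...
//		if !more {
//			break
//		}
//		prev = slice[len(slice)-1].Basename
//	}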
  1722  
  1723  func (vS *volumeStruct) MiddlewareGetAccount(maxEntries uint64, marker string, endmarker string) (accountEnts []AccountEntry, mtime uint64, ctime uint64, err error) {
  1724  	var (
  1725  		dirEntrySlice        []inode.DirEntry
  1726  		dirEntrySliceElement inode.DirEntry
  1727  		remainingMaxEntries  uint64
  1728  		moreEntries          bool
  1729  		statResult           Stat
  1730  	)
  1731  
  1732  	statResult, err = vS.Getstat(inode.InodeRootUserID, inode.InodeGroupID(0), nil, inode.RootDirInodeNumber)
  1733  	if nil != err {
  1734  		return
  1735  	}
  1736  	mtime = statResult[StatMTime]
  1737  	ctime = statResult[StatCTime]
  1738  
  1739  	if 0 != maxEntries {
  1740  		// Hard limit to number of DirInode Basenames to return
  1741  		accountEnts = make([]AccountEntry, 0, maxEntries)
  1742  	}
  1743  
  1744  	remainingMaxEntries = maxEntries
  1745  
  1746  	moreEntries = true
  1747  
  1748  	for moreEntries {
  1749  		_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper("/", remainingMaxEntries, marker)
  1750  		if nil != err {
  1751  			return
  1752  		}
  1753  
  1754  		if 0 == maxEntries {
  1755  			// No limit to number of DirInode Basenames to return... so it must be <= len(dirEntrySlice)
  1756  			accountEnts = make([]AccountEntry, 0, len(dirEntrySlice))
  1757  			// Note: moreEntries should be false so the "for moreEntries" loop should exit after 1st iteration
  1758  		}
  1759  
  1760  		for _, dirEntrySliceElement = range dirEntrySlice {
  1761  			if ("" != endmarker) && (0 <= strings.Compare(dirEntrySliceElement.Basename, endmarker)) {
  1762  				moreEntries = false
  1763  				break
  1764  			}
  1765  			if ("." != dirEntrySliceElement.Basename) && (".." != dirEntrySliceElement.Basename) {
  1766  				// So we've skipped "." & ".." - now also skip non-DirInodes
  1767  				if inode.DirType == dirEntrySliceElement.Type {
  1768  					statResult, err = vS.Getstat(inode.InodeRootUserID, inode.InodeGroupID(0), nil, dirEntrySliceElement.InodeNumber)
  1769  					if nil != err {
  1770  						return
  1771  					}
  1772  					accountEnts = append(accountEnts, AccountEntry{
  1773  						Basename:         dirEntrySliceElement.Basename,
  1774  						ModificationTime: statResult[StatMTime],
  1775  						AttrChangeTime:   statResult[StatCTime],
  1776  					})
  1777  				}
  1778  			}
  1779  		}
  1780  
  1781  		if moreEntries && (0 != maxEntries) {
  1782  			remainingMaxEntries = maxEntries - uint64(len(accountEnts))
  1783  			if 0 == remainingMaxEntries {
  1784  				moreEntries = false
  1785  			}
  1786  		}
  1787  
  1788  		if moreEntries {
  1789  			// Adjust marker to fetch next dirEntrySlice
  1790  			marker = dirEntrySlice[len(dirEntrySlice)-1].Basename
  1791  		}
  1792  	}
  1793  
  1794  	accountEnts = accountEnts[:len(accountEnts)]
  1795  
  1796  	return
  1797  }
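
// Illustrative sketch: maxEntries == 0 means "unlimited", and marker/endmarker
// bound the listing, so a capped scan resuming after "container0042" might be:
//
//	accountEnts, mtime, ctime, err := vS.MiddlewareGetAccount(1000, "container0042", "")
//
// Only DirInodes directly beneath the RootDirInode (i.e. containers) are returned.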
  1798  
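// dirEntrySliceStackElementStruct is one level of the treewalk performed by
// MiddlewareGetContainer() below: an element is pushed when descending into a
// DirInode and popped once its dirEntrySlice is exhausted, so a walk currently
// inside "c/d1/d2" holds elements for "c", "c/d1", and "c/d1/d2".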
  1799  type dirEntrySliceStackElementStruct struct {
  1800  	dirPath       string
  1801  	dirEntrySlice []inode.DirEntry
  1802  	numConsumed   int
  1803  	moreEntries   bool
  1804  }
  1805  
  1806  func (vS *volumeStruct) MiddlewareGetContainer(vContainerName string, maxEntries uint64, marker string, endmarker string, prefix string, delimiter string) (containerEnts []ContainerEntry, err error) {
  1807  	var (
  1808  		containerEntry                ContainerEntry
  1809  		containerEntryBasename        string // Misnamed... this is actually everything after ContainerName
  1810  		containerEntryPath            string
  1811  		containerEntryPathSplit       []string // Split on only the first '/' (to remove ContainerName from it)
  1812  		doSingleDirectory             bool
  1813  		dirEntryInodeLock             *dlm.RWLockStruct
  1814  		dirEntryInodeNumber           inode.InodeNumber
  1815  		dirEntryInodeType             inode.InodeType
  1816  		dirEntryMetadata              *inode.MetadataStruct
  1817  		dirEntryPath                  string
  1818  		dirEntrySlice                 []inode.DirEntry
  1819  		dirEntrySliceElement          inode.DirEntry
  1820  		dirEntrySliceElementIndex     int
  1821  		dirEntrySliceElementToPrepend *inode.DirEntry
  1822  		dirEntrySliceStack            []*dirEntrySliceStackElementStruct
  1823  		dirEntrySliceStackElement     *dirEntrySliceStackElementStruct
  1824  		dirEntrySliceToAppend         []inode.DirEntry
  1825  		dirInodeNumber                inode.InodeNumber
  1826  		dirPath                       string
  1827  		dirPathSplit                  []string
  1828  		dlmCallerID                   dlm.CallerID
  1829  		endmarkerCanonicalized        string
  1830  		endmarkerPath                 []string
  1831  		heldLocks                     *heldLocksStruct
  1832  		initialDirEntryToMatch        string // == "" if no initial path should be returned (i.e. in marker starting point case)
  1833  		inodeVolumeHandle             inode.VolumeHandle
  1834  		markerCanonicalized           string
  1835  		markerPath                    []string
  1836  		markerPathDirInodeIndex       int
  1837  		moreEntries                   bool
  1838  		pathIndex                     int
  1839  		prefixCanonicalized           string
  1840  		prefixPath                    []string
  1841  		prefixPathDirInodeIndex       int
  1842  		prevReturned                  string
  1843  		remainingMaxEntries           uint64
  1844  		retryRequired                 bool
  1845  		tryLockBackoffContext         *tryLockBackoffContextStruct
  1846  	)
  1847  
  1848  	// Validate marker, endmarker, and prefix
  1849  
  1850  	if "" == marker {
  1851  		markerPath = []string{}
  1852  		markerPathDirInodeIndex = -1 // Must be special cased below to ensure we don't look in markerPath
  1853  		markerCanonicalized = ""     // Actually never accessed
  1854  	} else {
  1855  		markerPath, markerPathDirInodeIndex, err = vS.canonicalizePathAndLocateLeafDirInode(vContainerName + "/" + marker)
  1856  		if nil != err {
  1857  			err = blunder.AddError(err, blunder.InvalidArgError)
  1858  			return
  1859  		}
  1860  
  1861  		markerCanonicalized = strings.Join(markerPath, "/")
  1862  		if strings.HasSuffix(marker, "/") {
  1863  			markerCanonicalized += "/"
  1864  		}
  1865  
  1866  		if vContainerName+"/"+marker != markerCanonicalized {
  1867  			err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a canonicalized marker")
  1868  			return
  1869  		}
  1870  	}
  1871  
  1872  	if "" == endmarker {
  1873  		endmarkerPath = []string{}
  1874  		endmarkerCanonicalized = "" // Actually never accessed
  1875  	} else {
  1876  		endmarkerPath, _, err = vS.canonicalizePathAndLocateLeafDirInode(vContainerName + "/" + endmarker)
  1877  		if nil != err {
  1878  			err = blunder.AddError(err, blunder.InvalidArgError)
  1879  			return
  1880  		}
  1881  
  1882  		endmarkerCanonicalized = strings.Join(endmarkerPath, "/")
  1883  		if strings.HasSuffix(endmarker, "/") {
  1884  			endmarkerCanonicalized += "/"
  1885  		}
  1886  
  1887  		if vContainerName+"/"+endmarker != endmarkerCanonicalized {
  1888  			err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a canonicalized endmarker")
  1889  			return
  1890  		}
  1891  	}
  1892  
  1893  	prefixPath, prefixPathDirInodeIndex, err = vS.canonicalizePathAndLocateLeafDirInode(vContainerName + "/" + prefix)
  1894  	if nil != err {
  1895  		err = blunder.AddError(err, blunder.InvalidArgError)
  1896  		return
  1897  	}
  1898  	if prefixPathDirInodeIndex < 0 {
  1899  		err = blunder.NewError(blunder.NotFoundError, "MiddlewareGetContainer() only supports querying an existing Container")
  1900  		return
  1901  	}
  1902  
  1903  	prefixCanonicalized = strings.Join(prefixPath, "/")
  1904  	if strings.HasSuffix(prefix, "/") {
  1905  		prefixCanonicalized += "/"
  1906  	}
  1907  
  1908  	if (prefix != "") && (vContainerName+"/"+prefix != prefixCanonicalized) {
  1909  		err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a canonicalized prefix")
  1910  		return
  1911  	}
  1912  
  1913  	// Validate delimiter
  1914  
  1915  	switch delimiter {
  1916  	case "":
  1917  		doSingleDirectory = false
  1918  	case "/":
  1919  		doSingleDirectory = true
  1920  	default:
  1921  		err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a delimiter of \"/\"")
  1922  		return
  1923  	}
  1924  
  1925  	// Determine the DirInode from which to begin our enumeration
  1926  
  1927  	pathIndex = 0
  1928  
DetermineStartingDirInode:
  1929  	for {
  1930  		if (pathIndex > markerPathDirInodeIndex) && (pathIndex > prefixPathDirInodeIndex) {
  1931  			// Special (though probably typical) case where marker lands in prefix-indicated directory
  1932  
  1933  			dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  1934  
  1935  			if (1 == len(prefixPath)) || strings.HasSuffix(prefix, "/") {
  1936  				if (markerPathDirInodeIndex + 1) == len(markerPath) {
  1937  					prevReturned = ""
  1938  				} else {
  1939  					prevReturned = markerPath[markerPathDirInodeIndex+1]
  1940  				}
  1941  				initialDirEntryToMatch = ""
  1942  			} else {
  1943  				// Handle four remaining cases:
  1944  				//   marker & prefix both specified directories
  1945  				//   marker specified a directory, prefix did not
  1946  				//   prefix specified a directory, marker did not
  1947  				//   neither marker nor prefix specified a directory
  1948  
  1949  				if (markerPathDirInodeIndex + 1) == len(markerPath) {
  1950  					if (prefixPathDirInodeIndex + 1) == len(prefixPath) {
  1951  						// Case where marker & prefix both specified directories
  1952  
  1953  						prevReturned = ""
  1954  					} else {
  1955  						// Case where marker specified a directory, prefix did not
  1956  
  1957  						prevReturned = prefixPath[prefixPathDirInodeIndex+1]
  1958  					}
  1959  					initialDirEntryToMatch = prevReturned
  1960  				} else { // (markerPathDirInodeIndex + 1) != len(markerPath)
  1961  					if (prefixPathDirInodeIndex + 1) == len(prefixPath) {
  1962  						// Case where prefix specified a directory, marker did not
  1963  
  1964  						prevReturned = markerPath[markerPathDirInodeIndex+1]
  1965  						initialDirEntryToMatch = ""
  1966  					} else {
  1967  						// Case where neither marker nor prefix specified a directory
  1968  
  1969  						if strings.Compare(prefixPath[prefixPathDirInodeIndex+1], markerPath[markerPathDirInodeIndex+1]) <= 0 {
  1970  							prevReturned = markerPath[markerPathDirInodeIndex+1]
  1971  							initialDirEntryToMatch = ""
  1972  						} else {
  1973  							prevReturned = prefixPath[prefixPathDirInodeIndex+1]
  1974  							initialDirEntryToMatch = prevReturned
  1975  						}
  1976  					}
  1977  				}
  1978  			}
  1979  			break
  1980  		}
  1981  
  1982  		if pathIndex > markerPathDirInodeIndex {
  1983  			// Handle case where prefix is more constraining than marker
  1984  
  1985  			if prefixPathDirInodeIndex == (len(prefixPath) - 1) {
  1986  				if (1 == len(prefixPath)) || strings.HasSuffix(prefix, "/") {
  1987  					dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  1988  					prevReturned = ""
  1989  				} else {
  1990  					dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex], "/")
  1991  					prevReturned = prefixPath[len(prefixPath)-1]
  1992  				}
  1993  			} else {
  1994  				dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  1995  				prevReturned = prefixPath[len(prefixPath)-1]
  1996  			}
  1997  			initialDirEntryToMatch = prevReturned
  1998  			break
  1999  		}
  2000  
  2001  		if pathIndex > prefixPathDirInodeIndex {
  2002  			// Handle case where marker is more constraining than prefix
  2003  
  2004  			dirPath = strings.Join(markerPath[:markerPathDirInodeIndex+1], "/")
  2005  			if markerPathDirInodeIndex == (len(markerPath) - 1) {
  2006  				prevReturned = ""
  2007  			} else {
  2008  				prevReturned = markerPath[len(markerPath)-1]
  2009  			}
  2010  			initialDirEntryToMatch = ""
  2011  			break
  2012  		}
  2013  
  2014  		switch strings.Compare(prefixPath[pathIndex], markerPath[pathIndex]) {
  2015  		case -1:
  2016  			dirPath = strings.Join(markerPath[:markerPathDirInodeIndex+1], "/")
  2017  			if markerPathDirInodeIndex == (len(markerPath) - 1) {
  2018  				prevReturned = ""
  2019  			} else {
  2020  				prevReturned = markerPath[len(markerPath)-1]
  2021  			}
  2022  			initialDirEntryToMatch = ""
  2023  			break DetermineStartingDirInode // exit the for loop, not just the switch
  2024  		case 0:
  2025  			pathIndex++
  2026  		case 1:
  2027  			if prefixPathDirInodeIndex == (len(prefixPath) - 1) {
  2028  				if (1 == len(prefixPath)) || strings.HasSuffix(prefix, "/") {
  2029  					dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  2030  					prevReturned = ""
  2031  				} else {
  2032  					dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex], "/")
  2033  					prevReturned = prefixPath[len(prefixPath)-1]
  2034  				}
  2035  			} else {
  2036  				dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  2037  				prevReturned = prefixPath[len(prefixPath)-1]
  2038  			}
  2039  			initialDirEntryToMatch = prevReturned
  2040  			break DetermineStartingDirInode // exit the for loop, not just the switch
  2041  		}
  2042  	}
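
	// Worked example (illustrative): with prefix == "a/b/" and marker == "a/b/c"
	// (where "a" and "b" are existing directories), the loop above consumes the
	// common path elements vContainerName, "a", and "b", then hits the special
	// case: dirPath == vContainerName+"/a/b", prevReturned == "c", and
	// initialDirEntryToMatch == "" ... so enumeration resumes just after "c"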
  2043  
  2044  	// Set up shortcuts/constants
  2045  
  2046  	dlmCallerID = dlm.GenerateCallerID()
  2047  	inodeVolumeHandle = vS.inodeVolumeHandle
  2048  
  2049  	// Compute initial response
  2050  
  2051  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2052  
  2053  Restart:
  2054  
  2055  	tryLockBackoffContext.backoff()
  2056  
  2057  	heldLocks = newHeldLocks()
  2058  
  2059  	_, dirInodeNumber, _, _, retryRequired, err =
  2060  		vS.resolvePath(
  2061  			inode.RootDirInodeNumber,
  2062  			dirPath,
  2063  			heldLocks,
  2064  			resolvePathDirEntryInodeMustBeDirectory)
  2065  	if nil != err {
  2066  		heldLocks.free()
  2067  		return
  2068  	}
  2069  	if retryRequired {
  2070  		heldLocks.free()
  2071  		goto Restart
  2072  	}
  2073  
  2074  	containerEnts = make([]ContainerEntry, 0, maxEntries)
  2075  
  2076  	if 0 == maxEntries {
  2077  		heldLocks.free()
  2078  		err = nil
  2079  		return
  2080  	}
  2081  
  2082  	if "" == initialDirEntryToMatch {
  2083  		dirEntrySliceElementToPrepend = nil
  2084  	} else {
  2085  		if "" == dirPath {
  2086  			dirEntryPath = initialDirEntryToMatch
  2087  		} else {
  2088  			dirEntryPath = dirPath + "/" + initialDirEntryToMatch
  2089  		}
  2090  		if ("" != endmarker) && (strings.Compare(dirEntryPath, endmarkerCanonicalized) >= 0) {
  2091  			heldLocks.free()
  2092  			err = nil
  2093  			return
  2094  		}
  2095  		dirEntryInodeNumber, err = inodeVolumeHandle.Lookup(dirInodeNumber, initialDirEntryToMatch)
  2096  		if nil == err {
  2097  			retryRequired = heldLocks.attemptSharedLock(inodeVolumeHandle, dlmCallerID, dirEntryInodeNumber)
  2098  			if retryRequired {
  2099  				heldLocks.free()
  2100  				goto Restart
  2101  			}
  2102  			dirEntryInodeType, err = inodeVolumeHandle.GetType(dirEntryInodeNumber)
  2103  			if nil == err {
  2104  				dirEntrySliceElementToPrepend = &inode.DirEntry{
  2105  					InodeNumber: dirEntryInodeNumber,
  2106  					Basename:    initialDirEntryToMatch,
  2107  					Type:        dirEntryInodeType,
  2108  				}
  2109  			} else {
  2110  				dirEntrySliceElementToPrepend = nil
  2111  			}
  2112  			heldLocks.unlock(dirEntryInodeNumber)
  2113  		} else {
  2114  			dirEntrySliceElementToPrepend = nil
  2115  		}
  2116  	}
  2117  
  2118  	heldLocks.free()
  2119  
  2120  	if 0 == maxEntries {
  2121  		remainingMaxEntries = 0
  2122  	} else {
  2123  		if nil == dirEntrySliceElementToPrepend {
  2124  			remainingMaxEntries = maxEntries
  2125  		} else {
  2126  			remainingMaxEntries = maxEntries - 1
  2127  		}
  2128  	}
  2129  
  2130  	// At this point:
  2131  	//   no heldLocks
  2132  	//   containerEnts has been declared
  2133  	//   doSingleDirectory is set based on supplied delimiter
  2134  	//   if {marker,endmarker,prefix} asked to include an exact matched path that existed, it's in dirEntrySliceElementToPrepend
  2135  	//   prefixCanonicalized & endmarkerCanonicalized are set to terminate the ensuing treewalk
  2136  	//   remainingMaxEntries indicates how many more DirEntry's will fit in containerEnts (if capped)
  2137  	//   dirPath is pointing to the initial DirInode to read
  2138  	//   prevReturned indicates from where in the DirInode to start reading
  2139  
  2140  	// Perform initial ReadDir and place in dirEntrySliceStack
  2141  
  2142  	if nil == dirEntrySliceElementToPrepend {
  2143  		_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2144  		if nil != err {
  2145  			return
  2146  		}
  2147  	} else {
  2148  		if 0 == remainingMaxEntries {
  2149  			dirEntrySlice = []inode.DirEntry{*dirEntrySliceElementToPrepend}
  2150  			moreEntries = false
  2151  		} else {
  2152  			_, dirEntrySliceToAppend, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2153  			if nil == err {
  2154  				dirEntrySlice = make([]inode.DirEntry, 1, 1+len(dirEntrySliceToAppend))
  2155  				dirEntrySlice[0] = *dirEntrySliceElementToPrepend
  2156  				dirEntrySlice = append(dirEntrySlice, dirEntrySliceToAppend...)
  2157  			} else {
  2158  				return
  2159  			}
  2160  		}
  2161  	}
  2162  
  2163  	dirEntrySliceStackElement = &dirEntrySliceStackElementStruct{
  2164  		dirPath:       dirPath,
  2165  		dirEntrySlice: dirEntrySlice,
  2166  		numConsumed:   0,
  2167  		moreEntries:   moreEntries,
  2168  	}
  2169  
  2170  	dirEntrySliceStack = []*dirEntrySliceStackElementStruct{dirEntrySliceStackElement}
  2171  
  2172  	containerEnts = make([]ContainerEntry, 0, len(dirEntrySlice))
  2173  
  2174  	// Now append appropriate ContainerEntry's until an exit criterion is reached
  2175  
  2176  	for uint64(len(containerEnts)) < maxEntries {
  2177  		dirEntrySliceStackElement = dirEntrySliceStack[len(dirEntrySliceStack)-1]
  2178  
  2179  		if dirEntrySliceStackElement.numConsumed == len(dirEntrySliceStackElement.dirEntrySlice) {
  2180  			if dirEntrySliceStackElement.moreEntries {
  2181  				dirPath = dirEntrySliceStackElement.dirPath
  2182  				dirEntrySlice = dirEntrySliceStackElement.dirEntrySlice
  2183  				dirEntrySliceElementIndex = len(dirEntrySlice) - 1
  2184  				dirEntrySliceElement = dirEntrySlice[dirEntrySliceElementIndex]
  2185  				prevReturned = dirEntrySliceElement.Basename
  2186  
  2187  				_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2188  				if (nil != err) || (0 == len(dirEntrySlice)) {
  2189  					// Even though we thought there were moreEntries, there now are not for some reason
  2190  
  2191  					if doSingleDirectory {
  2192  						// Regardless of remaining contents of dirEntrySliceStack, we must be done
  2193  
  2194  						err = nil
  2195  						return
  2196  					}
  2197  
  2198  					// Navigate to parent directory
  2199  
  2200  					dirEntrySliceStack = dirEntrySliceStack[:len(dirEntrySliceStack)-1]
  2201  					continue
  2202  				}
  2203  
  2204  				// Restart this loop on current dirEntrySliceStackElement with new middlewareReadDirHelper() results
  2205  
  2206  				dirEntrySliceStackElement.dirEntrySlice = dirEntrySlice
  2207  				dirEntrySliceStackElement.numConsumed = 0
  2208  				dirEntrySliceStackElement.moreEntries = moreEntries
  2209  
  2210  				continue
  2211  			} else {
  2212  				// We've reached the end of this DirInode
  2213  
  2214  				if doSingleDirectory {
  2215  					// Regardless of remaining contents of dirEntrySliceStack, we must be done
  2216  
  2217  					err = nil
  2218  					return
  2219  				}
  2220  
  2221  				// Navigate to parent directory (staying within this Container)
  2222  
  2223  				if 1 == len(dirEntrySliceStack) {
  2224  					// We are at the starting directory
  2225  
  2226  					dirPathSplit = strings.Split(dirEntrySliceStackElement.dirPath, "/")
  2227  
  2228  					if 1 == len(dirPathSplit) {
  2229  						// We just finished Container-level directory, so we are done
  2230  
  2231  						err = nil
  2232  						return
  2233  					}
  2234  
  2235  					// Modify dirEntrySliceStackElement to point to parent directory as if we'd just processed the dirEntry of this directory
  2236  
  2237  					dirPath = strings.Join(dirPathSplit[:len(dirPathSplit)-1], "/")
  2238  
  2239  					if 0 == maxEntries {
  2240  						remainingMaxEntries = 0
  2241  					} else {
  2242  						remainingMaxEntries = maxEntries - uint64(len(containerEnts))
  2243  					}
  2244  
  2245  					prevReturned = dirPathSplit[len(dirPathSplit)-1]
  2246  
  2247  					_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2248  					if nil != err {
  2249  						return
  2250  					}
  2251  
  2252  					dirEntrySliceStackElement.dirPath = dirPath
  2253  					dirEntrySliceStackElement.dirEntrySlice = dirEntrySlice
  2254  					dirEntrySliceStackElement.numConsumed = 0
  2255  					dirEntrySliceStackElement.moreEntries = moreEntries
  2256  				} else {
  2257  					// Parent directory already in dirEntrySliceStack... so just pop current ...Element
  2258  
  2259  					dirEntrySliceStack = dirEntrySliceStack[:len(dirEntrySliceStack)-1]
  2260  				}
  2261  
  2262  				continue
  2263  			}
  2264  		}
  2265  
  2266  		// Consume next dirEntrySliceElement
  2267  		// ...skipping "." and ".."
  2268  		// ...recursing when encountering DirInode's if !doSingleDirectory
  2269  		// ...terminating early if any of:
  2270  		//      len(containerEnts) reaches maxEntries
  2271  		//      <dirPath>/<Basename> <= marker
  2272  		//      <dirPath>/<Basename> >= endmarker
  2273  		//      <dirPath>/<Basename> does not start with prefix
  2274  
  2275  		dirEntrySlice = dirEntrySliceStackElement.dirEntrySlice
  2276  		dirEntrySliceElementIndex = dirEntrySliceStackElement.numConsumed
  2277  		dirEntrySliceElement = dirEntrySlice[dirEntrySliceElementIndex]
  2278  
  2279  		dirEntrySliceStackElement.numConsumed++
  2280  
  2281  		if ("." == dirEntrySliceElement.Basename) || (".." == dirEntrySliceElement.Basename) {
  2282  			continue
  2283  		}
  2284  
  2285  		containerEntryPath = dirEntrySliceStackElement.dirPath + "/" + dirEntrySliceElement.Basename
  2286  
  2287  		if ("" != marker) && (strings.Compare(containerEntryPath, markerCanonicalized) <= 0) {
  2288  			err = nil
  2289  			return
  2290  		}
  2291  		if ("" != endmarker) && (strings.Compare(containerEntryPath, endmarkerCanonicalized) >= 0) {
  2292  			err = nil
  2293  			return
  2294  		}
  2295  		if ("" != prefix) && !strings.HasPrefix(containerEntryPath, prefixCanonicalized) {
  2296  			err = nil
  2297  			return
  2298  		}
  2299  
  2300  		// Ok... so we actually want to append this entry to containerEnts
  2301  
  2302  		tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2303  
  2304  	Retry:
  2305  
  2306  		tryLockBackoffContext.backoff()
  2307  
  2308  		dirEntryInodeLock, err = inodeVolumeHandle.AttemptReadLock(dirEntrySliceElement.InodeNumber, dlmCallerID)
  2309  		if nil != err {
  2310  			goto Retry
  2311  		}
  2312  
  2313  		dirEntryMetadata, err = inodeVolumeHandle.GetMetadata(dirEntrySliceElement.InodeNumber)
  2314  		if nil != err {
  2315  			// Ok... so it must have disappeared... just skip it
  2316  
  2317  			err = dirEntryInodeLock.Unlock()
  2318  			if nil != err {
  2319  				logger.Fatalf("Failure unlocking a held LockID %s: %v", dirEntryInodeLock.LockID, err)
  2320  			}
  2321  
  2322  			continue
  2323  		}
  2324  
  2325  		containerEntryPathSplit = strings.SplitN(containerEntryPath, "/", 2)
  2326  		containerEntryBasename = containerEntryPathSplit[1]
  2327  
  2328  		containerEntry = ContainerEntry{
  2329  			Basename:         containerEntryBasename,
  2330  			FileSize:         dirEntryMetadata.Size,
  2331  			ModificationTime: uint64(dirEntryMetadata.ModificationTime.UnixNano()),
  2332  			AttrChangeTime:   uint64(dirEntryMetadata.AttrChangeTime.UnixNano()),
  2333  			IsDir:            (dirEntrySliceElement.Type == inode.DirType),
  2334  			NumWrites:        dirEntryMetadata.NumWrites,
  2335  			InodeNumber:      uint64(dirEntrySliceElement.InodeNumber),
  2336  		}
  2337  
  2338  		containerEntry.Metadata, err = inodeVolumeHandle.GetStream(dirEntrySliceElement.InodeNumber, MiddlewareStream)
  2339  		if nil != err {
  2340  			if blunder.Is(err, blunder.StreamNotFound) {
  2341  				// No MiddlewareStream... just make it appear empty
  2342  
  2343  				containerEntry.Metadata = []byte{}
  2344  				err = nil
  2345  			} else {
  2346  				// Ok... so it must have disappeared... just skip it
  2347  
  2348  				err = dirEntryInodeLock.Unlock()
  2349  				if nil != err {
  2350  					logger.Fatalf("Failure unlocking a held LockID %s: %v", dirEntryInodeLock.LockID, err)
  2351  				}
  2352  
  2353  				continue
  2354  			}
  2355  		}
  2356  
  2357  		// We can finally Unlock() this dirEntryInodeLock
  2358  
  2359  		err = dirEntryInodeLock.Unlock()
  2360  		if nil != err {
  2361  			logger.Fatalf("Failure unlocking a held LockID %s: %v", dirEntryInodeLock.LockID, err)
  2362  		}
  2363  
  2364  		// If we reach here, we get to append this containerEntry to containerEnts
  2365  
  2366  		containerEnts = append(containerEnts, containerEntry)
  2367  
  2368  		// We must now descend into dirEntryInode if it's a DirInode and !doSingleDirectory
  2369  
  2370  		if !doSingleDirectory && (dirEntrySliceElement.Type == inode.DirType) {
  2371  			dirPath = dirEntrySliceStackElement.dirPath + "/" + dirEntrySliceElement.Basename
  2372  
  2373  			if 0 == maxEntries {
  2374  				remainingMaxEntries = 0
  2375  			} else {
  2376  				remainingMaxEntries = maxEntries - uint64(len(containerEnts))
  2377  			}
  2378  
  2379  			prevReturned = ""
  2380  
  2381  			_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2382  			if nil != err {
  2383  				return
  2384  			}
  2385  
  2386  			dirEntrySliceStackElement = &dirEntrySliceStackElementStruct{
  2387  				dirPath:       dirPath,
  2388  				dirEntrySlice: dirEntrySlice,
  2389  				numConsumed:   0,
  2390  				moreEntries:   moreEntries,
  2391  			}
  2392  
  2393  			dirEntrySliceStack = append(dirEntrySliceStack, dirEntrySliceStackElement)
  2394  		}
  2395  	}
  2396  
  2397  	// We will only reach here if we exhausted maxEntries before exhausting the tree/list of containerEntry's to append
  2398  
  2399  	err = nil
  2400  	return
  2401  }
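
// Illustrative sketch: delimiter "/" restricts the listing to a single
// directory level while delimiter "" walks the whole subtree, e.g.:
//
//	containerEnts, err := vS.MiddlewareGetContainer("c", 1000, "", "", "dir/", "/")
//
// returns only the immediate children of "c/dir"; note that marker, endmarker,
// and prefix must already be canonicalized, per the validation above.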
  2402  
  2403  func (vS *volumeStruct) MiddlewareGetObject(containerObjectPath string,
  2404  	readRangeIn []ReadRangeIn, readRangeOut *[]inode.ReadPlanStep) (
  2405  	response HeadResponse, err error) {
  2406  
  2407  	var (
  2408  		dirEntryInodeNumber   inode.InodeNumber
  2409  		fileOffset            uint64
  2410  		heldLocks             *heldLocksStruct
  2411  		inodeVolumeHandle     inode.VolumeHandle
  2412  		readPlan              []inode.ReadPlanStep
  2413  		readRangeInIndex      int
  2414  		retryRequired         bool
  2415  		stat                  Stat
  2416  		tryLockBackoffContext *tryLockBackoffContextStruct
  2417  	)
  2418  
  2419  	startTime := time.Now()
  2420  	defer func() {
  2421  		var totalReadBytes uint64
  2422  		for _, step := range *readRangeOut {
  2423  			totalReadBytes += step.Length
  2424  		}
  2425  
  2426  		globals.MiddlewareGetObjectUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2427  		globals.MiddlewareGetObjectBytes.Add(totalReadBytes)
  2428  		if err != nil {
  2429  			globals.MiddlewareGetObjectErrors.Add(1)
  2430  		}
  2431  	}()
  2432  
  2433  	// Retry until done or failure (starting with ZERO backoff)
  2434  
  2435  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2436  
  2437  Restart:
  2438  
  2439  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2440  
  2441  	tryLockBackoffContext.backoff()
  2442  
  2443  	// Construct fresh heldLocks for this restart
  2444  
  2445  	heldLocks = newHeldLocks()
  2446  
  2447  	_, dirEntryInodeNumber, _, _, retryRequired, err =
  2448  		vS.resolvePath(
  2449  			inode.RootDirInodeNumber,
  2450  			containerObjectPath,
  2451  			heldLocks,
  2452  			resolvePathFollowDirEntrySymlinks|
  2453  				resolvePathFollowDirSymlinks)
  2454  
  2455  	if nil != err {
  2456  		heldLocks.free()
  2457  		return
  2458  	}
  2459  
  2460  	if retryRequired {
  2461  		heldLocks.free()
  2462  		goto Restart
  2463  	}
  2464  
  2465  	// Now assemble response
  2466  
  2467  	stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber)
  2468  	if nil != err {
  2469  		heldLocks.free()
  2470  		return
  2471  	}
  2472  
  2473  	response.FileSize = stat[StatSize]
  2474  	response.ModificationTime = stat[StatMTime]
  2475  	response.AttrChangeTime = stat[StatCTime]
  2476  	response.IsDir = (stat[StatFType] == uint64(inode.DirType))
  2477  	response.InodeNumber = dirEntryInodeNumber
  2478  	response.NumWrites = stat[StatNumWrites]
  2479  
  2480  	// Swift thinks all directories have a size of 0 (and symlinks as well)
  2481  	if stat[StatFType] != uint64(inode.FileType) {
  2482  		response.FileSize = 0
  2483  	}
  2484  
  2485  	response.Metadata, err = vS.inodeVolumeHandle.GetStream(dirEntryInodeNumber, MiddlewareStream)
  2486  	if nil != err {
  2487  		if blunder.Is(err, blunder.StreamNotFound) {
  2488  			response.Metadata = []byte{}
  2489  			err = nil
  2490  		} else {
  2491  			heldLocks.free()
  2492  			return
  2493  		}
  2494  	}
  2495  
  2496  	// The only thing left is to construct a read plan and only regular
  2497  	// files have read plans.  If this is not a regular file then we're
  2498  	// done.
  2499  	if stat[StatFType] != uint64(inode.FileType) {
  2500  		heldLocks.free()
  2501  		return
  2502  	}
  2503  
  2504  	inodeVolumeHandle = vS.inodeVolumeHandle
  2505  	if len(readRangeIn) == 0 {
  2506  		// Get ReadPlan for entire file
  2507  
  2508  		fileOffset = 0
  2509  
  2510  		readPlan, err = inodeVolumeHandle.GetReadPlan(dirEntryInodeNumber, &fileOffset, &response.FileSize)
  2511  		if nil != err {
  2512  			heldLocks.free()
  2513  			return
  2514  		}
  2515  
  2516  		_ = appendReadPlanEntries(readPlan, readRangeOut)
  2517  	} else { // len(readRangeIn) > 0
  2518  		// Append each computed range
  2519  
  2520  		for readRangeInIndex = range readRangeIn {
  2521  			readPlan, err = inodeVolumeHandle.GetReadPlan(dirEntryInodeNumber, readRangeIn[readRangeInIndex].Offset, readRangeIn[readRangeInIndex].Len)
  2522  			if nil != err {
  2523  				heldLocks.free()
  2524  				return
  2525  			}
  2526  
  2527  			_ = appendReadPlanEntries(readPlan, readRangeOut)
  2528  		}
  2529  	}
  2530  
  2531  	heldLocks.free()
  2532  
  2533  	err = nil
  2534  	return
  2535  }
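
// Illustrative sketch (parameter shapes inferred from the loop above): an
// empty readRangeIn yields a read plan for the whole file, while explicit
// ranges are appended one per element:
//
//	var readPlanSteps []inode.ReadPlanStep
//	offset, length := uint64(0), uint64(4096)
//	response, err := vS.MiddlewareGetObject(
//		"c/obj",
//		[]ReadRangeIn{{Offset: &offset, Len: &length}},
//		&readPlanSteps)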
  2536  
  2537  func (vS *volumeStruct) MiddlewareHeadResponse(entityPath string) (response HeadResponse, err error) {
  2538  	var (
  2539  		dirEntryInodeNumber   inode.InodeNumber
  2540  		heldLocks             *heldLocksStruct
  2541  		retryRequired         bool
  2542  		stat                  Stat
  2543  		tryLockBackoffContext *tryLockBackoffContextStruct
  2544  	)
  2545  
  2546  	startTime := time.Now()
  2547  	defer func() {
  2548  		globals.MiddlewareHeadResponseUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2549  		if err != nil {
  2550  			globals.MiddlewareHeadResponseErrors.Add(1)
  2551  		}
  2552  	}()
  2553  
  2554  	// Retry until done or failure (starting with ZERO backoff)
  2555  
  2556  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2557  
  2558  Restart:
  2559  
  2560  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2561  
  2562  	tryLockBackoffContext.backoff()
  2563  
  2564  	// Construct fresh heldLocks for this restart
  2565  
  2566  	heldLocks = newHeldLocks()
  2567  
  2568  	_, dirEntryInodeNumber, _, _, retryRequired, err =
  2569  		vS.resolvePath(
  2570  			inode.RootDirInodeNumber,
  2571  			entityPath,
  2572  			heldLocks,
  2573  			resolvePathFollowDirEntrySymlinks|
  2574  				resolvePathFollowDirSymlinks)
  2575  
  2576  	if nil != err {
  2577  		heldLocks.free()
  2578  		return
  2579  	}
  2580  
  2581  	if retryRequired {
  2582  		heldLocks.free()
  2583  		goto Restart
  2584  	}
  2585  
  2586  	// Now assemble response
  2587  
  2588  	stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber)
  2589  	if nil != err {
  2590  		heldLocks.free()
  2591  		return
  2592  	}
  2593  
  2594  	// since resolvePathFollowDirEntrySymlinks is set on the call to
  2595  	// resolvePath(), above, we'll never see a symlink returned
  2596  	response.ModificationTime = stat[StatMTime]
  2597  	response.AttrChangeTime = stat[StatCTime]
  2598  	response.FileSize = stat[StatSize]
  2599  	response.IsDir = (stat[StatFType] == uint64(inode.DirType))
  2600  	response.InodeNumber = dirEntryInodeNumber
  2601  	response.NumWrites = stat[StatNumWrites]
  2602  
  2603  	// Swift thinks all directories have a size of 0 (and symlinks as well)
  2604  	if stat[StatFType] != uint64(inode.FileType) {
  2605  		response.FileSize = 0
  2606  	}
  2607  
  2608  	response.Metadata, err = vS.inodeVolumeHandle.GetStream(dirEntryInodeNumber, MiddlewareStream)
  2609  	if nil != err {
  2610  		heldLocks.free()
  2611  		response.Metadata = []byte{}
  2612  		// If someone makes a directory or file via SMB/FUSE and then
  2613  		// HEADs it via HTTP, we'll see this error. We treat it as
  2614  		// though there is no metadata. The middleware is equipped to
  2615  		// handle this case.
  2616  		if blunder.Is(err, blunder.StreamNotFound) {
  2617  			err = nil
  2618  		}
  2619  		return
  2620  	}
  2621  
  2622  	heldLocks.free()
  2623  	return
  2624  }
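
// Illustrative sketch: HEAD semantics mirror Swift's, so directories (and
// symlink targets) report FileSize == 0 and a missing MiddlewareStream comes
// back as empty Metadata rather than an error:
//
//	response, err := vS.MiddlewareHeadResponse("c/dir")
//	// on success: response.IsDir == true, response.FileSize == 0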
  2625  
  2626  func (vS *volumeStruct) MiddlewarePost(parentDir string, baseName string, newMetaData []byte, oldMetaData []byte) (err error) {
  2627  	var (
  2628  		dirEntryInodeNumber   inode.InodeNumber
  2629  		existingStreamData    []byte
  2630  		heldLocks             *heldLocksStruct
  2631  		retryRequired         bool
  2632  		tryLockBackoffContext *tryLockBackoffContextStruct
  2633  	)
  2634  
  2635  	startTime := time.Now()
  2636  	defer func() {
  2637  		globals.MiddlewarePostUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2638  		globals.MiddlewarePostBytes.Add(uint64(len(newMetaData)))
  2639  		if err != nil {
  2640  			globals.MiddlewarePostErrors.Add(1)
  2641  		}
  2642  	}()
  2643  
  2644  	// Retry until done or failure (starting with ZERO backoff)
  2645  
  2646  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2647  
  2648  Restart:
  2649  
  2650  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2651  
  2652  	tryLockBackoffContext.backoff()
  2653  
  2654  	// Construct fresh heldLocks for this restart
  2655  
  2656  	heldLocks = newHeldLocks()
  2657  
  2658  	_, dirEntryInodeNumber, _, _, retryRequired, err =
  2659  		vS.resolvePath(
  2660  			inode.RootDirInodeNumber,
  2661  			parentDir+"/"+baseName,
  2662  			heldLocks,
  2663  			resolvePathFollowDirEntrySymlinks|
  2664  				resolvePathFollowDirSymlinks|
  2665  				resolvePathCreateMissingPathElements|
  2666  				resolvePathRequireExclusiveLockOnDirEntryInode)
  2667  
  2668  	if nil != err {
  2669  		heldLocks.free()
  2670  		return
  2671  	}
  2672  
  2673  	if retryRequired {
  2674  		heldLocks.free()
  2675  		goto Restart
  2676  	}
  2677  
  2678  	// Now apply MiddlewareStream update
  2679  
  2680  	// Compare oldMetaData to existing existingStreamData to make sure that the HTTP metadata has not changed.
  2681  	// If it has changed, then return an error since middleware has to handle it.
  2682  
  2683  	existingStreamData, err = vS.inodeVolumeHandle.GetStream(dirEntryInodeNumber, MiddlewareStream)
  2684  	if nil != err {
  2685  		if blunder.Is(err, blunder.StreamNotFound) {
  2686  			err = nil
  2687  			existingStreamData = make([]byte, 0)
  2688  		} else {
  2689  			heldLocks.free()
  2690  			return
  2691  		}
  2692  	}
  2693  
  2694  	// Verify that the oldMetaData is the same as the one we think we are changing.
  2695  
  2696  	if !bytes.Equal(existingStreamData, oldMetaData) {
  2697  		heldLocks.free()
  2698  		err = blunder.NewError(blunder.TryAgainError, "MiddlewarePost(): MetaData different - existingStreamData: %v OldMetaData: %v", existingStreamData, oldMetaData)
  2699  		return
  2700  	}
  2701  
  2702  	// Change looks okay so make it.
  2703  
  2704  	err = vS.inodeVolumeHandle.PutStream(dirEntryInodeNumber, MiddlewareStream, newMetaData)
  2705  	if nil != err {
  2706  		heldLocks.free()
  2707  		return
  2708  	}
  2709  
  2710  	// PutStream() implicitly flushed... so, if it was a FileInode, we don't need to track it anymore
  2711  
  2712  	vS.untrackInFlightFileInodeData(dirEntryInodeNumber, false)
  2713  
  2714  	heldLocks.free()
  2715  	return
  2716  }
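
// Illustrative sketch: MiddlewarePost() is effectively a compare-and-swap on
// the MiddlewareStream... the caller echoes back the metadata it last read:
//
//	err := vS.MiddlewarePost("c/dir", "obj", newMetaData, oldMetaData)
//	if blunder.Is(err, blunder.TryAgainError) {
//		// metadata changed underneath us; re-read and retry
//	}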
  2717  
  2718  func (vS *volumeStruct) MiddlewarePutComplete(vContainerName string, vObjectPath string, pObjectPaths []string, pObjectLengths []uint64, pObjectMetadata []byte) (mtime uint64, ctime uint64, fileInodeNumber inode.InodeNumber, numWrites uint64, err error) {
  2719  	var (
  2720  		containerName         string
  2721  		dirInodeNumber        inode.InodeNumber
  2722  		dirEntryInodeNumber   inode.InodeNumber
  2723  		dirEntryBasename      string
  2724  		dirEntryInodeType     inode.InodeType
  2725  		fileOffset            uint64
  2726  		heldLocks             *heldLocksStruct
  2727  		inodeVolumeHandle     inode.VolumeHandle = vS.inodeVolumeHandle
  2728  		inodeWroteTime        time.Time
  2729  		numPObjects           int
  2730  		objectName            string
  2731  		pObjectIndex          int
  2732  		retryRequired         bool
  2733  		stat                  Stat
  2734  		tryLockBackoffContext *tryLockBackoffContextStruct
  2735  	)
  2736  
  2737  	startTime := time.Now()
  2738  	defer func() {
  2739  		globals.MiddlewarePutCompleteUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2740  		if err != nil {
  2741  			globals.MiddlewarePutCompleteErrors.Add(1)
  2742  		}
  2743  	}()
  2744  
  2745  	// Validate (pObjectPaths,pObjectLengths) args
  2746  
  2747  	numPObjects = len(pObjectPaths)
  2748  
  2749  	if numPObjects != len(pObjectLengths) {
  2750  		err = blunder.NewError(blunder.InvalidArgError, "MiddlewarePutComplete() expects len(pObjectPaths) == len(pObjectLengths)")
  2751  		return
  2752  	}
  2753  
  2754  	// Retry until done or failure (starting with ZERO backoff)
  2755  
  2756  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2757  
  2758  Restart:
  2759  
  2760  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2761  
  2762  	tryLockBackoffContext.backoff()
  2763  
  2764  	// Construct fresh heldLocks for this restart
  2765  
  2766  	heldLocks = newHeldLocks()
  2767  
  2768  	dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err =
  2769  		vS.resolvePath(
  2770  			inode.RootDirInodeNumber,
  2771  			vContainerName+"/"+vObjectPath,
  2772  			heldLocks,
  2773  			resolvePathFollowDirEntrySymlinks|
  2774  				resolvePathFollowDirSymlinks|
  2775  				resolvePathCreateMissingPathElements|
  2776  				resolvePathRequireExclusiveLockOnDirInode|
  2777  				resolvePathRequireExclusiveLockOnDirEntryInode)
  2778  	if nil != err {
  2779  		heldLocks.free()
  2780  		return
  2781  	}
  2782  	if retryRequired {
  2783  		heldLocks.free()
  2784  		goto Restart
  2785  	}
  2786  
  2787  	// The semantics of PUT mean that the existing object is discarded; with
  2788  	// a file we can just overwrite it, but symlinks or directories must be
  2789  	// removed (if possible).
  2790  	if dirEntryInodeType != inode.FileType {
  2791  
  2792  		if dirEntryInodeType == inode.DirType {
  2793  
  2794  			// try to unlink the directory (rmdir flushes the inodes)
  2795  			err = vS.rmdirActual(dirInodeNumber, dirEntryBasename, dirEntryInodeNumber)
  2796  			if err != nil {
  2797  				// the directory was probably not empty
  2798  				heldLocks.free()
  2799  				return
  2800  
  2801  			}
  2802  
  2803  		} else {
  2804  			// unlink the symlink (unlink flushes the inodes)
  2805  			err = vS.unlinkActual(dirInodeNumber, dirEntryBasename, dirEntryInodeNumber)
  2806  			if err != nil {
  2807  
  2808  				// ReadOnlyError is my best guess for the failure
  2809  				err = blunder.NewError(blunder.ReadOnlyError,
  2810  					"MiddlewarePutComplete(): vol '%s' failed to unlink '%s': %v",
  2811  					vS.volumeName, vContainerName+"/"+vObjectPath, err)
  2812  				heldLocks.free()
  2813  				return
  2814  			}
  2815  		}
  2816  
  2817  		// let resolvePath() create the file
  2818  		dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err =
  2819  			vS.resolvePath(
  2820  				inode.RootDirInodeNumber,
  2821  				vContainerName+"/"+vObjectPath,
  2822  				heldLocks,
  2823  				resolvePathFollowDirSymlinks|
  2824  					resolvePathCreateMissingPathElements|
  2825  					resolvePathDirEntryInodeMustBeFile|
  2826  					resolvePathRequireExclusiveLockOnDirInode|
  2827  					resolvePathRequireExclusiveLockOnDirEntryInode)
  2828  		if nil != err {
  2829  			heldLocks.free()
  2830  			return
  2831  		}
  2832  		if retryRequired {
  2833  			heldLocks.free()
  2834  			goto Restart
  2835  		}
  2836  	}
  2837  
  2838  	// Apply (pObjectPaths,pObjectLengths) to (erased) FileInode
  2839  
  2840  	inodeWroteTime = time.Now()
  2841  
  2842  	fileOffset = 0
  2843  
  2844  	for pObjectIndex = 0; pObjectIndex < numPObjects; pObjectIndex++ {
  2845  		_, containerName, objectName, err = utils.PathToAcctContObj(pObjectPaths[pObjectIndex])
  2846  		if nil != err {
  2847  			heldLocks.free()
  2848  			logger.DebugfIDWithError(internalDebug, err, "MiddlewarePutComplete(): failed utils.PathToAcctContObj(\"%s\") for dirEntryInodeNumber 0x%016X", pObjectPaths[pObjectIndex], dirEntryInodeNumber)
  2849  			return
  2850  		}
  2851  
  2852  		err = inodeVolumeHandle.Wrote(
  2853  			dirEntryInodeNumber,
  2854  			containerName,
  2855  			objectName,
  2856  			[]uint64{fileOffset},
  2857  			[]uint64{0},
  2858  			[]uint64{pObjectLengths[pObjectIndex]},
  2859  			inodeWroteTime,
  2860  			pObjectIndex > 0) // Initial pObjectIndex == 0 case will implicitly SetSize(,0)
  2861  		if nil != err {
  2862  			heldLocks.free()
  2863  			logger.DebugfIDWithError(internalDebug, err, "MiddlewarePutComplete(): failed inode.Wrote() for dirEntryInodeNumber 0x%016X", dirEntryInodeNumber)
  2864  			return
  2865  		}
  2866  
  2867  		fileOffset += pObjectLengths[pObjectIndex]
  2868  	}
  2869  
  2870  	// Apply pObjectMetadata to FileInode (this will flush it as well)
  2871  
  2872  	err = inodeVolumeHandle.PutStream(dirEntryInodeNumber, MiddlewareStream, pObjectMetadata)
  2873  	if err != nil {
  2874  		heldLocks.free()
  2875  		logger.DebugfIDWithError(internalDebug, err, "MiddlewarePutComplete(): failed PutStream() for dirEntryInodeNumber 0x%016X (pObjectMetadata: %v)", dirEntryInodeNumber, pObjectMetadata)
  2876  		return
  2877  	}
  2878  
  2879  	stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber)
  2880  	if nil != err {
  2881  		heldLocks.free()
  2882  		return
  2883  	}
  2884  
  2885  	mtime = stat[StatMTime]
  2886  	ctime = stat[StatCTime]
  2887  	fileInodeNumber = dirEntryInodeNumber
  2888  	numWrites = stat[StatNumWrites]
  2889  
  2890  	heldLocks.free()
  2891  	return
  2892  }
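
// Illustrative sketch (the log-segment paths and pObjectMetadata below are
// placeholders): the pObjects are stitched into the target file back-to-back,
// so two 1 MiB segments produce a 2 MiB file:
//
//	mtime, ctime, ino, numWrites, err := vS.MiddlewarePutComplete(
//		"c", "obj",
//		[]string{"AUTH_acct/c0/seg-0001", "AUTH_acct/c0/seg-0002"},
//		[]uint64{1 << 20, 1 << 20},
//		pObjectMetadata)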
  2893  
  2894  func (vS *volumeStruct) MiddlewareMkdir(vContainerName string, vObjectPath string, metadata []byte) (mtime uint64, ctime uint64, inodeNumber inode.InodeNumber, numWrites uint64, err error) {
  2895  	var (
  2896  		dirInodeNumber        inode.InodeNumber
  2897  		dirEntryInodeNumber   inode.InodeNumber
  2898  		dirEntryBasename      string
  2899  		dirEntryInodeType     inode.InodeType
  2900  		heldLocks             *heldLocksStruct
  2901  		retryRequired         bool
  2902  		stat                  Stat
  2903  		tryLockBackoffContext *tryLockBackoffContextStruct
  2904  	)
  2905  
  2906  	startTime := time.Now()
  2907  	defer func() {
  2908  		globals.MiddlewareMkdirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2909  		if err != nil {
  2910  			globals.MiddlewareMkdirErrors.Add(1)
  2911  		}
  2912  	}()
  2913  
  2914  	// Retry until done or failure (starting with ZERO backoff)
  2915  
  2916  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2917  
  2918  Restart:
  2919  
  2920  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2921  
  2922  	tryLockBackoffContext.backoff()
  2923  
  2924  	// Construct fresh heldLocks for this restart
  2925  
  2926  	heldLocks = newHeldLocks()
  2927  
  2928  	// Resolve the object, locking it and its parent directory exclusive
  2929  	dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err =
  2930  		vS.resolvePath(
  2931  			inode.RootDirInodeNumber,
  2932  			vContainerName+"/"+vObjectPath,
  2933  			heldLocks,
  2934  			resolvePathFollowDirSymlinks|
  2935  				resolvePathCreateMissingPathElements|
  2936  				resolvePathRequireExclusiveLockOnDirInode|
  2937  				resolvePathRequireExclusiveLockOnDirEntryInode)
  2938  	if nil != err {
  2939  		heldLocks.free()
  2940  		return
  2941  	}
  2942  	if retryRequired {
  2943  		heldLocks.free()
  2944  		goto Restart
  2945  	}
  2946  
  2947  	// The semantics of PUT for a directory object require that an existing
  2948  	// file or symlink be discarded and be replaced with a directory (an
  2949  	// existing directory is fine; it just has its headers overwritten).
  2950  	if dirEntryInodeType != inode.DirType {
  2951  
  2952  		// unlink the file or symlink (unlink flushes the inodes)
  2953  		err = vS.unlinkActual(dirInodeNumber, dirEntryBasename, dirEntryInodeNumber)
  2954  		if err != nil {
  2955  
  2956  			// ReadOnlyError is my best guess for the failure
  2957  			err = blunder.NewError(blunder.ReadOnlyError,
  2958  				"MiddlewareMkdir(): vol '%s' failed to unlink '%s': %v",
  2959  				vS.volumeName, vContainerName+"/"+vObjectPath, err)
  2960  			heldLocks.free()
  2961  			return
  2962  		}
  2963  
  2964  		// let resolvePath() make the directory
  2965  		dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err =
  2966  			vS.resolvePath(
  2967  				inode.RootDirInodeNumber,
  2968  				vContainerName+"/"+vObjectPath,
  2969  				heldLocks,
  2970  				resolvePathFollowDirSymlinks|
  2971  					resolvePathCreateMissingPathElements|
  2972  					resolvePathDirEntryInodeMustBeDirectory|
  2973  					resolvePathRequireExclusiveLockOnDirInode|
  2974  					resolvePathRequireExclusiveLockOnDirEntryInode)
  2975  		if nil != err {
  2976  			heldLocks.free()
  2977  			return
  2978  		}
  2979  		if retryRequired {
  2980  			heldLocks.free()
  2981  			goto Restart
  2982  		}
  2983  	}
  2984  
  2985  	err = vS.inodeVolumeHandle.PutStream(dirEntryInodeNumber, MiddlewareStream, metadata)
  2986  	if err != nil {
  2987  		heldLocks.free()
  2988  		logger.DebugfIDWithError(internalDebug, err, "MiddlewareMkdir(): failed PutStream() for dirEntryInodeNumber 0x%016X (metadata: %v)", dirEntryInodeNumber, metadata)
  2989  		return
  2990  	}
  2991  
  2992  	stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber)
  2993  	if nil != err {
  2994  		heldLocks.free()
  2995  		return
  2996  	}
  2997  
  2998  	mtime = stat[StatMTime]
  2999  	ctime = stat[StatCTime]
  3000  	inodeNumber = dirEntryInodeNumber
  3001  	numWrites = stat[StatNumWrites]
  3002  
  3003  	heldLocks.free()
  3004  	return
  3005  }
  3006  
  3007  func (vS *volumeStruct) MiddlewarePutContainer(containerName string, oldMetadata []byte, newMetadata []byte) (err error) {
  3008  	var (
  3009  		containerInodeLock   *dlm.RWLockStruct
  3010  		containerInodeNumber inode.InodeNumber
  3011  		existingMetadata     []byte
  3012  		newDirInodeLock      *dlm.RWLockStruct
  3013  		newDirInodeNumber    inode.InodeNumber
  3014  	)
  3015  
  3016  	startTime := time.Now()
  3017  	defer func() {
  3018  		globals.MiddlewarePutContainerUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3019  		globals.MiddlewarePutContainerBytes.Add(uint64(len(newMetadata)))
  3020  		if err != nil {
  3021  			globals.MiddlewarePutContainerErrors.Add(1)
  3022  		}
  3023  	}()
  3024  
  3025  	vS.jobRWMutex.RLock()
  3026  	defer vS.jobRWMutex.RUnlock()
  3027  
  3028  	// Yes, it's a heavy lock to hold on the root inode. However, we
  3029  	// might need to add a new directory entry there, so there's not
  3030  	// much else we can do.
  3031  	rootInodeLock, err := vS.inodeVolumeHandle.GetWriteLock(inode.RootDirInodeNumber, nil)
  3032  	if nil != err {
  3033  		return
  3034  	}
  3035  	defer rootInodeLock.Unlock()
  3036  
  3037  	containerInodeNumber, err = vS.inodeVolumeHandle.Lookup(inode.RootDirInodeNumber, containerName)
  3038  	if err != nil && blunder.IsNot(err, blunder.NotFoundError) {
  3039  		return
  3040  	} else if err != nil {
  3041  		// No such container, so we create it
  3042  		err = validateBaseName(containerName)
  3043  		if err != nil {
  3044  			return
  3045  		}
  3046  
  3047  		newDirInodeNumber, err = vS.inodeVolumeHandle.CreateDir(inode.PosixModePerm, 0, 0)
  3048  		if err != nil {
  3049  			logger.ErrorWithError(err)
  3050  			return
  3051  		}
  3052  
  3053  		newDirInodeLock, err = vS.inodeVolumeHandle.GetWriteLock(newDirInodeNumber, nil)
        		if err != nil {
        			// don't defer Unlock() on a lock we never acquired
        			logger.ErrorWithError(err)
        			return
        		}
  3054  		defer newDirInodeLock.Unlock()
  3055  
  3056  		err = vS.inodeVolumeHandle.PutStream(newDirInodeNumber, MiddlewareStream, newMetadata)
  3057  		if err != nil {
  3058  			logger.ErrorWithError(err)
  3059  			return
  3060  		}
  3061  
  3062  		err = vS.inodeVolumeHandle.Link(inode.RootDirInodeNumber, containerName, newDirInodeNumber, false)
  3063  
  3064  		return
  3065  	}
  3066  
  3067  	containerInodeLock, err = vS.inodeVolumeHandle.GetWriteLock(containerInodeNumber, nil)
  3068  	if err != nil {
  3069  		return
  3070  	}
  3071  	defer containerInodeLock.Unlock()
  3072  
  3073  	// Existing container: just update the metadata
  3074  	existingMetadata, err = vS.inodeVolumeHandle.GetStream(containerInodeNumber, MiddlewareStream)
  3075  
  3076  	// GetStream() will return an error if there is no "middleware" stream
  3077  	if err != nil && blunder.IsNot(err, blunder.StreamNotFound) {
  3078  		return
  3079  	} else if err != nil {
  3080  		existingMetadata = []byte{}
  3081  	}
  3082  
  3083  	// Only change it if the caller sent the current value
  3084  	if !bytes.Equal(existingMetadata, oldMetadata) {
  3085  		err = blunder.NewError(blunder.TryAgainError, "Metadata differs - actual: %v request: %v", existingMetadata, oldMetadata)
  3086  		return
  3087  	}
  3088  	err = vS.inodeVolumeHandle.PutStream(containerInodeNumber, MiddlewareStream, newMetadata)
  3089  
  3090  	return
  3091  }
  3092  
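        // Mkdir creates a directory inode owned by userID/groupID and links it into
        // directory inodeNumber as basename. The caller needs write and search
        // permission on inodeNumber; if any step after CreateDir() fails, the new
        // inode is destroyed rather than leaked.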
  3093  func (vS *volumeStruct) Mkdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (newDirInodeNumber inode.InodeNumber, err error) {
  3094  	startTime := time.Now()
  3095  	defer func() {
  3096  		globals.MkdirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3097  		if err != nil {
  3098  			globals.MkdirErrors.Add(1)
  3099  		}
  3100  	}()
  3101  
  3102  	vS.jobRWMutex.RLock()
  3103  	defer vS.jobRWMutex.RUnlock()
  3104  
  3105  	// Make sure the file basename is not too long
  3106  	err = validateBaseName(basename)
  3107  	if err != nil {
  3108  		return 0, err
  3109  	}
  3110  
  3111  	newDirInodeNumber, err = vS.inodeVolumeHandle.CreateDir(filePerm, userID, groupID)
  3112  	if err != nil {
  3113  		logger.ErrorWithError(err)
  3114  		return 0, err
  3115  	}
  3116  
  3117  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3118  	if err != nil {
  3119  		return
  3120  	}
  3121  	err = inodeLock.WriteLock()
  3122  	if err != nil {
  3123  		return
  3124  	}
  3125  	defer inodeLock.Unlock()
  3126  
  3127  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3128  		inode.NoOverride) {
  3129  
  3130  		destroyErr := vS.inodeVolumeHandle.Destroy(newDirInodeNumber)
  3131  		if destroyErr != nil {
  3132  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(F_OK) in fs.Mkdir", newDirInodeNumber)
  3133  		}
  3134  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3135  		return 0, err
  3136  	}
  3137  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
  3138  		inode.NoOverride) {
  3139  
  3140  		destroyErr := vS.inodeVolumeHandle.Destroy(newDirInodeNumber)
  3141  		if destroyErr != nil {
  3142  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(W_OK|X_OK) in fs.Mkdir", newDirInodeNumber)
  3143  		}
  3144  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3145  		return 0, err
  3146  	}
  3147  
  3148  	err = vS.inodeVolumeHandle.Link(inodeNumber, basename, newDirInodeNumber, false)
  3149  	if err != nil {
  3150  		destroyErr := vS.inodeVolumeHandle.Destroy(newDirInodeNumber)
  3151  		if destroyErr != nil {
  3152  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Link() in fs.Mkdir", newDirInodeNumber)
  3153  		}
  3154  		return 0, err
  3155  	}
  3156  
  3157  	return newDirInodeNumber, nil
  3158  }
  3159  
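        // RemoveXAttr deletes extended attribute streamName from inodeNumber. Write
        // permission is required (checked with inode.OwnerOverride, so the owner
        // qualifies regardless of mode bits).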
  3160  func (vS *volumeStruct) RemoveXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (err error) {
  3161  	startTime := time.Now()
  3162  	defer func() {
  3163  		globals.RemoveXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3164  		if err != nil {
  3165  			globals.RemoveXAttrErrors.Add(1)
  3166  		}
  3167  	}()
  3168  
  3169  	vS.jobRWMutex.RLock()
  3170  	defer vS.jobRWMutex.RUnlock()
  3171  
  3172  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3173  	if err != nil {
  3174  		return
  3175  	}
  3176  	err = inodeLock.WriteLock()
  3177  	if err != nil {
  3178  		return
  3179  	}
  3180  	defer inodeLock.Unlock()
  3181  
  3182  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3183  		inode.NoOverride) {
  3184  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3185  		return
  3186  	}
  3187  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  3188  		inode.OwnerOverride) {
  3189  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3190  		return
  3191  	}
  3192  
  3193  	err = vS.inodeVolumeHandle.DeleteStream(inodeNumber, streamName)
  3194  	if err != nil {
  3195  		logger.ErrorfWithError(err, "Failed to delete XAttr %v of inode %v", streamName, inodeNumber)
  3196  	}
  3197  
  3198  	vS.untrackInFlightFileInodeData(inodeNumber, false)
  3199  
  3200  	return
  3201  }
  3202  
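        // workerForMoveAndRename performs the locking and access checks shared by
        // Rename() and Move(): it write-locks the source entry, the destination
        // directory, and (if it exists) the destination entry, restarting with
        // backoff whenever resolvePath() asks for a retry, then calls inode.Move().
        // On success the caller must free the returned heldLocks and, if
        // toDestroyInodeNumber is non-zero, dispose of the displaced inode.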
  3203  func (vS *volumeStruct) workerForMoveAndRename(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (toDestroyInodeNumber inode.InodeNumber, heldLocks *heldLocksStruct, err error) {
  3204  	var (
  3205  		dirEntryBasename      string
  3206  		dirEntryInodeNumber   inode.InodeNumber
  3207  		dirInodeNumber        inode.InodeNumber
  3208  		retryRequired         bool
  3209  		tryLockBackoffContext *tryLockBackoffContextStruct
  3210  	)
  3211  
  3212  	err = validateBaseName(srcBasename)
  3213  	if nil != err {
  3214  		heldLocks = nil
  3215  		return
  3216  	}
  3217  
  3218  	err = validateBaseName(dstBasename)
  3219  	if nil != err {
  3220  		heldLocks = nil
  3221  		return
  3222  	}
  3223  
  3224  	// Retry until done or failure (starting with ZERO backoff)
  3225  
  3226  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  3227  
  3228  Restart:
  3229  
  3230  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  3231  
  3232  	tryLockBackoffContext.backoff()
  3233  
  3234  	// Construct fresh heldLocks for this restart
  3235  
  3236  	heldLocks = newHeldLocks()
  3237  
  3238  	// Acquire WriteLock on {srcDirInodeNumber,srcBasename} & perform Access Check
  3239  
  3240  	dirInodeNumber, _, dirEntryBasename, _, retryRequired, err =
  3241  		vS.resolvePath(
  3242  			srcDirInodeNumber,
  3243  			srcBasename,
  3244  			heldLocks,
  3245  			resolvePathRequireExclusiveLockOnDirEntryInode|
  3246  				resolvePathRequireExclusiveLockOnDirInode)
  3247  
  3248  	if nil != err {
  3249  		heldLocks.free()
  3250  		heldLocks = nil
  3251  		err = blunder.AddError(err, blunder.NotFoundError)
  3252  		return
  3253  	}
  3254  
  3255  	if retryRequired {
  3256  		heldLocks.free()
  3257  		goto Restart
  3258  	}
  3259  
  3260  	if (dirInodeNumber != srcDirInodeNumber) || (dirEntryBasename != srcBasename) {
  3261  		heldLocks.free()
  3262  		heldLocks = nil
  3263  		err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
  3264  		return
  3265  	}
  3266  
  3267  	if !vS.inodeVolumeHandle.Access(srcDirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK, inode.NoOverride) {
  3268  		heldLocks.free()
  3269  		heldLocks = nil
  3270  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3271  		return
  3272  	}
  3273  
  3274  	// Acquire WriteLock on dstDirInodeNumber & perform Access Check
  3275  
  3276  	_, dirEntryInodeNumber, _, _, retryRequired, err =
  3277  		vS.resolvePath(
  3278  			dstDirInodeNumber,
  3279  			".",
  3280  			heldLocks,
  3281  			resolvePathDirEntryInodeMustBeDirectory|
  3282  				resolvePathRequireExclusiveLockOnDirEntryInode)
  3283  
  3284  	if nil != err {
  3285  		heldLocks.free()
  3286  		heldLocks = nil
  3287  		err = blunder.AddError(err, blunder.NotFoundError)
  3288  		return
  3289  	}
  3290  
  3291  	if retryRequired {
  3292  		heldLocks.free()
  3293  		goto Restart
  3294  	}
  3295  
  3296  	if dirEntryInodeNumber != dstDirInodeNumber {
  3297  		heldLocks.free()
  3298  		heldLocks = nil
  3299  		err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
  3300  		return
  3301  	}
  3302  
  3303  	if !vS.inodeVolumeHandle.Access(dstDirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK, inode.NoOverride) {
  3304  		heldLocks.free()
  3305  		heldLocks = nil
  3306  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3307  		return
  3308  	}
  3309  
  3310  	// Acquire WriteLock on dstBasename if it exists
  3311  
  3312  	dirInodeNumber, _, dirEntryBasename, _, retryRequired, err =
  3313  		vS.resolvePath(
  3314  			dstDirInodeNumber,
  3315  			dstBasename,
  3316  			heldLocks,
  3317  			resolvePathRequireExclusiveLockOnDirEntryInode)
  3318  
  3319  	if nil == err {
  3320  		if retryRequired {
  3321  			heldLocks.free()
  3322  			goto Restart
  3323  		}
  3324  
  3325  		if (dirInodeNumber != dstDirInodeNumber) || (dirEntryBasename != dstBasename) {
  3326  			heldLocks.free()
  3327  			heldLocks = nil
  3328  			err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
  3329  			return
  3330  		}
  3331  	} else {
  3332  		// This is actually OK... it just means there is no existing dstBasename entry for the Rename() to replace
  3333  	}
  3334  
  3335  	// Locks held & Access Checks succeeded... time to do the Move
  3336  
  3337  	toDestroyInodeNumber, err = vS.inodeVolumeHandle.Move(srcDirInodeNumber, srcBasename, dstDirInodeNumber, dstBasename)
  3338  
  3339  	return // err returned from inode.Move() suffices here
  3340  }
  3341  
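        // Rename moves srcBasename in srcDirInodeNumber to dstBasename in
        // dstDirInodeNumber, destroying any inode the rename displaced.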
  3342  func (vS *volumeStruct) Rename(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (err error) {
  3343  	var (
  3344  		destroyErr           error
  3345  		heldLocks            *heldLocksStruct
  3346  		toDestroyInodeNumber inode.InodeNumber
  3347  	)
  3348  
  3349  	startTime := time.Now()
  3350  	defer func() {
  3351  		globals.RenameUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3352  		if err != nil {
  3353  			globals.RenameErrors.Add(1)
  3354  		}
  3355  	}()
  3356  
  3357  	vS.jobRWMutex.RLock()
  3358  	defer vS.jobRWMutex.RUnlock()
  3359  
  3360  	toDestroyInodeNumber, heldLocks, err = vS.workerForMoveAndRename(userID, groupID, otherGroupIDs, srcDirInodeNumber, srcBasename, dstDirInodeNumber, dstBasename)
  3361  
  3362  	if (nil == err) && (inode.InodeNumber(0) != toDestroyInodeNumber) {
  3363  		destroyErr = vS.inodeVolumeHandle.Destroy(toDestroyInodeNumber)
  3364  		if nil != destroyErr {
  3365  			logger.ErrorWithError(destroyErr)
  3366  		}
  3367  	}
  3368  
  3369  	if nil != heldLocks {
  3370  		heldLocks.free()
  3371  	}
  3372  
  3373  	return
  3374  }
  3375  
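        // Move is Rename() without the Destroy(): if the move displaced an inode at
        // the destination, its number is returned for the caller to dispose of.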
  3376  func (vS *volumeStruct) Move(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (toDestroyInodeNumber inode.InodeNumber, err error) {
  3377  	var (
  3378  		heldLocks *heldLocksStruct
  3379  	)
  3380  
  3381  	startTime := time.Now()
  3382  	defer func() {
  3383  		globals.MoveUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3384  		if err != nil {
  3385  			globals.MoveErrors.Add(1)
  3386  		}
  3387  	}()
  3388  
  3389  	vS.jobRWMutex.RLock()
  3390  	defer vS.jobRWMutex.RUnlock()
  3391  
  3392  	toDestroyInodeNumber, heldLocks, err = vS.workerForMoveAndRename(userID, groupID, otherGroupIDs, srcDirInodeNumber, srcBasename, dstDirInodeNumber, dstBasename)
  3393  
  3394  	if nil != heldLocks {
  3395  		heldLocks.free()
  3396  	}
  3397  
  3398  	return
  3399  }
  3400  
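        // Destroy reclaims inodeNumber outright (no directory entry is touched);
        // the caller must have write permission on the inode.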
  3401  func (vS *volumeStruct) Destroy(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (err error) {
  3402  	var (
  3403  		inodeLock *dlm.RWLockStruct
  3404  	)
  3405  
  3406  	startTime := time.Now()
  3407  	defer func() {
  3408  		globals.DestroyUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3409  		if err != nil {
  3410  			globals.DestroyErrors.Add(1)
  3411  		}
  3412  	}()
  3413  
  3414  	vS.jobRWMutex.RLock()
  3415  
  3416  	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3417  	if nil != err {
  3418  		vS.jobRWMutex.RUnlock()
  3419  		return
  3420  	}
  3421  	err = inodeLock.WriteLock()
  3422  	if nil != err {
  3423  		vS.jobRWMutex.RUnlock()
  3424  		return
  3425  	}
  3426  
  3427  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3428  		inode.NoOverride) {
  3429  		_ = inodeLock.Unlock()
  3430  		vS.jobRWMutex.RUnlock()
  3431  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3432  		return
  3433  	}
  3434  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  3435  		inode.OwnerOverride) {
  3436  		_ = inodeLock.Unlock()
  3437  		vS.jobRWMutex.RUnlock()
  3438  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3439  		return
  3440  	}
  3441  
  3442  	err = vS.inodeVolumeHandle.Destroy(inodeNumber)
  3443  
  3444  	_ = inodeLock.Unlock()
  3445  	vS.jobRWMutex.RUnlock()
  3446  
  3447  	return
  3448  }
  3449  
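        // Read returns up to length bytes of file inodeNumber starting at offset,
        // after verifying read permission and that the inode really is a file.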
  3450  func (vS *volumeStruct) Read(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, length uint64, profiler *utils.Profiler) (buf []byte, err error) {
  3451  	startTime := time.Now()
  3452  	defer func() {
  3453  		globals.ReadUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3454  		globals.ReadBytes.Add(uint64(len(buf)))
  3455  		if err != nil {
  3456  			globals.ReadErrors.Add(1)
  3457  		}
  3458  	}()
  3459  
  3460  	vS.jobRWMutex.RLock()
  3461  	defer vS.jobRWMutex.RUnlock()
  3462  
  3463  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3464  	if err != nil {
  3465  		return
  3466  	}
  3467  	err = inodeLock.ReadLock()
  3468  	if err != nil {
  3469  		return
  3470  	}
  3471  	defer inodeLock.Unlock()
  3472  
  3473  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3474  		inode.NoOverride) {
  3475  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3476  		return
  3477  	}
  3478  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
  3479  		inode.OwnerOverride) {
  3480  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3481  		return
  3482  	}
  3483  
  3484  	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
  3485  	if err != nil {
  3486  		logger.ErrorfWithError(err, "couldn't get type for inode %v", inodeNumber)
  3487  		return buf, err
  3488  	}
  3489  	// Make sure the inode number is for a file inode
  3490  	if inodeType != inode.FileType {
  3491  		err = fmt.Errorf("%s: expected inode %v to be a file inode, got %v", utils.GetFnName(), inodeNumber, inodeType)
  3492  		logger.ErrorWithError(err)
  3493  		return buf, blunder.AddError(err, blunder.NotFileError)
  3494  	}
  3495  
  3496  	profiler.AddEventNow("before inode.Read()")
  3497  	buf, err = vS.inodeVolumeHandle.Read(inodeNumber, offset, length, profiler)
  3498  	profiler.AddEventNow("after inode.Read()")
  3499  	if uint64(len(buf)) > length {
  3500  		err = fmt.Errorf("%s: Buf length %v is greater than supplied length %v", utils.GetFnName(), uint64(len(buf)), length)
  3501  		logger.ErrorWithError(err)
  3502  		return buf, blunder.AddError(err, blunder.IOError)
  3503  	}
  3504  
  3505  	return buf, err
  3506  }
  3507  
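        // readdirHelper implements both Readdir() and ReaddirPlus(): it reads up to
        // maxEntries directory entries under a read lock and then stats each entry
        // to fill in statEntries and the entry types. Locks are only attempted,
        // never waited on, so the whole operation restarts from the top whenever an
        // AttemptReadLock() fails.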
  3508  func (vS *volumeStruct) readdirHelper(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (dirEntries []inode.DirEntry, statEntries []Stat, numEntries uint64, areMoreEntries bool, err error) {
  3509  	var (
  3510  		dirEntryIndex         uint64
  3511  		dlmCallerID           dlm.CallerID
  3512  		inodeLock             *dlm.RWLockStruct
  3513  		inodeVolumeHandle     inode.VolumeHandle
  3514  		internalErr           error
  3515  		tryLockBackoffContext *tryLockBackoffContextStruct
  3516  	)
  3517  
  3518  	vS.jobRWMutex.RLock()
  3519  	defer vS.jobRWMutex.RUnlock()
  3520  
  3521  	dlmCallerID = dlm.GenerateCallerID()
  3522  	inodeVolumeHandle = vS.inodeVolumeHandle
  3523  
  3524  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  3525  
  3526  Restart:
  3527  
  3528  	tryLockBackoffContext.backoff()
  3529  
  3530  	inodeLock, err = inodeVolumeHandle.AttemptReadLock(inodeNumber, dlmCallerID)
  3531  	if nil != err {
  3532  		goto Restart
  3533  	}
  3534  
  3535  	if !inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, inode.NoOverride) {
  3536  		internalErr = inodeLock.Unlock()
  3537  		if nil != internalErr {
  3538  			logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
  3539  		}
  3540  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3541  		return
  3542  	}
  3543  	if !inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK, inode.OwnerOverride) {
  3544  		internalErr = inodeLock.Unlock()
  3545  		if nil != internalErr {
  3546  			logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
  3547  		}
  3548  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3549  		return
  3550  	}
  3551  
  3552  	dirEntries, areMoreEntries, err = inodeVolumeHandle.ReadDir(inodeNumber, maxEntries, 0, prevReturned...)
  3553  
  3554  	internalErr = inodeLock.Unlock()
  3555  	if nil != internalErr {
  3556  		logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
  3557  	}
  3558  
  3559  	if nil != err {
  3560  		return
  3561  	}
  3562  
  3563  	// Now go back and fill in (dirEntries.Type and) statEntries
  3564  
  3565  	numEntries = uint64(len(dirEntries))
  3566  
  3567  	statEntries = make([]Stat, numEntries)
  3568  
  3569  	for dirEntryIndex = 0; dirEntryIndex < numEntries; dirEntryIndex++ {
  3570  		inodeLock, err = inodeVolumeHandle.AttemptReadLock(dirEntries[dirEntryIndex].InodeNumber, dlmCallerID)
  3571  		if nil != err {
  3572  			goto Restart
  3573  		}
  3574  
  3575  		statEntries[dirEntryIndex], err = vS.getstatHelperWhileLocked(dirEntries[dirEntryIndex].InodeNumber)
  3576  		if nil != err {
  3577  			internalErr = inodeLock.Unlock()
  3578  			if nil != internalErr {
  3579  				logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
  3580  			}
  3581  			return
  3582  		}
  3583  
  3584  		dirEntries[dirEntryIndex].Type = inode.InodeType(statEntries[dirEntryIndex][StatFType])
  3585  
  3586  		internalErr = inodeLock.Unlock()
  3587  		if nil != internalErr {
  3588  			logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
  3589  		}
  3590  	}
  3591  
  3592  	return
  3593  }
  3594  
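        // Readdir returns up to maxEntries directory entries following prevReturned.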
  3595  func (vS *volumeStruct) Readdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (entries []inode.DirEntry, numEntries uint64, areMoreEntries bool, err error) {
  3596  	startTime := time.Now()
  3597  	defer func() {
  3598  		globals.ReaddirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3599  		globals.ReaddirEntries.Add(uint64(len(entries)))
  3600  		if err != nil {
  3601  			globals.ReaddirErrors.Add(1)
  3602  		}
  3603  	}()
  3604  
  3605  	entries, _, numEntries, areMoreEntries, err = vS.readdirHelper(userID, groupID, otherGroupIDs, inodeNumber, maxEntries, prevReturned...)
  3606  
  3607  	return
  3608  }
  3609  
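        // ReaddirPlus is Readdir() plus a Stat for each returned entry.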
  3610  func (vS *volumeStruct) ReaddirPlus(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (dirEntries []inode.DirEntry, statEntries []Stat, numEntries uint64, areMoreEntries bool, err error) {
  3611  	startTime := time.Now()
  3612  	defer func() {
  3613  		globals.ReaddirPlusUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3614  		globals.ReaddirPlusBytes.Add(uint64(len(dirEntries)))
  3615  		if err != nil {
  3616  			globals.ReaddirPlusErrors.Add(1)
  3617  		}
  3618  	}()
  3619  
  3620  	dirEntries, statEntries, numEntries, areMoreEntries, err = vS.readdirHelper(userID, groupID, otherGroupIDs, inodeNumber, maxEntries, prevReturned...)
  3621  
  3622  	return
  3623  }
  3624  
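        // Readsymlink returns the target path stored in symlink inodeNumber.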
  3625  func (vS *volumeStruct) Readsymlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (target string, err error) {
  3626  	startTime := time.Now()
  3627  	defer func() {
  3628  		globals.ReadsymlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3629  		if err != nil {
  3630  			globals.ReadsymlinkErrors.Add(1)
  3631  		}
  3632  	}()
  3633  
  3634  	vS.jobRWMutex.RLock()
  3635  	defer vS.jobRWMutex.RUnlock()
  3636  
  3637  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3638  	if err != nil {
  3639  		return
  3640  	}
  3641  	err = inodeLock.ReadLock()
  3642  	if err != nil {
  3643  		return
  3644  	}
  3645  	defer inodeLock.Unlock()
  3646  
  3647  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3648  		inode.NoOverride) {
  3649  
  3650  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3651  		return
  3652  	}
  3653  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
  3654  		inode.NoOverride) {
  3655  
  3656  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3657  		return
  3658  	}
  3659  
  3660  	target, err = vS.inodeVolumeHandle.GetSymlink(inodeNumber)
  3661  
  3662  	return target, err
  3663  }
  3664  
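        // Resize truncates or extends file inodeNumber to newSize bytes.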
  3665  func (vS *volumeStruct) Resize(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, newSize uint64) (err error) {
  3666  	startTime := time.Now()
  3667  	defer func() {
  3668  		globals.ResizeUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3669  		if err != nil {
  3670  			globals.ResizeErrors.Add(1)
  3671  		}
  3672  	}()
  3673  
  3674  	vS.jobRWMutex.RLock()
  3675  	defer vS.jobRWMutex.RUnlock()
  3676  
  3677  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3678  	if err != nil {
  3679  		return
  3680  	}
  3681  	err = inodeLock.WriteLock()
  3682  	if err != nil {
  3683  		return
  3684  	}
  3685  	defer inodeLock.Unlock()
  3686  
  3687  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3688  		inode.NoOverride) {
  3689  
  3690  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3691  		return
  3692  	}
  3693  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  3694  		inode.OwnerOverride) {
  3695  
  3696  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3697  		return
  3698  	}
  3699  
  3700  	err = vS.inodeVolumeHandle.SetSize(inodeNumber, newSize)
  3701  	vS.untrackInFlightFileInodeData(inodeNumber, false)
  3702  
  3703  	return err
  3704  }
  3705  
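        // Rmdir unlinks directory basename from directory inodeNumber; the target
        // must be an empty directory (only "." and ".." entries).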
  3706  func (vS *volumeStruct) Rmdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error) {
  3707  	startTime := time.Now()
  3708  	defer func() {
  3709  		globals.RmdirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3710  		if err != nil {
  3711  			globals.RmdirErrors.Add(1)
  3712  		}
  3713  	}()
  3714  
  3715  	vS.jobRWMutex.RLock()
  3716  	defer vS.jobRWMutex.RUnlock()
  3717  
  3718  	callerID := dlm.GenerateCallerID()
  3719  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, callerID)
  3720  	if err != nil {
  3721  		return
  3722  	}
  3723  	err = inodeLock.WriteLock()
  3724  	if err != nil {
  3725  		return
  3726  	}
  3727  	defer inodeLock.Unlock()
  3728  
  3729  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3730  		inode.NoOverride) {
  3731  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3732  		return
  3733  	}
  3734  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
  3735  		inode.NoOverride) {
  3736  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3737  		return
  3738  	}
  3739  
  3740  	basenameInodeNumber, err := vS.inodeVolumeHandle.Lookup(inodeNumber, basename)
  3741  	if nil != err {
  3742  		return
  3743  	}
  3744  
  3745  	basenameInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(basenameInodeNumber, callerID)
  3746  	if err != nil {
  3747  		return
  3748  	}
  3749  	err = basenameInodeLock.WriteLock()
  3750  	if err != nil {
  3751  		return
  3752  	}
  3753  	defer basenameInodeLock.Unlock()
  3754  
  3755  	// no permissions are required on the target directory
  3756  
  3757  	err = vS.rmdirActual(inodeNumber, basename, basenameInodeNumber)
  3758  	return
  3759  }
  3760  
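        // rmdirActual does the type check, emptiness check, Unlink(), and Destroy()
        // for Rmdir(); the caller must already hold write locks on both inodes.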
  3761  func (vS *volumeStruct) rmdirActual(inodeNumber inode.InodeNumber, basename string, basenameInodeNumber inode.InodeNumber) (err error) {
  3762  	var (
  3763  		basenameInodeType    inode.InodeType
  3764  		dirEntries           uint64
  3765  		toDestroyInodeNumber inode.InodeNumber
  3766  	)
  3767  
  3768  	basenameInodeType, err = vS.inodeVolumeHandle.GetType(basenameInodeNumber)
  3769  	if nil != err {
  3770  		return
  3771  	}
  3772  
  3773  	if inode.DirType != basenameInodeType {
  3774  		err = fmt.Errorf("Rmdir() called on non-Directory")
  3775  		err = blunder.AddError(err, blunder.NotDirError)
  3776  		return
  3777  	}
  3778  
  3779  	dirEntries, err = vS.inodeVolumeHandle.NumDirEntries(basenameInodeNumber)
  3780  	if nil != err {
  3781  		return
  3782  	}
  3783  
  3784  	if 2 != dirEntries {
  3785  		err = fmt.Errorf("Directory not empty")
  3786  		err = blunder.AddError(err, blunder.NotEmptyError)
  3787  		return
  3788  	}
  3789  
  3790  	toDestroyInodeNumber, err = vS.inodeVolumeHandle.Unlink(inodeNumber, basename, false)
  3791  	if nil != err {
  3792  		return
  3793  	}
  3794  
  3795  	if inode.InodeNumber(0) != toDestroyInodeNumber {
  3796  		err = vS.inodeVolumeHandle.Destroy(toDestroyInodeNumber)
  3797  		if nil != err {
  3798  			return
  3799  		}
  3800  	}
  3801  
  3802  	return
  3803  }
  3804  
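        // Setstat applies the attributes present in stat to inodeNumber, performing
        // every permission and sanity check before the first change so that a
        // rejected request leaves the inode untouched.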
  3805  func (vS *volumeStruct) Setstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, stat Stat) (err error) {
  3806  	startTime := time.Now()
  3807  	defer func() {
  3808  		globals.SetstatUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3809  		if err != nil {
  3810  			globals.SetstatErrors.Add(1)
  3811  		}
  3812  	}()
  3813  
  3814  	vS.jobRWMutex.RLock()
  3815  	defer vS.jobRWMutex.RUnlock()
  3816  
  3817  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3818  	if err != nil {
  3819  		return
  3820  	}
  3821  	err = inodeLock.WriteLock()
  3822  	if err != nil {
  3823  		return
  3824  	}
  3825  	defer inodeLock.Unlock()
  3826  
  3827  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.P_OK,
  3828  		inode.NoOverride) {
  3829  		err = blunder.NewError(blunder.NotPermError, "EPERM")
  3830  		return
  3831  	}
  3832  
  3833  	// perform all permissions checks before making any changes
  3834  	//
  3835  	// changing the filesize requires write permission
  3836  	_, ok := stat[StatSize]
  3837  	if ok {
  3838  		if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  3839  			inode.OwnerOverride) {
  3840  			err = blunder.NewError(blunder.NotPermError, "EPERM")
  3841  			return
  3842  		}
  3843  	}
  3844  
  3845  	// most other attributes can only be changed by the owner of the file
  3846  	ownerOnly := []StatKey{StatCTime, StatCRTime, StatMTime, StatATime, StatMode, StatUserID, StatGroupID}
  3847  	for _, key := range ownerOnly {
  3848  		_, ok := stat[key]
  3849  		if ok {
  3850  			if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.P_OK,
  3851  				inode.NoOverride) {
  3852  				err = blunder.NewError(blunder.NotPermError, "EPERM")
  3853  				return
  3854  			}
  3855  			break
  3856  		}
  3857  	}
  3858  
  3859  	// the superuser (root) is the only one that can change the owner of the file to a
  3860  	// different user, but the owner of the file can perform a no-op "change" in
  3861  	// ownership
  3862  	newUserID, settingUserID := stat[StatUserID]
  3863  	if settingUserID && userID != inode.InodeRootUserID {
  3864  		if userID != inode.InodeUserID(newUserID) {
  3865  			err = blunder.NewError(blunder.NotPermError, "EPERM")
  3866  			return
  3867  		}
  3868  	}
  3869  
  3870  	// the group can only be changed to the current group or another group the owner
  3871  	// is in (unless it's the superuser asking)
  3872  	newGroupID, settingGroupID := stat[StatGroupID]
  3873  	if settingGroupID && groupID != inode.InodeGroupID(newGroupID) && userID != inode.InodeRootUserID {
  3874  
  3875  		err = blunder.NewError(blunder.NotPermError, "EPERM")
  3876  		for _, otherID := range otherGroupIDs {
  3877  			if inode.InodeGroupID(newGroupID) == otherID {
  3878  				err = nil
  3879  				break
  3880  			}
  3881  		}
  3882  		if err != nil {
  3883  			return
  3884  		}
  3885  	}
  3886  
  3887  	// sanity checks for invalid/illegal values
  3888  	if settingUserID {
  3889  		// Since we are using a uint64 to convey a uint32 value, make sure we didn't get something too big
  3890  		if newUserID > uint64(math.MaxUint32) {
  3891  			err = fmt.Errorf("%s: userID is too large - value is %v, max is %v.", utils.GetFnName(), newUserID, uint64(math.MaxUint32))
  3892  			err = blunder.AddError(err, blunder.InvalidUserIDError)
  3893  			return
  3894  		}
  3895  	}
  3896  
  3897  	if settingGroupID {
  3898  		// Since we are using a uint64 to convey a uint32 value, make sure we didn't get something too big
  3899  		if newGroupID > uint64(math.MaxUint32) {
  3900  			err = fmt.Errorf("%s: groupID is too large - value is %v, max is %v.", utils.GetFnName(), newGroupID, uint64(math.MaxUint32))
  3901  			err = blunder.AddError(err, blunder.InvalidGroupIDError)
  3902  			return
  3903  		}
  3904  	}
  3905  
  3906  	filePerm, settingFilePerm := stat[StatMode]
  3907  	if settingFilePerm {
  3908  		// Since we are using a uint64 to convey a 12 bit value, make sure we didn't get something too big
  3909  		if filePerm >= 1<<12 {
  3910  			err = fmt.Errorf("%s: filePerm is too large - value is %v, max is %v.", utils.GetFnName(),
  3911  				filePerm, (1<<12)-1)
  3912  			err = blunder.AddError(err, blunder.InvalidFileModeError)
  3913  			return
  3914  		}
  3915  	}
  3916  
  3917  	// get to work setting things
  3918  	//
  3919  	// Set permissions, if present in the map
  3920  	if settingFilePerm {
  3921  		err = vS.inodeVolumeHandle.SetPermMode(inodeNumber, inode.InodeMode(filePerm))
  3922  		if err != nil {
  3923  			logger.ErrorWithError(err)
  3924  			return err
  3925  		}
  3926  	}
  3927  
  3928  	// set owner and/or group owner, if present in the map
  3929  	err = nil
  3930  	if settingUserID && settingGroupID {
  3931  		err = vS.inodeVolumeHandle.SetOwnerUserIDGroupID(inodeNumber, inode.InodeUserID(newUserID),
  3932  			inode.InodeGroupID(newGroupID))
  3933  	} else if settingUserID {
  3934  		err = vS.inodeVolumeHandle.SetOwnerUserID(inodeNumber, inode.InodeUserID(newUserID))
  3935  	} else if settingGroupID {
  3936  		err = vS.inodeVolumeHandle.SetOwnerGroupID(inodeNumber, inode.InodeGroupID(newGroupID))
  3937  	}
  3938  	if err != nil {
  3939  		logger.ErrorWithError(err)
  3940  		return
  3941  	}
  3942  
  3943  	// Set crtime, if present in the map
  3944  	crtime, ok := stat[StatCRTime]
  3945  	if ok {
  3946  		newCreationTime := time.Unix(0, int64(crtime))
  3947  		err = vS.inodeVolumeHandle.SetCreationTime(inodeNumber, newCreationTime)
  3948  		if err != nil {
  3949  			logger.ErrorWithError(err)
  3950  			return err
  3951  		}
  3952  	}
  3953  
  3954  	// Set mtime, if present in the map
  3955  	mtime, ok := stat[StatMTime]
  3956  	if ok {
  3957  		newModificationTime := time.Unix(0, int64(mtime))
  3958  		err = vS.inodeVolumeHandle.SetModificationTime(inodeNumber, newModificationTime)
  3959  		if err != nil {
  3960  			logger.ErrorWithError(err)
  3961  			return err
  3962  		}
  3963  	}
  3964  
  3965  	// Set atime, if present in the map
  3966  	atime, ok := stat[StatATime]
  3967  	if ok {
  3968  		newAccessTime := time.Unix(0, int64(atime))
  3969  		err = vS.inodeVolumeHandle.SetAccessTime(inodeNumber, newAccessTime)
  3970  		if err != nil {
  3971  			logger.ErrorWithError(err)
  3972  			return err
  3973  		}
  3974  	}
  3975  
  3976  	// ctime is used to reliably determine whether the contents of a file
  3977  	// have changed so it cannot be altered by a client (some security
  3978  	// software depends on this)
  3979  	ctime, ok := stat[StatCTime]
  3980  	if ok {
  3981  		requestedCTime := time.Unix(0, int64(ctime))
  3982  		logger.Infof("%s: ignoring attempt to change ctime to %v on volume '%s' inode %v",
  3983  			utils.GetFnName(), requestedCTime, vS.volumeName, inodeNumber)
  3984  	}
  3985  
  3986  	// Set size, if present in the map
  3987  	size, ok := stat[StatSize]
  3988  	if ok {
  3989  		err = vS.inodeVolumeHandle.SetSize(inodeNumber, size)
  3990  		if err != nil {
  3991  			logger.ErrorWithError(err)
  3992  			return err
  3993  		}
  3994  	}
  3995  
  3996  	return
  3997  }
  3998  
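        // SetXAttr stores value as extended attribute streamName of inodeNumber;
        // flags selects create-only, replace-only, or create-or-replace semantics,
        // analogous to setxattr(2).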
  3999  func (vS *volumeStruct) SetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string, value []byte, flags int) (err error) {
  4000  	startTime := time.Now()
  4001  	defer func() {
  4002  		globals.SetXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  4003  		if err != nil {
  4004  			globals.SetXAttrErrors.Add(1)
  4005  		}
  4006  	}()
  4007  
  4008  	vS.jobRWMutex.RLock()
  4009  	defer vS.jobRWMutex.RUnlock()
  4010  
  4011  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  4012  	if err != nil {
  4013  		return
  4014  	}
  4015  	err = inodeLock.WriteLock()
  4016  	if err != nil {
  4017  		return
  4018  	}
  4019  	defer inodeLock.Unlock()
  4020  
  4021  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  4022  		inode.NoOverride) {
  4023  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  4024  		return
  4025  	}
  4026  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  4027  		inode.OwnerOverride) {
  4028  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  4029  		return
  4030  	}
  4031  
  4032  	switch flags {
  4033  	case SetXAttrCreateOrReplace:
  4034  		break
  4035  	case SetXAttrCreate:
  4036  		_, err = vS.GetXAttr(userID, groupID, otherGroupIDs, inodeNumber, streamName)
  4037  		if err == nil {
  4038  			return blunder.NewError(blunder.FileExistsError, "EEXIST")
  4039  		}
  4040  	case SetXAttrReplace:
  4041  		_, err = vS.GetXAttr(userID, groupID, otherGroupIDs, inodeNumber, streamName)
  4042  		if err != nil {
  4043  			return blunder.AddError(err, blunder.StreamNotFound)
  4044  		}
  4045  	default:
  4046  		return blunder.NewError(blunder.InvalidArgError, "EINVAL")
  4047  	}
  4048  
  4049  	err = vS.inodeVolumeHandle.PutStream(inodeNumber, streamName, value)
  4050  	if err != nil {
  4051  		logger.ErrorfWithError(err, "Failed to set XAttr %v to inode %v", streamName, inodeNumber)
  4052  	}
  4053  
  4054  	vS.untrackInFlightFileInodeData(inodeNumber, false)
  4055  
  4056  	return
  4057  }
  4058  
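        // StatVfs reports statvfs(2)-style totals for the volume using its
        // reported* sizes; the free and available counts are simply pegged to the
        // totals.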
  4059  func (vS *volumeStruct) StatVfs() (statVFS StatVFS, err error) {
  4060  	startTime := time.Now()
  4061  	defer func() {
  4062  		globals.StatVfsUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  4063  		if err != nil {
  4064  			globals.StatVfsErrors.Add(1)
  4065  		}
  4066  	}()
  4067  
  4068  	vS.jobRWMutex.RLock()
  4069  	defer vS.jobRWMutex.RUnlock()
  4070  
  4071  	statVFS = make(map[StatVFSKey]uint64)
  4072  
  4073  	statVFS[StatVFSFilesystemID] = vS.inodeVolumeHandle.GetFSID()
  4074  	statVFS[StatVFSBlockSize] = vS.reportedBlockSize
  4075  	statVFS[StatVFSFragmentSize] = vS.reportedFragmentSize
  4076  	statVFS[StatVFSTotalBlocks] = vS.reportedNumBlocks
  4077  	statVFS[StatVFSFreeBlocks] = vS.reportedNumBlocks
  4078  	statVFS[StatVFSAvailBlocks] = vS.reportedNumBlocks
  4079  	statVFS[StatVFSTotalInodes] = vS.reportedNumInodes
  4080  	statVFS[StatVFSFreeInodes] = vS.reportedNumInodes
  4081  	statVFS[StatVFSAvailInodes] = vS.reportedNumInodes
  4082  	statVFS[StatVFSMountFlags] = 0
  4083  	statVFS[StatVFSMaxFilenameLen] = FileNameMax
  4084  
  4085  	return statVFS, nil
  4086  }
  4087  
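        // Symlink creates a symlink inode pointing at target and links it into
        // directory inodeNumber as basename; symlink mode is always rwxrwxrwx.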
  4088  func (vS *volumeStruct) Symlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, target string) (symlinkInodeNumber inode.InodeNumber, err error) {
  4089  	startTime := time.Now()
  4090  	defer func() {
  4091  		globals.SymlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  4092  		if err != nil {
  4093  			globals.SymlinkErrors.Add(1)
  4094  		}
  4095  	}()
  4096  
  4097  	vS.jobRWMutex.RLock()
  4098  	defer vS.jobRWMutex.RUnlock()
  4099  
  4100  	err = validateBaseName(basename)
  4101  	if err != nil {
  4102  		return
  4103  	}
  4104  
  4105  	err = validateFullPath(target)
  4106  	if err != nil {
  4107  		return
  4108  	}
  4109  
  4110  	// Mode for symlinks defaults to rwxrwxrwx, i.e. inode.PosixModePerm
  4111  	symlinkInodeNumber, err = vS.inodeVolumeHandle.CreateSymlink(target, inode.PosixModePerm, userID, groupID)
  4112  	if err != nil {
  4113  		return
  4114  	}
  4115  
  4116  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  4117  	if err != nil {
  4118  		return
  4119  	}
  4120  	err = inodeLock.WriteLock()
  4121  	if err != nil {
  4122  		return
  4123  	}
  4124  	defer inodeLock.Unlock()
  4125  
  4126  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  4127  		inode.NoOverride) {
  4128  
  4129  		destroyErr := vS.inodeVolumeHandle.Destroy(symlinkInodeNumber)
  4130  		if destroyErr != nil {
  4131  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(F_OK) in fs.Symlink", symlinkInodeNumber)
  4132  		}
  4133  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  4134  		return
  4135  	}
  4136  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
  4137  		inode.NoOverride) {
  4138  
  4139  		destroyErr := vS.inodeVolumeHandle.Destroy(symlinkInodeNumber)
  4140  		if destroyErr != nil {
  4141  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(W_OK|X_OK) in fs.Symlink", symlinkInodeNumber)
  4142  		}
  4143  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  4144  		return
  4145  	}
  4146  
  4147  	err = vS.inodeVolumeHandle.Link(inodeNumber, basename, symlinkInodeNumber, false)
  4148  	if err != nil {
  4149  		destroyErr := vS.inodeVolumeHandle.Destroy(symlinkInodeNumber)
  4150  		if destroyErr != nil {
  4151  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Link() in fs.Symlink", symlinkInodeNumber)
  4152  		}
  4153  		return
  4154  	}
  4155  
  4156  	return
  4157  }
  4158  
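        // Unlink removes non-directory entry basename from directory inodeNumber,
        // destroying the underlying inode if this was its last link.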
  4159  func (vS *volumeStruct) Unlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error) {
  4160  	startTime := time.Now()
  4161  	defer func() {
  4162  		globals.UnlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  4163  		if err != nil {
  4164  			globals.UnlinkErrors.Add(1)
  4165  		}
  4166  	}()
  4167  
  4168  	vS.jobRWMutex.RLock()
  4169  	defer vS.jobRWMutex.RUnlock()
  4170  
  4171  	callerID := dlm.GenerateCallerID()
  4172  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, callerID)
  4173  	if err != nil {
  4174  		return
  4175  	}
  4176  	err = inodeLock.WriteLock()
  4177  	if err != nil {
  4178  		return
  4179  	}
  4180  	defer inodeLock.Unlock()
  4181  
  4182  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  4183  		inode.NoOverride) {
  4184  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  4185  		return
  4186  	}
  4187  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
  4188  		inode.NoOverride) {
  4189  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  4190  		return
  4191  	}
  4192  
  4193  	basenameInodeNumber, err := vS.inodeVolumeHandle.Lookup(inodeNumber, basename)
  4194  	if nil != err {
  4195  		return
  4196  	}
  4197  
  4198  	basenameInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(basenameInodeNumber, callerID)
  4199  	if err != nil {
  4200  		return
  4201  	}
  4202  	err = basenameInodeLock.WriteLock()
  4203  	if err != nil {
  4204  		return
  4205  	}
  4206  	defer basenameInodeLock.Unlock()
  4207  
  4208  	err = vS.unlinkActual(inodeNumber, basename, basenameInodeNumber)
  4209  	return
  4210  }
  4211  
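        // unlinkActual does the Unlink()/Destroy() work for Unlink(); it rejects
        // directories and assumes the caller holds write locks on both inodes.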
  4212  func (vS *volumeStruct) unlinkActual(inodeNumber inode.InodeNumber, basename string, basenameInodeNumber inode.InodeNumber) (err error) {
  4213  	var (
  4214  		basenameInodeType    inode.InodeType
  4215  		toDestroyInodeNumber inode.InodeNumber
  4216  	)
  4217  
  4218  	basenameInodeType, err = vS.inodeVolumeHandle.GetType(basenameInodeNumber)
  4219  	if nil != err {
  4220  		return
  4221  	}
  4222  
  4223  	if inode.DirType == basenameInodeType {
  4224  		err = fmt.Errorf("Unlink() called on a Directory")
  4225  		err = blunder.AddError(err, blunder.IsDirError)
  4226  		return
  4227  	}
  4228  
  4229  	toDestroyInodeNumber, err = vS.inodeVolumeHandle.Unlink(inodeNumber, basename, false)
  4230  	if nil != err {
  4231  		return
  4232  	}
  4233  
  4234  	if inode.InodeNumber(0) != toDestroyInodeNumber {
  4235  		vS.untrackInFlightFileInodeData(basenameInodeNumber, false)
  4236  		err = vS.inodeVolumeHandle.Destroy(toDestroyInodeNumber)
  4237  	}
  4238  
  4239  	return
  4240  }
  4241  
  4242  func (vS *volumeStruct) VolumeName() (volumeName string) {
  4243  	startTime := time.Now()
  4244  
  4245  	volumeName = vS.volumeName
  4246  	globals.VolumeNameUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  4247  	return
  4248  }
  4249  
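        // Write stores buf in file inodeNumber at offset and registers the inode
        // with trackInFlightFileInodeData() so the new data gets flushed in a
        // timely fashion.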
  4250  func (vS *volumeStruct) Write(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, buf []byte, profiler *utils.Profiler) (size uint64, err error) {
  4251  	startTime := time.Now()
  4252  	defer func() {
  4253  		globals.WriteUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  4254  		globals.WriteBytes.Add(size)
  4255  		if err != nil {
  4256  			globals.WriteErrors.Add(1)
  4257  		}
  4258  	}()
  4259  
  4260  	vS.jobRWMutex.RLock()
  4261  	defer vS.jobRWMutex.RUnlock()
  4262  
  4263  	logger.Tracef("fs.Write(): starting volume '%s' inode %v offset %v len %v",
  4264  		vS.volumeName, inodeNumber, offset, len(buf))
  4265  
  4266  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  4267  	if err != nil {
  4268  		return
  4269  	}
  4270  	err = inodeLock.WriteLock()
  4271  	if err != nil {
  4272  		return
  4273  	}
  4274  	defer inodeLock.Unlock()
  4275  
  4276  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  4277  		inode.NoOverride) {
  4278  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  4279  		return
  4280  	}
  4281  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  4282  		inode.OwnerOverride) {
  4283  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  4284  		return
  4285  	}
  4286  
  4287  	profiler.AddEventNow("before inode.Write()")
  4288  	err = vS.inodeVolumeHandle.Write(inodeNumber, offset, buf, profiler)
  4289  	profiler.AddEventNow("after inode.Write()")
  4290  	// write to Swift presumably succeeds or fails as a whole
  4291  	if err != nil {
  4292  		return 0, err
  4293  	}
  4294  
  4295  	logger.Tracef("fs.Write(): tracking write volume '%s' inode %v", vS.volumeName, inodeNumber)
  4296  	vS.trackInFlightFileInodeData(inodeNumber)
  4297  	size = uint64(len(buf))
  4298  
  4299  	return
  4300  }
  4301  
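        // Wrote records previously written ranges of containerName/objectName as
        // the backing extents for the given fileOffset/length ranges of file
        // inodeNumber, flushing any in-flight data first.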
  4302  func (vS *volumeStruct) Wrote(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, containerName string, objectName string, fileOffset []uint64, objectOffset []uint64, length []uint64, wroteTime uint64) (err error) {
  4303  	vS.jobRWMutex.RLock()
  4304  	defer vS.jobRWMutex.RUnlock()
  4305  
  4306  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  4307  	if err != nil {
  4308  		return
  4309  	}
  4310  	err = inodeLock.WriteLock()
  4311  	if err != nil {
  4312  		return
  4313  	}
  4314  	defer inodeLock.Unlock()
  4315  
  4316  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  4317  		inode.NoOverride) {
  4318  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  4319  		return
  4320  	}
  4321  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  4322  		inode.OwnerOverride) {
  4323  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  4324  		return
  4325  	}
  4326  
  4327  	err = vS.inodeVolumeHandle.Flush(inodeNumber, false)
        	if err != nil {
        		logger.ErrorfWithError(err, "Failed to flush inode %v before recording Wrote()", inodeNumber)
        	}
  4328  	vS.untrackInFlightFileInodeData(inodeNumber, false)
  4329  
  4330  	inodeWroteTime := time.Unix(0, int64(wroteTime))
  4331  
  4332  	err = vS.inodeVolumeHandle.Wrote(inodeNumber, containerName, objectName, fileOffset, objectOffset, length, inodeWroteTime, true)
  4333  
  4334  	return // err, as set by inode.Wrote(), is sufficient
  4335  }
  4336  
  4337  func validateBaseName(baseName string) (err error) {
  4338  	// Make sure the file baseName is not too long
  4339  	baseLen := len(baseName)
  4340  	if baseLen > FileNameMax {
  4341  		err = fmt.Errorf("%s: basename is too long. Length %v, max %v", utils.GetFnName(), baseLen, FileNameMax)
  4342  		logger.ErrorWithError(err)
  4343  		return blunder.AddError(err, blunder.NameTooLongError)
  4344  	}
  4345  	return
  4346  }
  4347  
  4348  func validateFullPath(fullPath string) (err error) {
  4349  	pathLen := len(fullPath)
  4350  	if pathLen > FilePathMax {
  4351  		err = fmt.Errorf("%s: fullpath is too long. Length %v, max %v", utils.GetFnName(), pathLen, FilePathMax)
  4352  		logger.ErrorWithError(err)
  4353  		return blunder.AddError(err, blunder.NameTooLongError)
  4354  	}
  4355  	return
  4356  }
  4357  
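        // revSplitPath cleans fullpath and returns its segments in reverse order:
        // revSplitPath("/a/b/c") yields ["c", "b", "a"], while "" and "/" yield an
        // empty slice.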
  4358  func revSplitPath(fullpath string) []string {
  4359  	// TrimPrefix avoids empty [0] element in pathSegments
  4360  	trimmed := strings.TrimPrefix(fullpath, "/")
  4361  	if trimmed == "" {
  4362  		// path.Clean("") = ".", which is not useful
  4363  		return []string{}
  4364  	}
  4365  
  4366  	segments := strings.Split(path.Clean(trimmed), "/")
  4367  	slen := len(segments)
  4368  	for i := 0; i < slen/2; i++ {
  4369  		segments[i], segments[slen-i-1] = segments[slen-i-1], segments[i]
  4370  	}
  4371  	return segments
  4372  }
  4373  
  4374  // Utility function to unlink, but not destroy, a particular file or empty subdirectory.
  4375  //
  4376  // This function checks that the directory is empty.
  4377  //
  4378  // The caller of this function must hold appropriate locks.
  4379  //
  4380  // obstacleInodeNumber must refer to an existing file or directory
  4381  // that is (a) already part of the directory tree and (b) not the root
  4382  // directory.
  4383  func (vS *volumeStruct) removeObstacleToObjectPut(callerID dlm.CallerID, dirInodeNumber inode.InodeNumber, obstacleName string, obstacleInodeNumber inode.InodeNumber) (err error) {
  4384  	var (
  4385  		fileType             inode.InodeType
  4386  		numEntries           uint64
  4387  		statResult           Stat
  4388  		toDestroyInodeNumber inode.InodeNumber
  4389  	)
  4390  
  4391  	statResult, err = vS.getstatHelper(obstacleInodeNumber, callerID)
  4392  	if err != nil {
  4393  		return
  4394  	}
  4395  
  4396  	fileType = inode.InodeType(statResult[StatFType])
  4397  	if fileType == inode.FileType || fileType == inode.SymlinkType {
  4398  		// Files and symlinks can always, barring errors, be unlinked
  4399  		toDestroyInodeNumber, err = vS.inodeVolumeHandle.Unlink(dirInodeNumber, obstacleName, false)
  4400  		if err != nil {
  4401  			return
  4402  		}
  4403  	} else if fileType == inode.DirType {
  4404  		numEntries, err = vS.inodeVolumeHandle.NumDirEntries(obstacleInodeNumber)
  4405  		if err != nil {
  4406  			return
  4407  		}
  4408  		if numEntries >= 3 {
  4409  			// We're looking at a pre-existing, user-visible directory
  4410  			// that's linked into the directory structure, so we've
  4411  			// got at least two entries, namely "." and ".."
  4412  			//
  4413  			// If there's a third, then the directory is non-empty.
  4414  			err = blunder.NewError(blunder.NotEmptyError, "%s is a non-empty directory", obstacleName)
  4415  			return
  4416  		} else {
  4417  			// We don't want to call Rmdir() here since
  4418  			// that function (a) grabs locks, (b) checks
  4419  			// that it's a directory and is empty, then
  4420  			// (c) calls Unlink() and Destroy().
  4421  			//
  4422  			// We already have the locks and we've already
  4423  			// checked that it's empty, so let's just get
  4424  			// down to it.
  4425  			toDestroyInodeNumber, err = vS.inodeVolumeHandle.Unlink(dirInodeNumber, obstacleName, false)
  4426  			if err != nil {
  4427  				return
  4428  			}
  4429  		}
  4430  	}
  4431  
  4432  	if inode.InodeNumber(0) != toDestroyInodeNumber {
  4433  		err = vS.inodeVolumeHandle.Destroy(toDestroyInodeNumber)
  4434  	}
  4435  
  4436  	return
  4437  }
  4438  
  4439  // Utility function to append entries to reply
  4440  func appendReadPlanEntries(readPlan []inode.ReadPlanStep, readRangeOut *[]inode.ReadPlanStep) (numEntries uint64) {
  4441  	for i := range readPlan {
  4442  		entry := inode.ReadPlanStep{ObjectPath: readPlan[i].ObjectPath, Offset: readPlan[i].Offset, Length: readPlan[i].Length}
  4443  		*readRangeOut = append(*readRangeOut, entry)
  4444  		numEntries++
  4445  	}
  4446  	return
  4447  }