github.com/rohankumardubey/proxyfs@v0.0.0-20210108201508-653efa9ab00e/fs/api_internal.go

// Package fs, sitting on top of the inode manager, defines the filesystem exposed by ProxyFS.
package fs

import (
	"bytes"
	"container/list"
	"fmt"
	"math"
	"path"
	"strings"
	"syscall"
	"time"

	"github.com/swiftstack/ProxyFS/blunder"
	"github.com/swiftstack/ProxyFS/dlm"
	"github.com/swiftstack/ProxyFS/inode"
	"github.com/swiftstack/ProxyFS/logger"
	"github.com/swiftstack/ProxyFS/utils"
)

// Shorthand for our internal API debug log id; global to the package
var internalDebug = logger.DbgInternal

type symlinkFollowState struct {
	seen      map[inode.InodeNumber]bool
	traversed int
}

// Let us sort an array of directory and file names
type dirAndFileName struct {
	dirName  string
	fileName string
}

// this has to be a named type to be a method receiver
type dirAndFileNameSlice []dirAndFileName

func (coll dirAndFileNameSlice) Len() int {
	return len(coll)
}

func (coll dirAndFileNameSlice) Less(i int, j int) bool {
	return coll[i].dirName < coll[j].dirName
}

func (coll dirAndFileNameSlice) Swap(i int, j int) {
	coll[i], coll[j] = coll[j], coll[i]
}
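
// A minimal usage sketch (hypothetical values, not code from this package):
// because dirAndFileNameSlice implements sort.Interface via the three methods
// above, it can be ordered by dirName with the standard library:
//
//	pairs := dirAndFileNameSlice{
//		{dirName: "beta", fileName: "two"},
//		{dirName: "alpha", fileName: "one"},
//	}
//	sort.Sort(pairs) // pairs[0].dirName is now "alpha"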

// trackInFlightFileInodeData is called to ensure a timely Flush occurs.
//
// Only Write() will call this while holding a WriteLock on the fileInode
// either just before or just after its call to inode.Write().
func (vS *volumeStruct) trackInFlightFileInodeData(inodeNumber inode.InodeNumber) {
	var (
		inFlightFileInodeData *inFlightFileInodeDataStruct
		ok                    bool
	)

	globals.Lock()
	vS.dataMutex.Lock()
	inFlightFileInodeData, ok = vS.inFlightFileInodeDataMap[inodeNumber]
	if !ok {
		inFlightFileInodeData = &inFlightFileInodeDataStruct{
			InodeNumber: inodeNumber,
			volStruct:   vS,
			control:     make(chan bool, inFlightFileInodeDataControlBuffering),
		}
		vS.inFlightFileInodeDataMap[inodeNumber] = inFlightFileInodeData
		inFlightFileInodeData.globalsListElement = globals.inFlightFileInodeDataList.PushBack(inFlightFileInodeData)
		inFlightFileInodeData.wg.Add(1)
		go inFlightFileInodeData.inFlightFileInodeDataTracker()
	}
	vS.dataMutex.Unlock()
	globals.Unlock()
}
    77  

// untrackInFlightFileInodeData is called once it is known a Flush() is no longer needed
// or to actually request a Flush() [as would be the case during unmounting a volume].
func (vS *volumeStruct) untrackInFlightFileInodeData(inodeNumber inode.InodeNumber, flushFirst bool) {
	var (
		inFlightFileInodeData *inFlightFileInodeDataStruct
		ok                    bool
	)

	globals.Lock()
	vS.dataMutex.Lock()
	inFlightFileInodeData, ok = vS.inFlightFileInodeDataMap[inodeNumber]
	if !ok {
		vS.dataMutex.Unlock()
		globals.Unlock()
		return
	}
	delete(vS.inFlightFileInodeDataMap, inodeNumber)
	if nil != inFlightFileInodeData.globalsListElement {
		_ = globals.inFlightFileInodeDataList.Remove(inFlightFileInodeData.globalsListElement)
		inFlightFileInodeData.globalsListElement = nil
	}
	inFlightFileInodeData.control <- flushFirst
	vS.dataMutex.Unlock()
	globals.Unlock()
	if flushFirst {
		inFlightFileInodeData.wg.Wait()
	}
}
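
// A sketch of the tracking lifecycle (all names as defined in this package):
// Write() arms a per-inode flush timer by calling trackInFlightFileInodeData();
// a later explicit Flush() (or an unmount) disarms it via
// untrackInFlightFileInodeData(), whose flushFirst argument is delivered to the
// tracker goroutine over the control channel:
//
//	vS.trackInFlightFileInodeData(inodeNumber)          // around inode.Write()
//	...
//	vS.untrackInFlightFileInodeData(inodeNumber, false) // data already flushed
//	vS.untrackInFlightFileInodeData(inodeNumber, true)  // flush now and wait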

// untrackInFlightFileInodeDataAll is called to flush all current elements
// of vS.inFlightFileInodeDataMap (if any) during SIGHUP or Down().
func (vS *volumeStruct) untrackInFlightFileInodeDataAll() {
	var (
		inFlightFileInodeNumber          inode.InodeNumber
		inFlightFileInodeNumbers         []inode.InodeNumber
		inFlightFileInodeNumbersCapacity int
	)

	// Snapshot list of inode.InodeNumber's currently in vS.inFlightFileInodeDataMap

	vS.dataMutex.Lock()
	inFlightFileInodeNumbersCapacity = len(vS.inFlightFileInodeDataMap)
	if 0 == inFlightFileInodeNumbersCapacity {
		vS.dataMutex.Unlock()
		return
	}
	inFlightFileInodeNumbers = make([]inode.InodeNumber, 0, inFlightFileInodeNumbersCapacity)
	for inFlightFileInodeNumber = range vS.inFlightFileInodeDataMap {
		inFlightFileInodeNumbers = append(inFlightFileInodeNumbers, inFlightFileInodeNumber)
	}
	vS.dataMutex.Unlock()

	// Now go flush each of those

	for _, inFlightFileInodeNumber = range inFlightFileInodeNumbers {
		vS.untrackInFlightFileInodeData(inFlightFileInodeNumber, true)
	}
}

func (vS *volumeStruct) inFlightFileInodeDataFlusher(inodeNumber inode.InodeNumber) {
	var (
		err         error
		inodeLock   *dlm.RWLockStruct
		stillExists bool
	)

	// Act as if a package fs client called Flush()...

	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if nil != err {
		logger.PanicfWithError(err, "InitInodeLock() for volume '%s' inode %v failed", vS.volumeName, inodeNumber)
	}
	err = inodeLock.WriteLock()
	if nil != err {
		logger.PanicfWithError(err, "dlm.WriteLock() for volume '%s' inode %v failed", vS.volumeName, inodeNumber)
	}

	stillExists = vS.inodeVolumeHandle.Access(inodeNumber, inode.InodeRootUserID, inode.InodeGroupID(0), nil, inode.F_OK,
		inode.NoOverride)
	if stillExists {
		err = vS.inodeVolumeHandle.Flush(inodeNumber, false)
		if nil == err {
			vS.untrackInFlightFileInodeData(inodeNumber, false)
		} else {
			logger.ErrorfWithError(err, "Flush of file data failed on volume '%s' inode %v", vS.volumeName, inodeNumber)
		}
	}

	err = inodeLock.Unlock()
	if nil != err {
		logger.PanicfWithError(err, "dlm.Unlock() for volume '%s' inode %v failed", vS.volumeName, inodeNumber)
	}
}

func (inFlightFileInodeData *inFlightFileInodeDataStruct) inFlightFileInodeDataTracker() {
	var (
		flushFirst bool
	)

	logger.Tracef("fs.inFlightFileInodeDataTracker(): waiting to flush volume '%s' inode %v",
		inFlightFileInodeData.volStruct.volumeName, inFlightFileInodeData.InodeNumber)

	select {
	case flushFirst = <-inFlightFileInodeData.control:
		// All we needed was the value of flushFirst from control chan
	case <-time.After(inFlightFileInodeData.volStruct.maxFlushTime):
		flushFirst = true
	}

	logger.Tracef("fs.inFlightFileInodeDataTracker(): flush starting for volume '%s' inode %v flushFirst %t",
		inFlightFileInodeData.volStruct.volumeName, inFlightFileInodeData.InodeNumber, flushFirst)

	if flushFirst {
		inFlightFileInodeData.volStruct.inFlightFileInodeDataFlusher(inFlightFileInodeData.InodeNumber)
	}

	inFlightFileInodeData.wg.Done()
}

func fetchVolumeHandleByAccountName(accountName string) (volumeHandle VolumeHandle, err error) {
	var (
		ok         bool
		vS         *volumeStruct
		volumeName string
	)

	startTime := time.Now()
	defer func() {
		globals.FetchVolumeHandleUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.FetchVolumeHandleErrors.Add(1)
		}
	}()

	globals.Lock()

	volumeName, ok = inode.AccountNameToVolumeName(accountName)
	if !ok {
		err = fmt.Errorf("Unknown accountName passed to fetchVolumeHandleByAccountName(): \"%s\"", accountName)
		err = blunder.AddError(err, blunder.NotFoundError)
		globals.Unlock()
		return
	}

	vS, ok = globals.volumeMap[volumeName]
	if !ok {
		err = fmt.Errorf("Unknown volumeName computed by fetchVolumeHandleByAccountName(): \"%s\"", volumeName)
		err = blunder.AddError(err, blunder.NotFoundError)
		globals.Unlock()
		return
	}

	globals.Unlock()

	volumeHandle = vS
	err = nil

	return
}

func fetchVolumeHandleByVolumeName(volumeName string) (volumeHandle VolumeHandle, err error) {
	var (
		ok bool
		vS *volumeStruct
	)

	startTime := time.Now()
	defer func() {
		globals.FetchVolumeHandleUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.FetchVolumeHandleErrors.Add(1)
		}
	}()

	globals.Lock()

	vS, ok = globals.volumeMap[volumeName]
	if !ok {
		err = fmt.Errorf("Unknown volumeName passed to fetchVolumeHandleByVolumeName(): \"%s\"", volumeName)
		err = blunder.AddError(err, blunder.NotFoundError)
		globals.Unlock()
		return
	}

	globals.Unlock()

	volumeHandle = vS
	err = nil

	return
}
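
// A minimal usage sketch (assuming the package has already been initialized
// and the volume is present in globals.volumeMap):
//
//	volumeHandle, err := fetchVolumeHandleByVolumeName("testVolume")
//	if err != nil {
//		return err // carries blunder.NotFoundError for an unknown volume
//	}
//	// volumeHandle now exposes the fs API (Create, Lookup, Flush, Flock, ...)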

func (vS *volumeStruct) Access(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, accessMode inode.InodeMode) (accessReturn bool) {
	startTime := time.Now()
	defer func() {
		globals.AccessUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	accessReturn = vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, accessMode,
		inode.NoOverride)
	return
}

func (vS *volumeStruct) CallInodeToProvisionObject() (pPath string, err error) {
	startTime := time.Now()
	defer func() {
		globals.CallInodeToProvisionObjectUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.CallInodeToProvisionObjectErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	pPath, err = vS.inodeVolumeHandle.ProvisionObject()
	return
}

func (vS *volumeStruct) Create(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (fileInodeNumber inode.InodeNumber, err error) {
	startTime := time.Now()
	defer func() {
		globals.CreateUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.CreateErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	err = validateBaseName(basename)
	if err != nil {
		return 0, err
	}

	// Lock the directory inode before doing the link
	dirInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(dirInodeNumber, nil)
	if err != nil {
		return 0, err
	}
	err = dirInodeLock.WriteLock()
	if err != nil {
		return 0, err
	}
	defer dirInodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		return 0, blunder.NewError(blunder.NotFoundError, "ENOENT")
	}
	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
		inode.NoOverride) {
		return 0, blunder.NewError(blunder.PermDeniedError, "EACCES")
	}

	// create the file and add it to the directory
	fileInodeNumber, err = vS.inodeVolumeHandle.CreateFile(filePerm, userID, groupID)
	if err != nil {
		return 0, err
	}

	err = vS.inodeVolumeHandle.Link(dirInodeNumber, basename, fileInodeNumber, false)
	if err != nil {
		destroyErr := vS.inodeVolumeHandle.Destroy(fileInodeNumber)
		if destroyErr != nil {
			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Link() in fs.Create", fileInodeNumber)
		}
		return 0, err
	}

	return fileInodeNumber, nil
}
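
// A minimal usage sketch of Create() (hypothetical name and mode; assumes a
// mounted volume and a caller with write+search access to the directory):
//
//	fileInodeNumber, err := volumeHandle.Create(
//		userID, groupID, nil,
//		inode.RootDirInodeNumber, "example.txt", inode.InodeMode(0644))
//	if err != nil {
//		return err // e.g. EACCES if W|X is denied on the directory
//	}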

func (vS *volumeStruct) DefragmentFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber) (err error) {
	var (
		eofReached bool
		fileOffset uint64
		inodeLock  *dlm.RWLockStruct
		inodeType  inode.InodeType
	)

	startTime := time.Now()
	defer func() {
		globals.DefragmentFileUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.DefragmentFileErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()

	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(fileInodeNumber, nil)
	if nil != err {
		vS.jobRWMutex.RUnlock()
		return
	}
	err = inodeLock.WriteLock()
	if nil != err {
		vS.jobRWMutex.RUnlock()
		return
	}

	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
		inode.OwnerOverride) {
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	inodeType, err = vS.inodeVolumeHandle.GetType(fileInodeNumber)
	if nil != err {
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		logger.ErrorfWithError(err, "couldn't get type for inode %v", fileInodeNumber)
		return
	}
	// Make sure the inode number is for a file inode
	if inodeType != inode.FileType {
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		err = fmt.Errorf("%s: expected inode %v to be a file inode, got %v", utils.GetFnName(), fileInodeNumber, inodeType)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFileError)
		return
	}

	fileOffset = 0

	for {
		fileOffset, eofReached, err = vS.inodeVolumeHandle.DefragmentFile(fileInodeNumber, fileOffset, vS.fileDefragmentChunkSize)
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		if nil != err {
			return
		}
		if eofReached {
			return
		}
		time.Sleep(vS.fileDefragmentChunkDelay)
		vS.jobRWMutex.RLock()
		err = inodeLock.WriteLock()
		if nil != err {
			vS.jobRWMutex.RUnlock()
			return
		}
	}
}

func (vS *volumeStruct) FetchExtentMapChunk(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber, fileOffset uint64, maxEntriesFromFileOffset int64, maxEntriesBeforeFileOffset int64) (extentMapChunk *inode.ExtentMapChunkStruct, err error) {
	var (
		inodeLock *dlm.RWLockStruct
		inodeType inode.InodeType
	)

	startTime := time.Now()
	defer func() {
		globals.FetchExtentMapChunkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.FetchExtentMapChunkErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(fileInodeNumber, nil)
	if nil != err {
		return
	}
	err = inodeLock.ReadLock()
	if nil != err {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
		inode.OwnerOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	inodeType, err = vS.inodeVolumeHandle.GetType(fileInodeNumber)
	if nil != err {
		logger.ErrorfWithError(err, "couldn't get type for inode %v", fileInodeNumber)
		return
	}
	// Make sure the inode number is for a file inode
	if inodeType != inode.FileType {
		err = fmt.Errorf("%s: expected inode %v to be a file inode, got %v", utils.GetFnName(), fileInodeNumber, inodeType)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFileError)
		return
	}

	extentMapChunk, err = vS.inodeVolumeHandle.FetchExtentMapChunk(fileInodeNumber, fileOffset, maxEntriesFromFileOffset, maxEntriesBeforeFileOffset)

	return
}

// doInlineCheckpointIfEnabled is called whenever we must guarantee that reported state changes
// are, indeed, persisted. Absent any sort of persistent transaction log, this unfortunately
// means performing a checkpoint.
//
// Currently, only explicitly invoked Flushes trigger this. But, actually, any Swift/S3 API call
// that modifies Objects or (what the client thinks are) Containers should also.
//
// TODO: determine where else a call to this func should also be made.
//
func (vS *volumeStruct) doInlineCheckpointIfEnabled() {
	var (
		err error
	)

	if !vS.doCheckpointPerFlush {
		return
	}

	err = vS.headhunterVolumeHandle.DoCheckpoint()
	if nil != err {
		logger.Fatalf("fs.doInlineCheckpoint() call to headhunter.DoCheckpoint() failed: %v", err)
	}
}

func (vS *volumeStruct) Flush(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (err error) {
	startTime := time.Now()
	defer func() {
		globals.FlushUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.FlushErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.WriteLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		return blunder.NewError(blunder.NotFoundError, "ENOENT")
	}

	// Note: We'd normally check EACCES here...but there are paths in FUSE (e.g. when files are
	//       closed) that end up calling Flush()...even though the file was "opened" ReadOnly.
	//       This is presumably to support updates of ATime and such. In any event, an EACCES
	//       check would fail if the caller actually only had ReadOnly access to the Inode, so
	//       we won't be doing the check here.

	err = vS.inodeVolumeHandle.Flush(inodeNumber, false)
	vS.untrackInFlightFileInodeData(inodeNumber, false)

	vS.doInlineCheckpointIfEnabled()

	return
}

func (vS *volumeStruct) getFileLockList(inodeNumber inode.InodeNumber) (flockList *list.List) {
	vS.dataMutex.Lock()
	defer vS.dataMutex.Unlock()

	flockList, ok := vS.FLockMap[inodeNumber]
	if !ok {
		flockList = new(list.List)
		vS.FLockMap[inodeNumber] = flockList
	}

	return
}

// checkConflict returns true if the two locks conflict: they are held/requested
// by different Pids, their byte ranges overlap, and at least one of them is a
// write lock.
func checkConflict(elm *FlockStruct, flock *FlockStruct) bool {

	if flock.Pid == elm.Pid {
		return false
	}

	if (elm.Start+elm.Len) <= flock.Start || (flock.Start+flock.Len) <= elm.Start {
		return false
	}

	if (flock.Type == syscall.F_WRLCK) || (elm.Type == syscall.F_WRLCK) {
		return true
	}

	return false
}
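
// A worked example (hypothetical values): a write lock on bytes [0,10) held by
// Pid 1 conflicts with a read lock on bytes [5,20) requested by Pid 2, because
// the ranges overlap and one of the locks is F_WRLCK:
//
//	held := &FlockStruct{Pid: 1, Type: syscall.F_WRLCK, Start: 0, Len: 10}
//	want := &FlockStruct{Pid: 2, Type: syscall.F_RDLCK, Start: 5, Len: 15}
//	checkConflict(held, want) // true: [0,10) and [5,20) overlap
//	want.Start = 10
//	checkConflict(held, want) // false: [0,10) and [10,25) do not overlap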

// verifyLock returns the first lock in the list that conflicts with flock
// (or nil if there is no conflict).
func (vS *volumeStruct) verifyLock(inodeNumber inode.InodeNumber, flock *FlockStruct) (conflictLock *FlockStruct) {
	flockList := vS.getFileLockList(inodeNumber)

	for e := flockList.Front(); e != nil; e = e.Next() {
		elm := e.Value.(*FlockStruct)

		if checkConflict(elm, flock) {
			return elm
		}
	}

	return nil
}

// Insert a file lock range into the corresponding lock list for the Pid.
// Assumption: the caller has already verified (via checkConflict) that the range
// being inserted does not conflict with locks held by other Pids.
func (vS *volumeStruct) fileLockInsert(inodeNumber inode.InodeNumber, inFlock *FlockStruct) (err error) {
	err = nil
	flockList := vS.getFileLockList(inodeNumber)

	overlapList := new(list.List)
	var beforeElm *list.Element // Refers to the immediate element that starts before the start of the range.
	var afterElm *list.Element  // Refers to the immediate element that starts after the end of the range.

	// flockList is sorted by starting offset of the range.
	// Inserting a range happens in three steps: 1) Check if there is any conflict and also identify the
	// point in the list where the entry will be added (before and after elements). 2) Then check if
	// the range can extend the before element; if so, adjust it. 3) Similarly, check if the after
	// element can be collapsed if it forms a contiguous range.

	for e := flockList.Front(); e != nil; e = e.Next() {
		elm := e.Value.(*FlockStruct)

		if (elm.Start + elm.Len) <= inFlock.Start {
			beforeElm = e
			continue
		}

		if elm.Start > (inFlock.Start + inFlock.Len) {
			afterElm = e
			if overlapList.Len() == 0 {
				flockList.InsertBefore(inFlock, e)
				return
			}

			break
		}

		if checkConflict(elm, inFlock) {
			err = blunder.AddError(nil, blunder.TryAgainError)
			return
		}

		if elm.Pid == inFlock.Pid {
			overlapList.PushBack(e)
		}
	}

	if overlapList.Len() == 0 {
		if beforeElm != nil {
			elm := beforeElm.Value.(*FlockStruct)
			if elm.Pid == inFlock.Pid && elm.Type == inFlock.Type && (elm.Start+elm.Len) == inFlock.Start {
				// Extend the before element through the end of the new range
				elm.Len = inFlock.Start + inFlock.Len - elm.Start
			} else {
				flockList.InsertAfter(inFlock, beforeElm)
			}
		} else {
			flockList.PushBack(inFlock)
		}

		return
	}

	// Look at the last element in the overlapping list
	lastEnt := overlapList.Back()
	e := lastEnt.Value.(*list.Element)
	elm := e.Value.(*FlockStruct)
	if (elm.Start + elm.Len) > (inFlock.Start + inFlock.Len) {
		inFlock.Len = (elm.Start + elm.Len) - inFlock.Start
	}

	// We can delete all the entries in the overlapping list. These entries are replaced by
	// the range we are inserting.
	for e := overlapList.Front(); e != nil; e = e.Next() {
		entry := e.Value.(*list.Element)
		flockList.Remove(entry)
	}

	// Now adjust the before and after entries:
	// First adjust the after:
	if afterElm != nil {
		elm := afterElm.Value.(*FlockStruct)
		if elm.Pid == inFlock.Pid && elm.Type == inFlock.Type && (inFlock.Start+inFlock.Len) == elm.Start {
			// We can collapse the entry:
			elm.Len = elm.Start + elm.Len - inFlock.Start
			elm.Start = inFlock.Start

			if beforeElm != nil {
				belm := beforeElm.Value.(*FlockStruct)
				if belm.Pid == elm.Pid && belm.Type == elm.Type && (belm.Start+belm.Len) == elm.Start {
					belm.Len = elm.Start + elm.Len - belm.Start
					flockList.Remove(afterElm)
				}
			}

			return
		}
	}

	if beforeElm != nil {
		belm := beforeElm.Value.(*FlockStruct)
		if belm.Pid == inFlock.Pid && belm.Type == inFlock.Type && (belm.Start+belm.Len) == inFlock.Start {
			// Extend the before element through the end of the new range
			belm.Len = inFlock.Start + inFlock.Len - belm.Start
		} else {
			flockList.InsertAfter(inFlock, beforeElm)
		}
		return
	}

	if afterElm != nil {
		flockList.InsertBefore(inFlock, afterElm)
	} else {
		flockList.PushBack(inFlock)
	}

	return
}
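
// A worked example of step 2 above (hypothetical values): if Pid 7 already
// holds a read lock on bytes [0,10) and inserts an adjacent read lock on
// [10,20), the two entries are collapsed into a single one covering [0,20):
//
//	_ = vS.fileLockInsert(inodeNumber, &FlockStruct{
//		Pid: 7, Type: syscall.F_RDLCK, Start: 10, Len: 10})
//	// flockList now holds a single FlockStruct{..., Start: 0, Len: 20}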

// Unlock a given range. All locks held in this range by the process (identified by Pid) are removed.
func (vS *volumeStruct) fileUnlock(inodeNumber inode.InodeNumber, inFlock *FlockStruct) (err error) {

	flockList := vS.getFileLockList(inodeNumber)
	if flockList == nil {
		logger.Warnf("Unlock of a region not already locked - %+v", inFlock)
		return
	}

	start := inFlock.Start
	length := inFlock.Len

	removeList := new(list.List)

	for e := flockList.Front(); e != nil; e = e.Next() {
		elm := e.Value.(*FlockStruct)

		if elm.Pid != inFlock.Pid {
			continue
		}

		if (elm.Start + elm.Len) < start {
			continue
		}

		if elm.Start >= (start + length) {
			break
		}

		// If the lock falls completely in the range, delete it.
		if elm.Start >= start && (elm.Start+elm.Len) <= (start+length) {
			removeList.PushBack(e)
			continue
		}

		// This lock overlaps with the range - three possibilities: 1) the lock starts before the
		// range, 2) it ends after the range, or 3) both.

		elmEnd := elm.Start + elm.Len // Save the original end offset; it is required in case #3 (both)

		if elm.Start < start { // Handle the first part - lock starts before the range.
			elm.Len = start - elm.Start
		}

		if elmEnd > (start + length) { // Lock extends beyond the unlock range.
			if elm.Start > start { // case #2
				// use the existing record
				elm.Start = start + length
				elm.Len = elmEnd - elm.Start
				break
			}

			// Create a new record - handle case #3 both (starts before the range and extends beyond the range)
			elmTail := new(FlockStruct)
			elmTail.Start = start + length
			elmTail.Len = elmEnd - elmTail.Start
			elmTail.Pid = elm.Pid
			elmTail.Type = elm.Type
			elmTail.Whence = elm.Whence
			flockList.InsertAfter(elmTail, e)
			break
		}
	}

	for e := removeList.Front(); e != nil; e = e.Next() {
		elm := e.Value.(*list.Element)
		flockList.Remove(elm)
	}

	return
}
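
// A worked example of case #3 (hypothetical values): Pid 7 holds a lock on
// bytes [0,30) and unlocks [10,20). The held entry is trimmed to [0,10) and a
// new tail entry covering [20,30) is inserted, so both ends stay locked:
//
//	_ = vS.fileUnlock(inodeNumber, &FlockStruct{
//		Pid: 7, Type: syscall.F_UNLCK, Start: 10, Len: 10})
//	// flockList now holds {Start: 0, Len: 10} and {Start: 20, Len: 10}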

// Implements file locking conforming to the fcntl(2) locking description. Supports F_SETLK and
// F_GETLK; F_SETLKW is not implemented.
// whence: FS supports only SEEK_SET - starting from 0; since it does not manage file handles, the
// caller is expected to supply the start and length relative to offset ZERO.
func (vS *volumeStruct) Flock(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, lockCmd int32, inFlock *FlockStruct) (outFlock *FlockStruct, err error) {
	startTime := time.Now()
	defer func() {
		switch lockCmd {

		case syscall.F_GETLK:
			globals.FlockGetUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
			if err != nil {
				globals.FlockGetErrors.Add(1)
			}

		case syscall.F_SETLK:
			if inFlock.Type == syscall.F_UNLCK {
				globals.FlockUnlockUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
				if err != nil {
					globals.FlockUnlockErrors.Add(1)
				}

			} else if inFlock.Type == syscall.F_WRLCK || inFlock.Type == syscall.F_RDLCK {
				globals.FlockLockUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
				if err != nil {
					globals.FlockLockErrors.Add(1)
				}
			} else {
				globals.FlockOtherErrors.Add(1)
			}

		default:
			globals.FlockOtherErrors.Add(1)
		}

	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	outFlock = inFlock

	if lockCmd == syscall.F_SETLKW {
		err = blunder.AddError(nil, blunder.NotSupportedError)
		return
	}

	// Make sure the inode does not go away while we are applying the flock.
	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK, inode.OwnerOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	if inFlock.Len == 0 { // A length of ZERO means treat it as the whole file.
		inFlock.Len = ^uint64(0)
	}

	switch lockCmd {
	case syscall.F_GETLK:
		conflictLock := vS.verifyLock(inodeNumber, inFlock)
		if conflictLock != nil {
			outFlock = conflictLock
			err = blunder.AddError(nil, blunder.TryAgainError)
		} else {
			outFlock = inFlock
			outFlock.Type = syscall.F_UNLCK
		}

	case syscall.F_SETLK:
		if inFlock.Type == syscall.F_UNLCK {
			err = vS.fileUnlock(inodeNumber, inFlock)

		} else if inFlock.Type == syscall.F_WRLCK || inFlock.Type == syscall.F_RDLCK {
			err = vS.fileLockInsert(inodeNumber, inFlock)

		} else {
			err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
			return
		}

	default:
		err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
		return
	}

	return
}
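
// A minimal usage sketch (hypothetical values): Pid 1 takes a write lock on
// the first 100 bytes, then Pid 2 probes that region with F_GETLK:
//
//	lock := &FlockStruct{Pid: 1, Type: syscall.F_WRLCK, Start: 0, Len: 100}
//	_, err := volumeHandle.Flock(userID, groupID, nil, inodeNumber, syscall.F_SETLK, lock)
//
//	probe := &FlockStruct{Pid: 2, Type: syscall.F_RDLCK, Start: 0, Len: 1}
//	out, err := volumeHandle.Flock(userID, groupID, nil, inodeNumber, syscall.F_GETLK, probe)
//	// err carries blunder.TryAgainError and out describes Pid 1's conflicting lock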

func (vS *volumeStruct) getstatHelper(inodeNumber inode.InodeNumber, callerID dlm.CallerID) (stat Stat, err error) {

	lockID, err := vS.inodeVolumeHandle.MakeLockID(inodeNumber)
	if err != nil {
		return
	}
	if !dlm.IsLockHeld(lockID, callerID, dlm.ANYLOCK) {
		err = fmt.Errorf("%s: inode %v lock must be held before calling", utils.GetFnName(), inodeNumber)
		return nil, blunder.AddError(err, blunder.NotFoundError)
	}

	stat, err = vS.getstatHelperWhileLocked(inodeNumber)

	return
}

func (vS *volumeStruct) getstatHelperWhileLocked(inodeNumber inode.InodeNumber) (stat Stat, err error) {
	var (
		metadata *inode.MetadataStruct
	)

	metadata, err = vS.inodeVolumeHandle.GetMetadata(inodeNumber)
	if nil != err {
		return
	}

	stat = make(map[StatKey]uint64)

	stat[StatCRTime] = uint64(metadata.CreationTime.UnixNano())
	stat[StatMTime] = uint64(metadata.ModificationTime.UnixNano())
	stat[StatCTime] = uint64(metadata.AttrChangeTime.UnixNano())
	stat[StatATime] = uint64(metadata.AccessTime.UnixNano())
	stat[StatSize] = metadata.Size
	stat[StatNLink] = metadata.LinkCount
	stat[StatFType] = uint64(metadata.InodeType)
	stat[StatINum] = uint64(inodeNumber)
	stat[StatMode] = uint64(metadata.Mode)
	stat[StatUserID] = uint64(metadata.UserID)
	stat[StatGroupID] = uint64(metadata.GroupID)
	stat[StatNumWrites] = metadata.NumWrites

	return
}
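
// A minimal sketch of consuming the Stat map built above (StatKey constants as
// defined by this package; timestamps are nanoseconds since the epoch):
//
//	stat, err := volumeHandle.Getstat(userID, groupID, nil, inodeNumber)
//	if err != nil {
//		return err
//	}
//	size := stat[StatSize]                        // size in bytes
//	mtime := time.Unix(0, int64(stat[StatMTime])) // modification time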

func (vS *volumeStruct) Getstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (stat Stat, err error) {
	startTime := time.Now()
	defer func() {
		globals.GetstatUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.GetstatErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	// Call getstat helper function to do the work
	return vS.getstatHelper(inodeNumber, inodeLock.GetCallerID())
}

func (vS *volumeStruct) getTypeHelper(inodeNumber inode.InodeNumber, callerID dlm.CallerID) (inodeType inode.InodeType, err error) {

	lockID, err := vS.inodeVolumeHandle.MakeLockID(inodeNumber)
	if err != nil {
		return
	}
	if !dlm.IsLockHeld(lockID, callerID, dlm.ANYLOCK) {
		err = fmt.Errorf("%s: inode %v lock must be held before calling", utils.GetFnName(), inodeNumber)
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}

	inodeType, err = vS.inodeVolumeHandle.GetType(inodeNumber)
	if err != nil {
		logger.ErrorWithError(err, "couldn't get inode type")
		return inodeType, err
	}
	return inodeType, nil
}

func (vS *volumeStruct) GetType(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeType inode.InodeType, err error) {
	startTime := time.Now()
	defer func() {
		globals.GetTypeUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.GetTypeErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	return vS.getTypeHelper(inodeNumber, inodeLock.GetCallerID())
}

func (vS *volumeStruct) GetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (value []byte, err error) {
	startTime := time.Now()
	defer func() {
		globals.GetXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.GetXAttrErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
		inode.OwnerOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	value, err = vS.inodeVolumeHandle.GetStream(inodeNumber, streamName)
	if err != nil {
		// Did not find the requested stream. However, this isn't really an error since
		// samba will ask for acl-related streams and is fine with not finding them.
		logger.TracefWithError(err, "Failed to get XAttr %v of inode %v", streamName, inodeNumber)
	}

	return
}

func (vS *volumeStruct) IsDir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsDir bool, err error) {
	startTime := time.Now()
	defer func() {
		globals.IsDirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.IsDirErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	lockID, err := vS.inodeVolumeHandle.MakeLockID(inodeNumber)
	if err != nil {
		return
	}
	if !dlm.IsLockHeld(lockID, inodeLock.GetCallerID(), dlm.ANYLOCK) {
		err = fmt.Errorf("%s: inode %v lock must be held before calling", utils.GetFnName(), inodeNumber)
		return false, blunder.AddError(err, blunder.NotFoundError)
	}

	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
	if err != nil {
		return false, err
	}
	return inodeType == inode.DirType, nil
}

func (vS *volumeStruct) IsFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsFile bool, err error) {
	startTime := time.Now()
	defer func() {
		globals.IsFileUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.IsFileErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
	if err != nil {
		return false, err
	}

	return inodeType == inode.FileType, nil
}

func (vS *volumeStruct) IsSymlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsSymlink bool, err error) {
	startTime := time.Now()
	defer func() {
		globals.IsSymlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.IsSymlinkErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
	if err != nil {
		return false, err
	}

	return inodeType == inode.SymlinkType, nil
}

func (vS *volumeStruct) Link(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, targetInodeNumber inode.InodeNumber) (err error) {
	startTime := time.Now()
	defer func() {
		globals.LinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.LinkErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	var (
		inodeType inode.InodeType
	)

	err = validateBaseName(basename)
	if err != nil {
		return
	}

	// We need both the dirInode lock and the targetInode lock to make sure they
	// don't go away and linkCount is updated correctly.
	callerID := dlm.GenerateCallerID()
	dirInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(dirInodeNumber, callerID)
	if err != nil {
		return
	}

	targetInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(targetInodeNumber, callerID)
	if err != nil {
		return
	}

	// Lock the target inode to check its type and ensure it's not a directory (if it were a
	// directory, locking it after the parent directory could result in deadlock).
	err = targetInodeLock.WriteLock()
	if err != nil {
		return
	}

	// make sure target inode is not a directory
	inodeType, err = vS.inodeVolumeHandle.GetType(targetInodeNumber)
	if err != nil {
		targetInodeLock.Unlock()
		// Because we know that GetType() has already "blunderized" the error, we just pass it on
		logger.ErrorfWithError(err, "%s: couldn't get type for inode %v", utils.GetFnName(), targetInodeNumber)
		return err
	}
	if inodeType == inode.DirType {
		targetInodeLock.Unlock()
		// no need to print an error when it's a mistake by the client
		err = fmt.Errorf("%s: inode %v cannot be a dir inode", utils.GetFnName(), targetInodeNumber)
		return blunder.AddError(err, blunder.LinkDirError)
	}

	// drop the target inode lock so we can get the directory lock, then
	// re-acquire the target inode lock
	targetInodeLock.Unlock()

	err = dirInodeLock.WriteLock()
	if err != nil {
		return
	}
	defer dirInodeLock.Unlock()

	err = targetInodeLock.WriteLock()
	if err != nil {
		return
	}
	defer targetInodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(targetInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	err = vS.inodeVolumeHandle.Link(dirInodeNumber, basename, targetInodeNumber, false)

	// if the link was successful and this is a regular file, then any
	// pending data was already flushed, so stop tracking the inode
	if err == nil && inodeType == inode.FileType {
		vS.untrackInFlightFileInodeData(targetInodeNumber, false)
	}

	return err
}

func (vS *volumeStruct) ListXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (streamNames []string, err error) {
	startTime := time.Now()
	defer func() {
		globals.ListXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.ListXAttrErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
		inode.OwnerOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	metadata, err := vS.inodeVolumeHandle.GetMetadata(inodeNumber)
	if err != nil {
		// Couldn't fetch the stream names. However, this isn't really an error since
		// samba will ask for acl-related streams and is fine with not finding them.
		logger.TracefWithError(err, "Failed to list XAttrs of inode %v", inodeNumber)
		return
	}

	streamNames = make([]string, len(metadata.InodeStreamNameSlice))
	copy(streamNames, metadata.InodeStreamNameSlice)
	return
}

func (vS *volumeStruct) Lookup(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string) (inodeNumber inode.InodeNumber, err error) {
	startTime := time.Now()
	defer func() {
		globals.LookupUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.LookupErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	dirInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(dirInodeNumber, nil)
	if err != nil {
		return
	}
	err = dirInodeLock.ReadLock()
	if err != nil {
		return
	}
	defer dirInodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.X_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	inodeNumber, err = vS.inodeVolumeHandle.Lookup(dirInodeNumber, basename)
	return inodeNumber, err
}

func (vS *volumeStruct) LookupPath(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fullpath string) (inodeNumber inode.InodeNumber, err error) {
	startTime := time.Now()
	defer func() {
		globals.LookupPathUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.LookupPathErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	// In the special case of a fullpath starting with "/", the path segment splitting below
	// would produce a first segment that is not a real path segment, which the underlying
	// code cannot look up.
	//
	// This is a hack to work around this case until I figure out a better way.
	newfullpath := strings.TrimPrefix(fullpath, "/")
	if fullpath != newfullpath {
		fullpath = newfullpath
	}

	pathSegments := strings.Split(path.Clean(fullpath), "/")

	cursorInodeNumber := inode.RootDirInodeNumber
	for _, segment := range pathSegments {
		cursorInodeLock, err1 := vS.inodeVolumeHandle.InitInodeLock(cursorInodeNumber, nil)
		if err = err1; err != nil {
			return
		}
		err = cursorInodeLock.ReadLock()
		if err != nil {
			return
		}

		if !vS.inodeVolumeHandle.Access(cursorInodeNumber, userID, groupID, otherGroupIDs, inode.X_OK,
			inode.NoOverride) {
			cursorInodeLock.Unlock()
			err = blunder.NewError(blunder.PermDeniedError, "EACCES")
			return
		}

		cursorInodeNumber, err = vS.inodeVolumeHandle.Lookup(cursorInodeNumber, segment)
		cursorInodeLock.Unlock()

		if err != nil {
			return cursorInodeNumber, err
		}
	}

	return cursorInodeNumber, nil
}
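
// A minimal usage sketch (hypothetical path): resolve "a/b/c" relative to the
// root directory, walking one segment at a time as above:
//
//	ino, err := volumeHandle.LookupPath(userID, groupID, nil, "a/b/c")
//	if err != nil {
//		return err // e.g. blunder.NotFoundError if any segment is missing
//	}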

func (vS *volumeStruct) MiddlewareCoalesce(destPath string, metaData []byte, elementPaths []string) (
	ino uint64, numWrites uint64, attrChangeTime uint64, modificationTime uint64, err error) {

	var (
		coalesceElementList          []*inode.CoalesceElement
		coalesceSize                 uint64
		ctime                        time.Time
		destFileInodeNumber          inode.InodeNumber
		dirEntryBasename             string
		dirEntryInodeNumber          inode.InodeNumber
		dirInodeNumber               inode.InodeNumber
		elementPathIndex             int
		elementPathIndexAtChunkEnd   int
		elementPathIndexAtChunkStart int
		heldLocks                    *heldLocksStruct
		mtime                        time.Time
		retryRequired                bool
		tryLockBackoffContext        *tryLockBackoffContextStruct
	)

	startTime := time.Now()
	defer func() {
		globals.MiddlewareCoalesceUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		globals.MiddlewareCoalesceBytes.Add(coalesceSize)
		if err != nil {
			globals.MiddlewareCoalesceErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	// First create the destination file if necessary and ensure that it is empty

	tryLockBackoffContext = &tryLockBackoffContextStruct{}

RestartDestinationFileCreation:

	tryLockBackoffContext.backoff()

	heldLocks = newHeldLocks()

	_, destFileInodeNumber, _, _, retryRequired, err =
		vS.resolvePath(
			inode.RootDirInodeNumber,
			destPath,
			heldLocks,
			resolvePathFollowDirEntrySymlinks|
				resolvePathFollowDirSymlinks|
				resolvePathCreateMissingPathElements|
				resolvePathRequireExclusiveLockOnDirEntryInode)

	if nil != err {
		heldLocks.free()
		return
	}

	if retryRequired {
		heldLocks.free()
		goto RestartDestinationFileCreation
	}

	err = vS.inodeVolumeHandle.SetSize(destFileInodeNumber, 0)
	if nil != err {
		heldLocks.free()
		return
	}

	heldLocks.free()

	// Now setup for looping through elementPaths with fresh locks
	// every globals.coalesceElementChunkSize elements holding an
	// Exclusive Lock on each FileInode and their containing DirInode

	elementPathIndexAtChunkStart = 0

	for elementPathIndexAtChunkStart < len(elementPaths) {
		elementPathIndexAtChunkEnd = elementPathIndexAtChunkStart + int(globals.coalesceElementChunkSize)
		if elementPathIndexAtChunkEnd > len(elementPaths) {
			elementPathIndexAtChunkEnd = len(elementPaths)
		}

		// Coalesce elementPaths[elementPathIndexAtChunkStart:elementPathIndexAtChunkEnd)

		tryLockBackoffContext = &tryLockBackoffContextStruct{}

	RestartCoalesceChunk:

		tryLockBackoffContext.backoff()

		heldLocks = newHeldLocks()

		coalesceElementList = make([]*inode.CoalesceElement, 0, (elementPathIndexAtChunkEnd - elementPathIndexAtChunkStart))

		for elementPathIndex = elementPathIndexAtChunkStart; elementPathIndex < elementPathIndexAtChunkEnd; elementPathIndex++ {
			dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, _, retryRequired, err =
				vS.resolvePath(
					inode.RootDirInodeNumber,
					elementPaths[elementPathIndex],
					heldLocks,
					resolvePathFollowDirSymlinks|
						resolvePathRequireExclusiveLockOnDirEntryInode|
						resolvePathRequireExclusiveLockOnDirInode)

			if nil != err {
				heldLocks.free()
				return
			}

			if retryRequired {
				heldLocks.free()
				goto RestartCoalesceChunk
			}

			coalesceElementList = append(coalesceElementList, &inode.CoalesceElement{
				ContainingDirectoryInodeNumber: dirInodeNumber,
				ElementInodeNumber:             dirEntryInodeNumber,
				ElementName:                    dirEntryBasename,
			})
		}

		_, destFileInodeNumber, _, _, retryRequired, err =
			vS.resolvePath(
				inode.RootDirInodeNumber,
				destPath,
				heldLocks,
				resolvePathFollowDirEntrySymlinks|
					resolvePathFollowDirSymlinks|
					resolvePathRequireExclusiveLockOnDirEntryInode)

		if nil != err {
			heldLocks.free()
			return
		}

		if retryRequired {
			heldLocks.free()
			goto RestartCoalesceChunk
		}

		ctime, mtime, numWrites, coalesceSize, err = vS.inodeVolumeHandle.Coalesce(
			destFileInodeNumber, MiddlewareStream, metaData, coalesceElementList)

		heldLocks.free()

		if nil != err {
			return
		}

		elementPathIndexAtChunkStart = elementPathIndexAtChunkEnd
	}

	// Regardless of err return, fill in other return values

	ino = uint64(destFileInodeNumber)
	attrChangeTime = uint64(ctime.UnixNano())
	modificationTime = uint64(mtime.UnixNano())

	return
}
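
// A small sketch of the chunking above (hypothetical numbers): with five
// element paths and globals.coalesceElementChunkSize == 2, the loop acquires
// fresh locks for, and coalesces, elementPaths[0:2), then [2:4), then [4:5):
//
//	for start, end := 0, 0; start < len(elementPaths); start = end {
//		end = start + chunkSize // 2, 4, 6 ...
//		if end > len(elementPaths) {
//			end = len(elementPaths) // final partial chunk: 5
//		}
//		// ... resolve paths, Coalesce(), release locks ...
//	}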
  1536  
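        // MiddlewareDelete removes the directory entry parentDir/basename. A
        // DirInode may only be removed if it is empty (i.e. holds just "." and
        // ".."); a FileInode or SymlinkInode is additionally Destroy()'d if the
        // unlinked name was its last remaining link. A Destroy() failure is
        // logged but deliberately not returned to the caller.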
  1537  func (vS *volumeStruct) MiddlewareDelete(parentDir string, basename string) (err error) {
  1538  	var (
  1539  		dirEntryBasename      string
  1540  		dirEntryInodeNumber   inode.InodeNumber
  1541  		dirInodeNumber        inode.InodeNumber
  1542  		doDestroy             bool
  1543  		heldLocks             *heldLocksStruct
  1544  		inodeType             inode.InodeType
  1545  		inodeVolumeHandle     inode.VolumeHandle
  1546  		linkCount             uint64
  1547  		numDirEntries         uint64
  1548  		retryRequired         bool
  1549  		tryLockBackoffContext *tryLockBackoffContextStruct
  1550  	)
  1551  
  1552  	startTime := time.Now()
  1553  	defer func() {
  1554  		globals.MiddlewareDeleteUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  1555  		if err != nil {
  1556  			globals.MiddlewareDeleteErrors.Add(1)
  1557  		}
  1558  	}()
  1559  
  1560  	// Retry until done or failure (starting with ZERO backoff)
  1561  
  1562  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  1563  
  1564  Restart:
  1565  
  1566  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  1567  
  1568  	tryLockBackoffContext.backoff()
  1569  
  1570  	// Construct fresh heldLocks for this restart
  1571  
  1572  	heldLocks = newHeldLocks()
  1573  
  1574  	dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, _, retryRequired, err =
  1575  		vS.resolvePath(
  1576  			inode.RootDirInodeNumber,
  1577  			parentDir+"/"+basename,
  1578  			heldLocks,
  1579  			resolvePathFollowDirSymlinks|
  1580  				resolvePathRequireExclusiveLockOnDirEntryInode|
  1581  				resolvePathRequireExclusiveLockOnDirInode)
  1582  
  1583  	if nil != err {
  1584  		heldLocks.free()
  1585  		return
  1586  	}
  1587  
  1588  	if retryRequired {
  1589  		heldLocks.free()
  1590  		goto Restart
  1591  	}
  1592  
  1593  	// Check if Unlink() and Destroy() are doable
  1594  
  1595  	inodeVolumeHandle = vS.inodeVolumeHandle
  1596  
  1597  	inodeType, err = inodeVolumeHandle.GetType(dirEntryInodeNumber)
  1598  	if nil != err {
  1599  		heldLocks.free()
  1600  		return
  1601  	}
  1602  
  1603  	if inode.DirType == inodeType {
  1604  		numDirEntries, err = inodeVolumeHandle.NumDirEntries(dirEntryInodeNumber)
  1605  		if nil != err {
  1606  			heldLocks.free()
  1607  			return
  1608  		}
  1609  
  1610  		if 2 != numDirEntries {
  1611  			heldLocks.free()
  1612  			err = blunder.NewError(blunder.NotEmptyError, "%s/%s not empty", parentDir, basename)
  1613  			return
  1614  		}
  1615  
  1616  		doDestroy = true
  1617  	} else {
  1618  		linkCount, err = inodeVolumeHandle.GetLinkCount(dirEntryInodeNumber)
  1619  		if nil != err {
  1620  			heldLocks.free()
  1621  			return
  1622  		}
  1623  
  1624  		doDestroy = (1 == linkCount)
  1625  	}
  1626  
  1627  	// Now perform the Unlink() and (potentially) Destroy()
  1628  
  1629  	err = inodeVolumeHandle.Unlink(dirInodeNumber, dirEntryBasename, false)
  1630  	if nil != err {
  1631  		heldLocks.free()
  1632  		return
  1633  	}
  1634  
  1635  	if doDestroy {
  1636  		err = inodeVolumeHandle.Destroy(dirEntryInodeNumber)
  1637  		if nil != err {
  1638  			logger.Errorf("fs.MiddlewareDelete() failed to Destroy dirEntryInodeNumber 0x%016X: %v", dirEntryInodeNumber, err)
  1639  		}
  1640  	}
  1641  
  1642  	// Release heldLocks and exit with success (even if Destroy() failed earlier)
  1643  
  1644  	heldLocks.free()
  1645  
  1646  	err = nil
  1647  	return
  1648  }
  1649  
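        // middlewareReadDirHelper returns up to maxEntries DirEntry's of the
        // directory at path, starting just after prevBasename ("" starts at the
        // beginning); moreEntries reports whether a subsequent call could return
        // more. All locks are dropped before the entry types are resolved, so
        // the result is a best-effort snapshot of a directory that may be
        // changing concurrently. Callers page through large directories along
        // these lines (an illustrative sketch only):
        //
        //	prev := ""
        //	for {
        //		_, slice, more, err := vS.middlewareReadDirHelper(path, 64, prev)
        //		if nil != err {
        //			break
        //		}
        //		// ... consume slice ...
        //		if !more {
        //			break
        //		}
        //		prev = slice[len(slice)-1].Basename
        //	}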
  1650  func (vS *volumeStruct) middlewareReadDirHelper(path string, maxEntries uint64, prevBasename string) (pathDirInodeNumber inode.InodeNumber, dirEntrySlice []inode.DirEntry, moreEntries bool, err error) {
  1651  	var (
  1652  		dirEntrySliceElement  inode.DirEntry
  1653  		heldLocks             *heldLocksStruct
  1654  		internalDirEntrySlice []inode.DirEntry
  1655  		retryRequired         bool
  1656  		tryLockBackoffContext *tryLockBackoffContextStruct
  1657  	)
  1658  
  1659  	// Retry until done or failure (starting with ZERO backoff)
  1660  
  1661  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  1662  
  1663  Restart:
  1664  
  1665  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  1666  
  1667  	tryLockBackoffContext.backoff()
  1668  
  1669  	// Construct fresh heldLocks for this restart
  1670  
  1671  	heldLocks = newHeldLocks()
  1672  
  1673  	_, pathDirInodeNumber, _, _, retryRequired, err =
  1674  		vS.resolvePath(
  1675  			inode.RootDirInodeNumber,
  1676  			path,
  1677  			heldLocks,
  1678  			resolvePathFollowDirSymlinks)
  1679  
  1680  	if nil != err {
  1681  		heldLocks.free()
  1682  		return
  1683  	}
  1684  
  1685  	if retryRequired {
  1686  		heldLocks.free()
  1687  		goto Restart
  1688  	}
  1689  
  1690  	// Now assemble response
  1691  
  1692  	internalDirEntrySlice, moreEntries, err = vS.inodeVolumeHandle.ReadDir(pathDirInodeNumber, maxEntries, 0, prevBasename)
  1693  	if nil != err {
  1694  		heldLocks.free()
  1695  		return
  1696  	}
  1697  
  1698  	// No need to hold any locks now... directory contents should be allowed to change while enumerating
  1699  	heldLocks.free()
  1700  
  1701  	dirEntrySlice = make([]inode.DirEntry, 0, len(internalDirEntrySlice))
  1702  
  1703  	for _, dirEntrySliceElement = range internalDirEntrySlice {
  1704  		if ("." == dirEntrySliceElement.Basename) || (".." == dirEntrySliceElement.Basename) {
  1705  			dirEntrySliceElement.Type = inode.DirType
  1706  		} else {
  1707  			dirEntrySliceElement.Type, err = vS.GetType(inode.InodeRootUserID, inode.InodeGroupID(0), nil, dirEntrySliceElement.InodeNumber)
  1708  			if nil != err {
  1709  				// It's ok to have an error here... it just means the directory we are iterating is changing
  1710  				continue
  1711  			}
  1712  		}
  1713  		dirEntrySlice = append(dirEntrySlice, dirEntrySliceElement)
  1714  	}
  1715  
  1718  	err = nil
  1719  	return
  1720  }
  1721  
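        // MiddlewareGetAccount lists the top-level DirInodes of the volume (the
        // "containers" of the corresponding Swift account) whose Basenames sort
        // after marker and, if endmarker is non-empty, strictly before it,
        // returning at most maxEntries AccountEntry's (maxEntries == 0 means no
        // limit). The volume root's MTime/CTime are returned as the account's
        // times.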
  1722  func (vS *volumeStruct) MiddlewareGetAccount(maxEntries uint64, marker string, endmarker string) (accountEnts []AccountEntry, mtime uint64, ctime uint64, err error) {
  1723  	var (
  1724  		dirEntrySlice        []inode.DirEntry
  1725  		dirEntrySliceElement inode.DirEntry
  1726  		moreEntries          bool
  1727  		remainingMaxEntries  uint64
  1728  		statResult           Stat
  1729  	)
  1730  
  1731  	statResult, err = vS.Getstat(inode.InodeRootUserID, inode.InodeGroupID(0), nil, inode.RootDirInodeNumber)
  1732  	if nil != err {
  1733  		return
  1734  	}
  1735  	mtime = statResult[StatMTime]
  1736  	ctime = statResult[StatCTime]
  1737  
  1738  	if 0 != maxEntries {
  1739  		// Hard limit to number of DirInode Basenames to return
  1740  		accountEnts = make([]AccountEntry, 0, maxEntries)
  1741  	}
  1742  
  1743  	remainingMaxEntries = maxEntries
  1744  
  1745  	moreEntries = true
  1746  
  1747  	for moreEntries {
  1748  		_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper("/", remainingMaxEntries, marker)
  1749  		if nil != err {
  1750  			return
  1751  		}
  1752  
  1753  		if 0 == maxEntries {
  1754  			// No limit to number of DirInode Basenames to return... so it must be <= len(dirEntrySlice)
  1755  			accountEnts = make([]AccountEntry, 0, len(dirEntrySlice))
  1756  			// Note: moreEntries should be false so the "for moreEntries" loop should exit after 1st iteration
  1757  		}
  1758  
  1759  		for _, dirEntrySliceElement = range dirEntrySlice {
  1760  			if ("" != endmarker) && (0 <= strings.Compare(dirEntrySliceElement.Basename, endmarker)) {
  1761  				moreEntries = false
  1762  				break
  1763  			}
  1764  			if ("." != dirEntrySliceElement.Basename) && (".." != dirEntrySliceElement.Basename) {
  1765  				// So we've skipped "." & ".." - now also skip non-DirInodes
  1766  				if inode.DirType == dirEntrySliceElement.Type {
  1767  					statResult, err = vS.Getstat(inode.InodeRootUserID, inode.InodeGroupID(0), nil, dirEntrySliceElement.InodeNumber)
  1768  					if nil != err {
  1769  						return
  1770  					}
  1771  					accountEnts = append(accountEnts, AccountEntry{
  1772  						Basename:         dirEntrySliceElement.Basename,
  1773  						ModificationTime: statResult[StatMTime],
  1774  						AttrChangeTime:   statResult[StatCTime],
  1775  					})
  1776  				}
  1777  			}
  1778  		}
  1779  
  1780  		if moreEntries && (0 != maxEntries) {
  1781  			remainingMaxEntries = maxEntries - uint64(len(accountEnts))
  1782  			if 0 == remainingMaxEntries {
  1783  				moreEntries = false
  1784  			}
  1785  		}
  1786  
  1787  		if moreEntries {
  1788  			// Adjust marker to fetch next dirEntrySlice
  1789  			marker = dirEntrySlice[len(dirEntrySlice)-1].Basename
  1790  		}
  1791  	}
  1792  
  1795  	return
  1796  }
  1797  
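        // dirEntrySliceStackElementStruct records the enumeration state of one
        // directory during the depth-first treewalk performed by
        // MiddlewareGetContainer(): which directory, the DirEntry's most
        // recently read from it, how many of those have been consumed, and
        // whether the directory has more entries left to read.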
  1798  type dirEntrySliceStackElementStruct struct {
  1799  	dirPath       string
  1800  	dirEntrySlice []inode.DirEntry
  1801  	numConsumed   int
  1802  	moreEntries   bool
  1803  }
  1804  
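        // MiddlewareGetContainer implements a Swift-style container listing
        // (i.e. a GET on a container) atop the volume's directory tree. The
        // marker, endmarker, and prefix arguments must already be in
        // canonicalized (path-cleaned) form. A delimiter of "/" lists only the
        // single directory selected by prefix/marker; an empty delimiter
        // recurses depth-first into subdirectories, tracking its position with
        // an explicit stack of dirEntrySliceStackElementStruct's rather than
        // with recursion.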
  1805  func (vS *volumeStruct) MiddlewareGetContainer(vContainerName string, maxEntries uint64, marker string, endmarker string, prefix string, delimiter string) (containerEnts []ContainerEntry, err error) {
  1806  	var (
  1807  		containerEntry                ContainerEntry
  1808  		containerEntryBasename        string // Misnamed... this is actually everything after ContainerName
  1809  		containerEntryPath            string
  1810  		containerEntryPathSplit       []string // Split on only the first '/' (to remove ContainerName from it)
  1811  		doSingleDirectory             bool
  1812  		dirEntryInodeLock             *dlm.RWLockStruct
  1813  		dirEntryInodeNumber           inode.InodeNumber
  1814  		dirEntryInodeType             inode.InodeType
  1815  		dirEntryMetadata              *inode.MetadataStruct
  1816  		dirEntryPath                  string
  1817  		dirEntrySlice                 []inode.DirEntry
  1818  		dirEntrySliceElement          inode.DirEntry
  1819  		dirEntrySliceElementIndex     int
  1820  		dirEntrySliceElementToPrepend *inode.DirEntry
  1821  		dirEntrySliceStack            []*dirEntrySliceStackElementStruct
  1822  		dirEntrySliceStackElement     *dirEntrySliceStackElementStruct
  1823  		dirEntrySliceToAppend         []inode.DirEntry
  1824  		dirInodeNumber                inode.InodeNumber
  1825  		dirPath                       string
  1826  		dirPathSplit                  []string
  1827  		dlmCallerID                   dlm.CallerID
  1828  		endmarkerCanonicalized        string
  1829  		endmarkerPath                 []string
  1830  		heldLocks                     *heldLocksStruct
  1831  		initialDirEntryToMatch        string // == "" if no initial path should be returned (i.e. in marker starting point case)
  1832  		inodeVolumeHandle             inode.VolumeHandle
  1833  		markerCanonicalized           string
  1834  		markerPath                    []string
  1835  		markerPathDirInodeIndex       int
  1836  		moreEntries                   bool
  1837  		pathIndex                     int
  1838  		prefixCanonicalized           string
  1839  		prefixPath                    []string
  1840  		prefixPathDirInodeIndex       int
  1841  		prevReturned                  string
  1842  		remainingMaxEntries           uint64
  1843  		retryRequired                 bool
  1844  		tryLockBackoffContext         *tryLockBackoffContextStruct
  1845  	)
  1846  
  1847  	// Validate marker, endmarker, and prefix
  1848  
  1849  	if "" == marker {
  1850  		markerPath = []string{}
  1851  		markerPathDirInodeIndex = -1 // Must be special cased below to ensure we don't look in markerPath
  1852  		markerCanonicalized = ""     // Actually never accessed
  1853  	} else {
  1854  		markerPath, markerPathDirInodeIndex, err = vS.canonicalizePathAndLocateLeafDirInode(vContainerName + "/" + marker)
  1855  		if nil != err {
  1856  			err = blunder.AddError(err, blunder.InvalidArgError)
  1857  			return
  1858  		}
  1859  
  1860  		markerCanonicalized = strings.Join(markerPath, "/")
  1861  		if strings.HasSuffix(marker, "/") {
  1862  			markerCanonicalized += "/"
  1863  		}
  1864  
  1865  		if vContainerName+"/"+marker != markerCanonicalized {
  1866  			err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a canonicalized marker")
  1867  			return
  1868  		}
  1869  	}
  1870  
  1871  	if "" == endmarker {
  1872  		endmarkerPath = []string{}
  1873  		endmarkerCanonicalized = "" // Actually never accessed
  1874  	} else {
  1875  		endmarkerPath, _, err = vS.canonicalizePathAndLocateLeafDirInode(vContainerName + "/" + endmarker)
  1876  		if nil != err {
  1877  			err = blunder.AddError(err, blunder.InvalidArgError)
  1878  			return
  1879  		}
  1880  
  1881  		endmarkerCanonicalized = strings.Join(endmarkerPath, "/")
  1882  		if strings.HasSuffix(endmarker, "/") {
  1883  			endmarkerCanonicalized += "/"
  1884  		}
  1885  
  1886  		if vContainerName+"/"+endmarker != endmarkerCanonicalized {
  1887  			err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a canonicalized endmarker")
  1888  			return
  1889  		}
  1890  	}
  1891  
  1892  	prefixPath, prefixPathDirInodeIndex, err = vS.canonicalizePathAndLocateLeafDirInode(vContainerName + "/" + prefix)
  1893  	if nil != err {
  1894  		err = blunder.AddError(err, blunder.InvalidArgError)
  1895  		return
  1896  	}
  1897  	if prefixPathDirInodeIndex < 0 {
  1898  		err = blunder.NewError(blunder.NotFoundError, "MiddlewareGetContainer() only supports querying an existing Container")
  1899  		return
  1900  	}
  1901  
  1902  	prefixCanonicalized = strings.Join(prefixPath, "/")
  1903  	if strings.HasSuffix(prefix, "/") {
  1904  		prefixCanonicalized += "/"
  1905  	}
  1906  
  1907  	if (prefix != "") && (vContainerName+"/"+prefix != prefixCanonicalized) {
  1908  		err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a canonicalized prefix")
  1909  		return
  1910  	}
  1911  
  1912  	// Validate delimiter
  1913  
  1914  	switch delimiter {
  1915  	case "":
  1916  		doSingleDirectory = false
  1917  	case "/":
  1918  		doSingleDirectory = true
  1919  	default:
  1920  		err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a delimiter of \"/\"")
  1921  		return
  1922  	}
  1923  
  1924  	// Determine the DirInode from which to begin our enumeration
  1925  
  1926  	pathIndex = 0
  1927  
        DetermineStartingDirInode:
  1928  	for {
  1929  		if (pathIndex > markerPathDirInodeIndex) && (pathIndex > prefixPathDirInodeIndex) {
  1930  			// Special (though probably typical) case where marker lands in prefix-indicated directory
  1931  
  1932  			dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  1933  
  1934  			if (1 == len(prefixPath)) || strings.HasSuffix(prefix, "/") {
  1935  				if (markerPathDirInodeIndex + 1) == len(markerPath) {
  1936  					prevReturned = ""
  1937  				} else {
  1938  					prevReturned = markerPath[markerPathDirInodeIndex+1]
  1939  				}
  1940  				initialDirEntryToMatch = ""
  1941  			} else {
  1942  				// Handle four remaining cases:
  1943  				//   marker & prefix both specified directories
  1944  				//   marker specified a directory, prefix did not
  1945  				//   prefix specified a directory, marker did not
  1946  				//   neither marker nor prefix specified a directory
  1947  
  1948  				if (markerPathDirInodeIndex + 1) == len(markerPath) {
  1949  					if (prefixPathDirInodeIndex + 1) == len(prefixPath) {
  1950  						// Case where marker & prefix both specified directories
  1951  
  1952  						prevReturned = ""
  1953  					} else {
  1954  						// Case where marker specified a directory, prefix did not
  1955  
  1956  						prevReturned = prefixPath[prefixPathDirInodeIndex+1]
  1957  					}
  1958  					initialDirEntryToMatch = prevReturned
  1959  				} else { // (markerPathDirInodeIndex + 1) != len(markerPath)
  1960  					if (prefixPathDirInodeIndex + 1) == len(prefixPath) {
  1961  						// Case where prefix specified a directory, marker did not
  1962  
  1963  						prevReturned = markerPath[markerPathDirInodeIndex+1]
  1964  						initialDirEntryToMatch = ""
  1965  					} else {
  1966  						// Case where neither marker nor prefix specified a directory
  1967  
  1968  						if strings.Compare(prefixPath[prefixPathDirInodeIndex+1], markerPath[markerPathDirInodeIndex+1]) <= 0 {
  1969  							prevReturned = markerPath[markerPathDirInodeIndex+1]
  1970  							initialDirEntryToMatch = ""
  1971  						} else {
  1972  							prevReturned = prefixPath[prefixPathDirInodeIndex+1]
  1973  							initialDirEntryToMatch = prevReturned
  1974  						}
  1975  					}
  1976  				}
  1977  			}
  1978  			break
  1979  		}
  1980  
  1981  		if pathIndex > markerPathDirInodeIndex {
  1982  			// Handle case where prefix is more constraining than marker
  1983  
  1984  			if prefixPathDirInodeIndex == (len(prefixPath) - 1) {
  1985  				if (1 == len(prefixPath)) || strings.HasSuffix(prefix, "/") {
  1986  					dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  1987  					prevReturned = ""
  1988  				} else {
  1989  					dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex], "/")
  1990  					prevReturned = prefixPath[len(prefixPath)-1]
  1991  				}
  1992  			} else {
  1993  				dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  1994  				prevReturned = prefixPath[len(prefixPath)-1]
  1995  			}
  1996  			initialDirEntryToMatch = prevReturned
  1997  			break
  1998  		}
  1999  
  2000  		if pathIndex > prefixPathDirInodeIndex {
  2001  			// Handle case where marker is more constraining than prefix
  2002  
  2003  			dirPath = strings.Join(markerPath[:markerPathDirInodeIndex+1], "/")
  2004  			if markerPathDirInodeIndex == (len(markerPath) - 1) {
  2005  				prevReturned = ""
  2006  			} else {
  2007  				prevReturned = markerPath[len(markerPath)-1]
  2008  			}
  2009  			initialDirEntryToMatch = ""
  2010  			break
  2011  		}
  2012  
  2013  		switch strings.Compare(prefixPath[pathIndex], markerPath[pathIndex]) {
  2014  		case -1:
  2015  			dirPath = strings.Join(markerPath[:markerPathDirInodeIndex+1], "/")
  2016  			if markerPathDirInodeIndex == (len(markerPath) - 1) {
  2017  				prevReturned = ""
  2018  			} else {
  2019  				prevReturned = markerPath[len(markerPath)-1]
  2020  			}
  2021  			initialDirEntryToMatch = ""
  2022  			break DetermineStartingDirInode // exit the enclosing for loop, not just this switch
  2023  		case 0:
  2024  			pathIndex++
  2025  		case 1:
  2026  			if prefixPathDirInodeIndex == (len(prefixPath) - 1) {
  2027  				if (1 == len(prefixPath)) || strings.HasSuffix(prefix, "/") {
  2028  					dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  2029  					prevReturned = ""
  2030  				} else {
  2031  					dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex], "/")
  2032  					prevReturned = prefixPath[len(prefixPath)-1]
  2033  				}
  2034  			} else {
  2035  				dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/")
  2036  				prevReturned = prefixPath[len(prefixPath)-1]
  2037  			}
  2038  			initialDirEntryToMatch = prevReturned
  2039  			break DetermineStartingDirInode
  2040  		}
  2041  	}
  2042  
  2043  	// Set up shortcuts/constants
  2044  
  2045  	dlmCallerID = dlm.GenerateCallerID()
  2046  	inodeVolumeHandle = vS.inodeVolumeHandle
  2047  
  2048  	// Compute initial response
  2049  
  2050  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2051  
  2052  Restart:
  2053  
  2054  	tryLockBackoffContext.backoff()
  2055  
  2056  	heldLocks = newHeldLocks()
  2057  
  2058  	_, dirInodeNumber, _, _, retryRequired, err =
  2059  		vS.resolvePath(
  2060  			inode.RootDirInodeNumber,
  2061  			dirPath,
  2062  			heldLocks,
  2063  			resolvePathDirEntryInodeMustBeDirectory)
  2064  	if nil != err {
  2065  		heldLocks.free()
  2066  		return
  2067  	}
  2068  	if retryRequired {
  2069  		heldLocks.free()
  2070  		goto Restart
  2071  	}
  2072  
  2073  	containerEnts = make([]ContainerEntry, 0, maxEntries)
  2074  
  2075  	if 0 == maxEntries {
  2076  		heldLocks.free()
  2077  		err = nil
  2078  		return
  2079  	}
  2080  
  2081  	if "" == initialDirEntryToMatch {
  2082  		dirEntrySliceElementToPrepend = nil
  2083  	} else {
  2084  		if "" == dirPath {
  2085  			dirEntryPath = initialDirEntryToMatch
  2086  		} else {
  2087  			dirEntryPath = dirPath + "/" + initialDirEntryToMatch
  2088  		}
  2089  		if ("" != endmarker) && (strings.Compare(dirEntryPath, endmarkerCanonicalized) >= 0) {
  2090  			heldLocks.free()
  2091  			err = nil
  2092  			return
  2093  		}
  2094  		dirEntryInodeNumber, err = inodeVolumeHandle.Lookup(dirInodeNumber, initialDirEntryToMatch)
  2095  		if nil == err {
  2096  			retryRequired = heldLocks.attemptSharedLock(inodeVolumeHandle, dlmCallerID, dirEntryInodeNumber)
  2097  			if retryRequired {
  2098  				heldLocks.free()
  2099  				goto Restart
  2100  			}
  2101  			dirEntryInodeType, err = inodeVolumeHandle.GetType(dirEntryInodeNumber)
  2102  			if nil == err {
  2103  				dirEntrySliceElementToPrepend = &inode.DirEntry{
  2104  					InodeNumber: dirEntryInodeNumber,
  2105  					Basename:    initialDirEntryToMatch,
  2106  					Type:        dirEntryInodeType,
  2107  				}
  2108  			} else {
  2109  				dirEntrySliceElementToPrepend = nil
  2110  			}
  2111  			heldLocks.unlock(dirEntryInodeNumber)
  2112  		} else {
  2113  			dirEntrySliceElementToPrepend = nil
  2114  		}
  2115  	}
  2116  
  2117  	heldLocks.free()
  2118  
  2119  	if 0 == maxEntries {
  2120  		remainingMaxEntries = 0
  2121  	} else {
  2122  		if nil == dirEntrySliceElementToPrepend {
  2123  			remainingMaxEntries = maxEntries
  2124  		} else {
  2125  			remainingMaxEntries = maxEntries - 1
  2126  		}
  2127  	}
  2128  
  2129  	// At this point:
  2130  	//   no heldLocks
  2131  	//   containerEnts has been declared
  2132  	//   doSingleDirectory is set based on supplied delimiter
  2133  	//   if {marker,endmarker,prefix} asked to include an exact matched path that existed, it's in dirEntrySliceElementToPrepend
  2134  	//   prefixCanonicalized & endmarkerCanonicalized are set to terminate the ensuing treewalk
  2135  	//   remainingMaxEntries indicates how many more DirEntry's will fit in containerEnts (if capped)
  2136  	//   dirPath is pointing to the initial DirInode to read
  2137  	//   prevReturned indicates from where in the DirInode to start reading
  2138  
  2139  	// Perform initial ReadDir and place in dirEntrySliceStack
  2140  
  2141  	if nil == dirEntrySliceElementToPrepend {
  2142  		_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2143  		if nil != err {
  2144  			return
  2145  		}
  2146  	} else {
  2147  		if 0 == remainingMaxEntries {
  2148  			dirEntrySlice = []inode.DirEntry{*dirEntrySliceElementToPrepend}
  2149  			moreEntries = false
  2150  		} else {
  2151  			_, dirEntrySliceToAppend, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2152  			if nil == err {
  2153  				dirEntrySlice = make([]inode.DirEntry, 1, 1+len(dirEntrySliceToAppend))
  2154  				dirEntrySlice[0] = *dirEntrySliceElementToPrepend
  2155  				dirEntrySlice = append(dirEntrySlice, dirEntrySliceToAppend...)
  2156  			} else {
  2157  				return
  2158  			}
  2159  		}
  2160  	}
  2161  
  2162  	dirEntrySliceStackElement = &dirEntrySliceStackElementStruct{
  2163  		dirPath:       dirPath,
  2164  		dirEntrySlice: dirEntrySlice,
  2165  		numConsumed:   0,
  2166  		moreEntries:   moreEntries,
  2167  	}
  2168  
  2169  	dirEntrySliceStack = []*dirEntrySliceStackElementStruct{dirEntrySliceStackElement}
  2170  
  2171  	containerEnts = make([]ContainerEntry, 0, len(dirEntrySlice))
  2172  
  2173  	// Now append appropriate ContainerEntry's until an exit criterion is reached
  2174  
  2175  	for uint64(len(containerEnts)) < maxEntries {
  2176  		dirEntrySliceStackElement = dirEntrySliceStack[len(dirEntrySliceStack)-1]
  2177  
  2178  		if dirEntrySliceStackElement.numConsumed == len(dirEntrySliceStackElement.dirEntrySlice) {
  2179  			if dirEntrySliceStackElement.moreEntries {
  2180  				dirPath = dirEntrySliceStackElement.dirPath
  2181  				dirEntrySlice = dirEntrySliceStackElement.dirEntrySlice
  2182  				dirEntrySliceElementIndex = len(dirEntrySlice) - 1
  2183  				dirEntrySliceElement = dirEntrySlice[dirEntrySliceElementIndex]
  2184  				prevReturned = dirEntrySliceElement.Basename
  2185  
  2186  				_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2187  				if (nil != err) || (0 == len(dirEntrySlice)) {
  2188  					// Even though we thought there were moreEntries, there now are not for some reason
  2189  
  2190  					if doSingleDirectory {
  2191  						// Regardless of remaining contents of dirEntrySliceStack, we must be done
  2192  
  2193  						err = nil
  2194  						return
  2195  					}
  2196  
  2197  					// Navigate to parent directory
  2198  
  2199  					dirEntrySliceStack = dirEntrySliceStack[:len(dirEntrySliceStack)-1]
  2200  					continue
  2201  				}
  2202  
  2203  				// Restart this loop on current dirEntrySliceStackElement with new middlewareReadDirHelper() results
  2204  
  2205  				dirEntrySliceStackElement.dirEntrySlice = dirEntrySlice
  2206  				dirEntrySliceStackElement.numConsumed = 0
  2207  				dirEntrySliceStackElement.moreEntries = moreEntries
  2208  
  2209  				continue
  2210  			} else {
  2211  				// We've reached the end of this DirInode
  2212  
  2213  				if doSingleDirectory {
  2214  					// Regardless of remaining contents of dirEntrySliceStack, we must be done
  2215  
  2216  					err = nil
  2217  					return
  2218  				}
  2219  
  2220  				// Navigate to parent directory (staying within this Container)
  2221  
  2222  				if 1 == len(dirEntrySliceStack) {
  2223  					// We are at the starting directory
  2224  
  2225  					dirPathSplit = strings.Split(dirEntrySliceStackElement.dirPath, "/")
  2226  
  2227  					if 1 == len(dirPathSplit) {
  2228  						// We just finished Container-level directory, so we are done
  2229  
  2230  						err = nil
  2231  						return
  2232  					}
  2233  
  2234  					// Modify dirEntrySliceStackElement to point to parent directory as if we'd just processed the dirEntry of this directory
  2235  
  2236  					dirPath = strings.Join(dirPathSplit[:len(dirPathSplit)-1], "/")
  2237  
  2238  					if 0 == maxEntries {
  2239  						remainingMaxEntries = 0
  2240  					} else {
  2241  						remainingMaxEntries = maxEntries - uint64(len(containerEnts))
  2242  					}
  2243  
  2244  					prevReturned = dirPathSplit[len(dirPathSplit)-1]
  2245  
  2246  					_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2247  					if nil != err {
  2248  						return
  2249  					}
  2250  
  2251  					dirEntrySliceStackElement.dirPath = dirPath
  2252  					dirEntrySliceStackElement.dirEntrySlice = dirEntrySlice
  2253  					dirEntrySliceStackElement.numConsumed = 0
  2254  					dirEntrySliceStackElement.moreEntries = moreEntries
  2255  				} else {
  2256  					// Parent directory already in dirEntrySliceStack... so just pop current ...Element
  2257  
  2258  					dirEntrySliceStack = dirEntrySliceStack[:len(dirEntrySliceStack)-1]
  2259  				}
  2260  
  2261  				continue
  2262  			}
  2263  		}
  2264  
  2265  		// Consume next dirEntrySliceElement
  2266  		// ...skipping "." and ".."
  2267  		// ...skipping if <dirPath>/<Basename> <= marker
  2268  		// ...recursing when encountering DirInode's if !doSingleDirectory
  2269  		// ...terminating early if either:
  2270  		//      len(containerEnts) reaches maxEntries
  2271  		//      <dirPath>/<Basename> >= endmarker
  2272  		//      <dirPath>/<Basename> does not start with prefix
  2273  
  2274  		dirEntrySlice = dirEntrySliceStackElement.dirEntrySlice
  2275  		dirEntrySliceElementIndex = dirEntrySliceStackElement.numConsumed
  2276  		dirEntrySliceElement = dirEntrySlice[dirEntrySliceElementIndex]
  2277  
  2278  		dirEntrySliceStackElement.numConsumed++
  2279  
  2280  		if ("." == dirEntrySliceElement.Basename) || (".." == dirEntrySliceElement.Basename) {
  2281  			continue
  2282  		}
  2283  
  2284  		containerEntryPath = dirEntrySliceStackElement.dirPath + "/" + dirEntrySliceElement.Basename
  2285  
  2286  		if ("" != marker) && (strings.Compare(containerEntryPath, markerCanonicalized) <= 0) {
  2287  			err = nil
  2288  			return
  2289  		}
  2290  		if ("" != endmarker) && (strings.Compare(containerEntryPath, endmarkerCanonicalized) >= 0) {
  2291  			err = nil
  2292  			return
  2293  		}
  2294  		if ("" != prefix) && !strings.HasPrefix(containerEntryPath, prefixCanonicalized) {
  2295  			err = nil
  2296  			return
  2297  		}
  2298  
  2299  		// Ok... so we actually want to append this entry to containerEnts
  2300  
  2301  		tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2302  
  2303  	Retry:
  2304  
  2305  		tryLockBackoffContext.backoff()
  2306  
  2307  		dirEntryInodeLock, err = inodeVolumeHandle.AttemptReadLock(dirEntrySliceElement.InodeNumber, dlmCallerID)
  2308  		if nil != err {
  2309  			goto Retry
  2310  		}
  2311  
  2312  		dirEntryMetadata, err = inodeVolumeHandle.GetMetadata(dirEntrySliceElement.InodeNumber)
  2313  		if nil != err {
  2314  			// Ok... so it must have disappeared... just skip it
  2315  
  2316  			err = dirEntryInodeLock.Unlock()
  2317  			if nil != err {
  2318  				logger.Fatalf("Failure unlocking a held LockID %s: %v", dirEntryInodeLock.LockID, err)
  2319  			}
  2320  
  2321  			continue
  2322  		}
  2323  
  2324  		containerEntryPathSplit = strings.SplitN(containerEntryPath, "/", 2)
  2325  		containerEntryBasename = containerEntryPathSplit[1]
  2326  
  2327  		containerEntry = ContainerEntry{
  2328  			Basename:         containerEntryBasename,
  2329  			FileSize:         dirEntryMetadata.Size,
  2330  			ModificationTime: uint64(dirEntryMetadata.ModificationTime.UnixNano()),
  2331  			AttrChangeTime:   uint64(dirEntryMetadata.AttrChangeTime.UnixNano()),
  2332  			IsDir:            (dirEntrySliceElement.Type == inode.DirType),
  2333  			NumWrites:        dirEntryMetadata.NumWrites,
  2334  			InodeNumber:      uint64(dirEntrySliceElement.InodeNumber),
  2335  		}
  2336  
  2337  		containerEntry.Metadata, err = inodeVolumeHandle.GetStream(dirEntrySliceElement.InodeNumber, MiddlewareStream)
  2338  		if nil != err {
  2339  			if blunder.Is(err, blunder.StreamNotFound) {
  2340  				// No MiddlewareStream... just make it appear empty
  2341  
  2342  				containerEntry.Metadata = []byte{}
  2343  				err = nil
  2344  			} else {
  2345  				// Ok... so it must have disappeared... just skip it
  2346  
  2347  				err = dirEntryInodeLock.Unlock()
  2348  				if nil != err {
  2349  					logger.Fatalf("Failure unlocking a held LockID %s: %v", dirEntryInodeLock.LockID, err)
  2350  				}
  2351  
  2352  				continue
  2353  			}
  2354  		}
  2355  
  2356  		// We can finally Unlock() this dirEntryInodeLock
  2357  
  2358  		err = dirEntryInodeLock.Unlock()
  2359  		if nil != err {
  2360  			logger.Fatalf("Failure unlocking a held LockID %s: %v", dirEntryInodeLock.LockID, err)
  2361  		}
  2362  
  2363  		// If we reach here, we get to append this containerEntry to containerEnts
  2364  
  2365  		containerEnts = append(containerEnts, containerEntry)
  2366  
  2367  		// We must now descend into dirEntryInode if it's a DirInode and !doSingleDirectory
  2368  
  2369  		if !doSingleDirectory && (dirEntrySliceElement.Type == inode.DirType) {
  2370  			dirPath = dirEntrySliceStackElement.dirPath + "/" + dirEntrySliceElement.Basename
  2371  
  2372  			if 0 == maxEntries {
  2373  				remainingMaxEntries = 0
  2374  			} else {
  2375  				remainingMaxEntries = maxEntries - uint64(len(containerEnts))
  2376  			}
  2377  
  2378  			prevReturned = ""
  2379  
  2380  			_, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned)
  2381  			if nil != err {
  2382  				return
  2383  			}
  2384  
  2385  			dirEntrySliceStackElement = &dirEntrySliceStackElementStruct{
  2386  				dirPath:       dirPath,
  2387  				dirEntrySlice: dirEntrySlice,
  2388  				numConsumed:   0,
  2389  				moreEntries:   moreEntries,
  2390  			}
  2391  
  2392  			dirEntrySliceStack = append(dirEntrySliceStack, dirEntrySliceStackElement)
  2393  		}
  2394  	}
  2395  
  2396  	// We will only reach here if we exhausted maxEntries before exhausting the tree/list of containerEntry's to append
  2397  
  2398  	err = nil
  2399  	return
  2400  }
  2401  
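        // MiddlewareGetObject services a middleware GET on containerObjectPath:
        // it fills in response with the object's attributes and its
        // MiddlewareStream metadata and, if the object is a regular file,
        // appends to readRangeOut a read plan covering each requested range in
        // readRangeIn (or the entire file when readRangeIn is empty). Symlinks
        // along the path, including the final element, are followed;
        // directories are reported with a FileSize of 0 and get no read plan.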
  2402  func (vS *volumeStruct) MiddlewareGetObject(containerObjectPath string,
  2403  	readRangeIn []ReadRangeIn, readRangeOut *[]inode.ReadPlanStep) (
  2404  	response HeadResponse, err error) {
  2405  
  2406  	var (
  2407  		dirEntryInodeNumber   inode.InodeNumber
  2408  		fileOffset            uint64
  2409  		heldLocks             *heldLocksStruct
  2410  		inodeVolumeHandle     inode.VolumeHandle
  2411  		readPlan              []inode.ReadPlanStep
  2412  		readRangeInIndex      int
  2413  		retryRequired         bool
  2414  		stat                  Stat
  2415  		tryLockBackoffContext *tryLockBackoffContextStruct
  2416  	)
  2417  
  2418  	startTime := time.Now()
  2419  	defer func() {
  2420  		var totalReadBytes uint64
  2421  		for _, step := range *readRangeOut {
  2422  			totalReadBytes += step.Length
  2423  		}
  2424  
  2425  		globals.MiddlewareGetObjectUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2426  		globals.MiddlewareGetObjectBytes.Add(totalReadBytes)
  2427  		if err != nil {
  2428  			globals.MiddlewareGetObjectErrors.Add(1)
  2429  		}
  2430  	}()
  2431  
  2432  	// Retry until done or failure (starting with ZERO backoff)
  2433  
  2434  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2435  
  2436  Restart:
  2437  
  2438  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2439  
  2440  	tryLockBackoffContext.backoff()
  2441  
  2442  	// Construct fresh heldLocks for this restart
  2443  
  2444  	heldLocks = newHeldLocks()
  2445  
  2446  	_, dirEntryInodeNumber, _, _, retryRequired, err =
  2447  		vS.resolvePath(
  2448  			inode.RootDirInodeNumber,
  2449  			containerObjectPath,
  2450  			heldLocks,
  2451  			resolvePathFollowDirEntrySymlinks|
  2452  				resolvePathFollowDirSymlinks)
  2453  
  2454  	if nil != err {
  2455  		heldLocks.free()
  2456  		return
  2457  	}
  2458  
  2459  	if retryRequired {
  2460  		heldLocks.free()
  2461  		goto Restart
  2462  	}
  2463  
  2464  	// Now assemble response
  2465  
  2466  	stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber)
  2467  	if nil != err {
  2468  		heldLocks.free()
  2469  		return
  2470  	}
  2471  
  2472  	response.FileSize = stat[StatSize]
  2473  	response.ModificationTime = stat[StatMTime]
  2474  	response.AttrChangeTime = stat[StatCTime]
  2475  	response.IsDir = (stat[StatFType] == uint64(inode.DirType))
  2476  	response.InodeNumber = dirEntryInodeNumber
  2477  	response.NumWrites = stat[StatNumWrites]
  2478  
  2479  	// Swift thinks all directories have a size of 0 (and symlinks as well)
  2480  	if stat[StatFType] != uint64(inode.FileType) {
  2481  		response.FileSize = 0
  2482  	}
  2483  
  2484  	response.Metadata, err = vS.inodeVolumeHandle.GetStream(dirEntryInodeNumber, MiddlewareStream)
  2485  	if nil != err {
  2486  		if blunder.Is(err, blunder.StreamNotFound) {
  2487  			response.Metadata = []byte{}
  2488  			err = nil
  2489  		} else {
  2490  			heldLocks.free()
  2491  			return
  2492  		}
  2493  	}
  2494  
  2495  	// The only thing left is to construct a read plan and only regular
  2496  	// files have read plans.  If this is not a regular file then we're
  2497  	// done.
  2498  	if stat[StatFType] != uint64(inode.FileType) {
  2499  		heldLocks.free()
  2500  		return
  2501  	}
  2502  
  2503  	inodeVolumeHandle = vS.inodeVolumeHandle
  2504  	if len(readRangeIn) == 0 {
  2505  		// Get ReadPlan for entire file
  2506  
  2507  		fileOffset = 0
  2508  
  2509  		readPlan, err = inodeVolumeHandle.GetReadPlan(dirEntryInodeNumber, &fileOffset, &response.FileSize)
  2510  		if nil != err {
  2511  			heldLocks.free()
  2512  			return
  2513  		}
  2514  
  2515  		_ = appendReadPlanEntries(readPlan, readRangeOut)
  2516  	} else { // len(readRangeIn) > 0
  2517  		// Append each computed range
  2518  
  2519  		for readRangeInIndex = range readRangeIn {
  2520  			readPlan, err = inodeVolumeHandle.GetReadPlan(dirEntryInodeNumber, readRangeIn[readRangeInIndex].Offset, readRangeIn[readRangeInIndex].Len)
  2521  			if nil != err {
  2522  				heldLocks.free()
  2523  				return
  2524  			}
  2525  
  2526  			_ = appendReadPlanEntries(readPlan, readRangeOut)
  2527  		}
  2528  	}
  2529  
  2530  	heldLocks.free()
  2531  
  2532  	err = nil
  2533  	return
  2534  }
  2535  
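        // MiddlewareHeadResponse services a middleware HEAD on entityPath,
        // following any symlinks encountered, and returns the entity's
        // attributes along with its MiddlewareStream metadata (empty if the
        // entity was created via SMB/FUSE and thus has no such stream).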
  2536  func (vS *volumeStruct) MiddlewareHeadResponse(entityPath string) (response HeadResponse, err error) {
  2537  	var (
  2538  		dirEntryInodeNumber   inode.InodeNumber
  2539  		heldLocks             *heldLocksStruct
  2540  		retryRequired         bool
  2541  		stat                  Stat
  2542  		tryLockBackoffContext *tryLockBackoffContextStruct
  2543  	)
  2544  
  2545  	startTime := time.Now()
  2546  	defer func() {
  2547  		globals.MiddlewareHeadResponseUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2548  		if err != nil {
  2549  			globals.MiddlewareHeadResponseErrors.Add(1)
  2550  		}
  2551  	}()
  2552  
  2553  	// Retry until done or failure (starting with ZERO backoff)
  2554  
  2555  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2556  
  2557  Restart:
  2558  
  2559  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2560  
  2561  	tryLockBackoffContext.backoff()
  2562  
  2563  	// Construct fresh heldLocks for this restart
  2564  
  2565  	heldLocks = newHeldLocks()
  2566  
  2567  	_, dirEntryInodeNumber, _, _, retryRequired, err =
  2568  		vS.resolvePath(
  2569  			inode.RootDirInodeNumber,
  2570  			entityPath,
  2571  			heldLocks,
  2572  			resolvePathFollowDirEntrySymlinks|
  2573  				resolvePathFollowDirSymlinks)
  2574  
  2575  	if nil != err {
  2576  		heldLocks.free()
  2577  		return
  2578  	}
  2579  
  2580  	if retryRequired {
  2581  		heldLocks.free()
  2582  		goto Restart
  2583  	}
  2584  
  2585  	// Now assemble response
  2586  
  2587  	stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber)
  2588  	if nil != err {
  2589  		heldLocks.free()
  2590  		return
  2591  	}
  2592  
  2593  	// since resolvePathFollowDirEntrySymlinks is set on the call to
  2594  	// resolvePath(), above, we'll never see a symlink returned
  2595  	response.ModificationTime = stat[StatMTime]
  2596  	response.AttrChangeTime = stat[StatCTime]
  2597  	response.FileSize = stat[StatSize]
  2598  	response.IsDir = (stat[StatFType] == uint64(inode.DirType))
  2599  	response.InodeNumber = dirEntryInodeNumber
  2600  	response.NumWrites = stat[StatNumWrites]
  2601  
  2602  	// Swift thinks all directories have a size of 0 (and symlinks as well)
  2603  	if stat[StatFType] != uint64(inode.FileType) {
  2604  		response.FileSize = 0
  2605  	}
  2606  
  2607  	response.Metadata, err = vS.inodeVolumeHandle.GetStream(dirEntryInodeNumber, MiddlewareStream)
  2608  	if nil != err {
  2609  		heldLocks.free()
  2610  		response.Metadata = []byte{}
  2611  		// If someone makes a directory or file via SMB/FUSE and then
  2612  		// HEADs it via HTTP, we'll see this error. We treat it as
  2613  		// though there is no metadata. The middleware is equipped to
  2614  		// handle this case.
  2615  		if blunder.Is(err, blunder.StreamNotFound) {
  2616  			err = nil
  2617  		}
  2618  		return
  2619  	}
  2620  
  2621  	heldLocks.free()
  2622  	return
  2623  }
  2624  
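        // MiddlewarePost updates the MiddlewareStream ("HTTP metadata") of
        // parentDir/baseName, creating missing path elements as needed. The
        // update is a compare-and-swap: newMetaData is only stored if the
        // stream's current contents equal oldMetaData; otherwise a
        // TryAgainError is returned, and a caller wanting retry-until-success
        // semantics would loop along these lines (a hypothetical sketch; the
        // readCurrentMetadata() helper is not part of this package):
        //
        //	for {
        //		old := readCurrentMetadata(parentDir, baseName) // hypothetical
        //		err := vS.MiddlewarePost(parentDir, baseName, newMetaData, old)
        //		if !blunder.Is(err, blunder.TryAgainError) {
        //			break
        //		}
        //	}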
  2625  func (vS *volumeStruct) MiddlewarePost(parentDir string, baseName string, newMetaData []byte, oldMetaData []byte) (err error) {
  2626  	var (
  2627  		dirEntryInodeNumber   inode.InodeNumber
  2628  		existingStreamData    []byte
  2629  		heldLocks             *heldLocksStruct
  2630  		retryRequired         bool
  2631  		tryLockBackoffContext *tryLockBackoffContextStruct
  2632  	)
  2633  
  2634  	startTime := time.Now()
  2635  	defer func() {
  2636  		globals.MiddlewarePostUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2637  		globals.MiddlewarePostBytes.Add(uint64(len(newMetaData)))
  2638  		if err != nil {
  2639  			globals.MiddlewarePostErrors.Add(1)
  2640  		}
  2641  	}()
  2642  
  2643  	// Retry until done or failure (starting with ZERO backoff)
  2644  
  2645  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2646  
  2647  Restart:
  2648  
  2649  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2650  
  2651  	tryLockBackoffContext.backoff()
  2652  
  2653  	// Construct fresh heldLocks for this restart
  2654  
  2655  	heldLocks = newHeldLocks()
  2656  
  2657  	_, dirEntryInodeNumber, _, _, retryRequired, err =
  2658  		vS.resolvePath(
  2659  			inode.RootDirInodeNumber,
  2660  			parentDir+"/"+baseName,
  2661  			heldLocks,
  2662  			resolvePathFollowDirEntrySymlinks|
  2663  				resolvePathFollowDirSymlinks|
  2664  				resolvePathCreateMissingPathElements|
  2665  				resolvePathRequireExclusiveLockOnDirEntryInode)
  2666  
  2667  	if nil != err {
  2668  		heldLocks.free()
  2669  		return
  2670  	}
  2671  
  2672  	if retryRequired {
  2673  		heldLocks.free()
  2674  		goto Restart
  2675  	}
  2676  
  2677  	// Now apply MiddlewareStream update
  2678  
  2679  	// Compare oldMetaData to the current existingStreamData to make sure that the HTTP metadata has not changed.
  2680  	// If it has changed, return an error since the middleware has to handle it.
  2681  
  2682  	existingStreamData, err = vS.inodeVolumeHandle.GetStream(dirEntryInodeNumber, MiddlewareStream)
  2683  	if nil != err {
  2684  		if blunder.Is(err, blunder.StreamNotFound) {
  2685  			err = nil
  2686  			existingStreamData = make([]byte, 0)
  2687  		} else {
  2688  			heldLocks.free()
  2689  			return
  2690  		}
  2691  	}
  2692  
  2693  	// Verify that the oldMetaData is the same as the one we think we are changing.
  2694  
  2695  	if !bytes.Equal(existingStreamData, oldMetaData) {
  2696  		heldLocks.free()
  2697  		err = blunder.NewError(blunder.TryAgainError, "MiddlewarePost(): MetaData different - existingStreamData: %v OldMetaData: %v", existingStreamData, oldMetaData)
  2698  		return
  2699  	}
  2700  
  2701  	// Change looks okay so make it.
  2702  
  2703  	err = vS.inodeVolumeHandle.PutStream(dirEntryInodeNumber, MiddlewareStream, newMetaData)
  2704  	if nil != err {
  2705  		heldLocks.free()
  2706  		return
  2707  	}
  2708  
  2709  	// PutStream() implicitly flushed... so, if it was a FileInode, we don't need to track it anymore
  2710  
  2711  	vS.untrackInFlightFileInodeData(dirEntryInodeNumber, false)
  2712  
  2713  	heldLocks.free()
  2714  	return
  2715  }
  2716  
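        // MiddlewarePutComplete finalizes a middleware PUT of
        // vContainerName/vObjectPath: the previously uploaded log segments
        // named by pObjectPaths (with sizes pObjectLengths) are stitched, in
        // order, into the target FileInode via Wrote(), so segment i lands at
        // the file offset equal to the sum of pObjectLengths[:i]. Any
        // pre-existing directory (which must be empty) or symlink at that path
        // is removed first, and pObjectMetadata is then stored as the file's
        // MiddlewareStream (which also flushes the file).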
  2717  func (vS *volumeStruct) MiddlewarePutComplete(vContainerName string, vObjectPath string, pObjectPaths []string, pObjectLengths []uint64, pObjectMetadata []byte) (mtime uint64, ctime uint64, fileInodeNumber inode.InodeNumber, numWrites uint64, err error) {
  2718  	var (
  2719  		containerName         string
  2720  		dirEntryBasename      string
  2721  		dirEntryInodeNumber   inode.InodeNumber
  2722  		dirEntryInodeType     inode.InodeType
  2723  		dirInodeNumber        inode.InodeNumber
  2724  		fileOffset            uint64
  2725  		heldLocks             *heldLocksStruct
  2726  		inodeVolumeHandle     inode.VolumeHandle = vS.inodeVolumeHandle
  2727  		numPObjects           int
  2728  		objectName            string
  2729  		pObjectIndex          int
  2730  		retryRequired         bool
  2731  		stat                  Stat
  2732  		tryLockBackoffContext *tryLockBackoffContextStruct
  2733  	)
  2734  
  2735  	startTime := time.Now()
  2736  	defer func() {
  2737  		globals.MiddlewarePutCompleteUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2738  		if err != nil {
  2739  			globals.MiddlewarePutCompleteErrors.Add(1)
  2740  		}
  2741  	}()
  2742  
  2743  	// Validate (pObjectPaths,pObjectLengths) args
  2744  
  2745  	numPObjects = len(pObjectPaths)
  2746  
  2747  	if numPObjects != len(pObjectLengths) {
  2748  		err = blunder.NewError(blunder.InvalidArgError, "MiddlewarePutComplete() expects len(pObjectPaths) == len(pObjectLengths)")
  2749  		return
  2750  	}
  2751  
  2752  	// Retry until done or failure (starting with ZERO backoff)
  2753  
  2754  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2755  
  2756  Restart:
  2757  
  2758  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2759  
  2760  	tryLockBackoffContext.backoff()
  2761  
  2762  	// Construct fresh heldLocks for this restart
  2763  
  2764  	heldLocks = newHeldLocks()
  2765  
  2766  	dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err =
  2767  		vS.resolvePath(
  2768  			inode.RootDirInodeNumber,
  2769  			vContainerName+"/"+vObjectPath,
  2770  			heldLocks,
  2771  			resolvePathFollowDirEntrySymlinks|
  2772  				resolvePathFollowDirSymlinks|
  2773  				resolvePathCreateMissingPathElements|
  2774  				resolvePathRequireExclusiveLockOnDirInode|
  2775  				resolvePathRequireExclusiveLockOnDirEntryInode)
  2776  	if nil != err {
  2777  		heldLocks.free()
  2778  		return
  2779  	}
  2780  	if retryRequired {
  2781  		heldLocks.free()
  2782  		goto Restart
  2783  	}
  2784  
  2785  	// The semantics of PUT mean that the existing object is discarded; with
  2786  	// a file we can just overwrite it, but symlinks or directories must be
  2787  	// removed (if possible).
  2788  	if dirEntryInodeType != inode.FileType {
  2789  
  2790  		if dirEntryInodeType == inode.DirType {
  2791  
  2792  			// try to unlink the directory (rmdir flushes the inodes)
  2793  			err = vS.rmdirActual(dirInodeNumber, dirEntryBasename, dirEntryInodeNumber)
  2794  			if err != nil {
  2795  				// the directory was probably not empty
  2796  				heldLocks.free()
  2797  				return
  2798  
  2799  			}
  2800  
  2801  		} else {
  2802  			// unlink the symlink (unlink flushes the inodes)
  2803  			err = vS.unlinkActual(dirInodeNumber, dirEntryBasename, dirEntryInodeNumber)
  2804  			if err != nil {
  2805  
  2806  				// ReadOnlyError is my best guess for the failure
  2807  				err = blunder.NewError(blunder.ReadOnlyError,
  2808  					"MiddlewarePutComplete(): vol '%s' failed to unlink '%s': %v",
  2809  					vS.volumeName, vContainerName+"/"+vObjectPath, err)
  2810  				heldLocks.free()
  2811  				return
  2812  			}
  2813  		}
  2814  
  2815  		// let resolvePath() create the file
  2816  		dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err =
  2817  			vS.resolvePath(
  2818  				inode.RootDirInodeNumber,
  2819  				vContainerName+"/"+vObjectPath,
  2820  				heldLocks,
  2821  				resolvePathFollowDirSymlinks|
  2822  					resolvePathCreateMissingPathElements|
  2823  					resolvePathDirEntryInodeMustBeFile|
  2824  					resolvePathRequireExclusiveLockOnDirInode|
  2825  					resolvePathRequireExclusiveLockOnDirEntryInode)
  2826  		if nil != err {
  2827  			heldLocks.free()
  2828  			return
  2829  		}
  2830  		if retryRequired {
  2831  			heldLocks.free()
  2832  			goto Restart
  2833  		}
  2834  	}
  2835  
  2836  	// Apply (pObjectPaths,pObjectLengths) to (erased) FileInode
  2837  
  2838  	fileOffset = 0
  2839  
  2840  	for pObjectIndex = 0; pObjectIndex < numPObjects; pObjectIndex++ {
  2841  		_, containerName, objectName, err = utils.PathToAcctContObj(pObjectPaths[pObjectIndex])
  2842  		if nil != err {
  2843  			heldLocks.free()
  2844  			logger.DebugfIDWithError(internalDebug, err, "MiddlewarePutComplete(): failed utils.PathToAcctContObj(\"%s\") for dirEntryInodeNumber 0x%016X", pObjectPaths[pObjectIndex], dirEntryInodeNumber)
  2845  			return
  2846  		}
  2847  
  2848  		err = inodeVolumeHandle.Wrote(
  2849  			dirEntryInodeNumber,
  2850  			containerName,
  2851  			objectName,
  2852  			[]uint64{fileOffset},
  2853  			[]uint64{0},
  2854  			[]uint64{pObjectLengths[pObjectIndex]},
  2855  			pObjectIndex > 0) // Initial pObjectIndex == 0 case will implicitly SetSize(,0)
  2856  		if nil != err {
  2857  			heldLocks.free()
  2858  			logger.DebugfIDWithError(internalDebug, err, "MiddlewarePutComplete(): failed inode.Wrote() for dirEntryInodeNumber 0x%016X", dirEntryInodeNumber)
  2859  			return
  2860  		}
  2861  
  2862  		fileOffset += pObjectLengths[pObjectIndex]
  2863  	}
  2864  
  2865  	// Apply pObjectMetadata to FileInode (this will flush it as well)
  2866  
  2867  	err = inodeVolumeHandle.PutStream(dirEntryInodeNumber, MiddlewareStream, pObjectMetadata)
  2868  	if err != nil {
  2869  		heldLocks.free()
  2870  		logger.DebugfIDWithError(internalDebug, err, "MiddlewarePutComplete(): failed PutStream() for dirEntryInodeNumber 0x%016X (pObjectMetadata: %v)", dirEntryInodeNumber, pObjectMetadata)
  2871  		return
  2872  	}
  2873  
  2874  	stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber)
  2875  	if nil != err {
  2876  		heldLocks.free()
  2877  		return
  2878  	}
  2879  
  2880  	mtime = stat[StatMTime]
  2881  	ctime = stat[StatCTime]
  2882  	fileInodeNumber = dirEntryInodeNumber
  2883  	numWrites = stat[StatNumWrites]
  2884  
  2885  	heldLocks.free()
  2886  	return
  2887  }
  2888  
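        // MiddlewareMkdir services a middleware PUT of a directory object at
        // vContainerName/vObjectPath: an existing file or symlink there is
        // unlinked and replaced with a DirInode (an existing directory is
        // simply kept), after which metadata is stored as the directory's
        // MiddlewareStream.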
  2889  func (vS *volumeStruct) MiddlewareMkdir(vContainerName string, vObjectPath string, metadata []byte) (mtime uint64, ctime uint64, inodeNumber inode.InodeNumber, numWrites uint64, err error) {
  2890  	var (
  2891  		dirEntryBasename      string
  2892  		dirEntryInodeNumber   inode.InodeNumber
  2893  		dirEntryInodeType     inode.InodeType
  2894  		dirInodeNumber        inode.InodeNumber
  2895  		heldLocks             *heldLocksStruct
  2896  		retryRequired         bool
  2897  		stat                  Stat
  2898  		tryLockBackoffContext *tryLockBackoffContextStruct
  2899  	)
  2900  
  2901  	startTime := time.Now()
  2902  	defer func() {
  2903  		globals.MiddlewareMkdirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  2904  		if err != nil {
  2905  			globals.MiddlewareMkdirErrors.Add(1)
  2906  		}
  2907  	}()
  2908  
  2909  	// Retry until done or failure (starting with ZERO backoff)
  2910  
  2911  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  2912  
  2913  Restart:
  2914  
  2915  	// Perform backoff and update for each restart (starting with ZERO backoff of course)
  2916  
  2917  	tryLockBackoffContext.backoff()
  2918  
  2919  	// Construct fresh heldLocks for this restart
  2920  
  2921  	heldLocks = newHeldLocks()
  2922  
  2923  	// Resolve the object, locking it and its parent directory exclusive
  2924  	dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err =
  2925  		vS.resolvePath(
  2926  			inode.RootDirInodeNumber,
  2927  			vContainerName+"/"+vObjectPath,
  2928  			heldLocks,
  2929  			resolvePathFollowDirSymlinks|
  2930  				resolvePathCreateMissingPathElements|
  2931  				resolvePathRequireExclusiveLockOnDirInode|
  2932  				resolvePathRequireExclusiveLockOnDirEntryInode)
  2933  	if nil != err {
  2934  		heldLocks.free()
  2935  		return
  2936  	}
  2937  	if retryRequired {
  2938  		heldLocks.free()
  2939  		goto Restart
  2940  	}
  2941  
  2942  	// The semantics of PUT for a directory object require that an existing
  2943  	// file or symlink be discarded and be replaced with a directory (an
  2944  	// existing directory is fine; it just has its headers overwritten).
  2945  	if dirEntryInodeType != inode.DirType {
  2946  
  2947  		// unlink the file or symlink (unlink flushes the inodes)
  2948  		err = vS.unlinkActual(dirInodeNumber, dirEntryBasename, dirEntryInodeNumber)
  2949  		if err != nil {
  2950  
  2951  			// ReadOnlyError is my best guess for the failure
  2952  			err = blunder.NewError(blunder.ReadOnlyError,
  2953  				"MiddlewareMkdir(): vol '%s' failed to unlink '%s': %v",
  2954  				vS.volumeName, vContainerName+"/"+vObjectPath, err)
  2955  			heldLocks.free()
  2956  			return
  2957  		}
  2958  
  2959  		// let resolvePath() make the directory
  2960  		dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err =
  2961  			vS.resolvePath(
  2962  				inode.RootDirInodeNumber,
  2963  				vContainerName+"/"+vObjectPath,
  2964  				heldLocks,
  2965  				resolvePathFollowDirSymlinks|
  2966  					resolvePathCreateMissingPathElements|
  2967  					resolvePathDirEntryInodeMustBeDirectory|
  2968  					resolvePathRequireExclusiveLockOnDirInode|
  2969  					resolvePathRequireExclusiveLockOnDirEntryInode)
  2970  		if nil != err {
  2971  			heldLocks.free()
  2972  			return
  2973  		}
  2974  		if retryRequired {
  2975  			heldLocks.free()
  2976  			goto Restart
  2977  		}
  2978  	}
  2979  
  2980  	err = vS.inodeVolumeHandle.PutStream(dirEntryInodeNumber, MiddlewareStream, metadata)
  2981  	if err != nil {
  2982  		heldLocks.free()
  2983  		logger.DebugfIDWithError(internalDebug, err, "MiddlewareMkdir(): failed PutStream() for dirEntryInodeNumber 0x%016X (metadata: %v)", dirEntryInodeNumber, metadata)
  2984  		return
  2985  	}
  2986  
  2987  	stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber)
  2988  	if nil != err {
  2989  		heldLocks.free()
  2990  		return
  2991  	}
  2992  
  2993  	mtime = stat[StatMTime]
  2994  	ctime = stat[StatCTime]
  2995  	inodeNumber = dirEntryInodeNumber
  2996  	numWrites = stat[StatNumWrites]
  2997  
  2998  	heldLocks.free()
  2999  	return
  3000  }
  3001  
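        // MiddlewarePutContainer creates the top-level directory containerName
        // if it does not yet exist (storing newMetadata on it), or updates its
        // MiddlewareStream if it does. As with MiddlewarePost(), the update is
        // a compare-and-swap against oldMetadata, returning a TryAgainError if
        // the stored metadata has changed out from under the caller.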
  3002  func (vS *volumeStruct) MiddlewarePutContainer(containerName string, oldMetadata []byte, newMetadata []byte) (err error) {
  3003  	var (
  3004  		containerInodeLock   *dlm.RWLockStruct
  3005  		containerInodeNumber inode.InodeNumber
  3006  		existingMetadata     []byte
  3007  		newDirInodeLock      *dlm.RWLockStruct
  3008  		newDirInodeNumber    inode.InodeNumber
  3009  	)
  3010  
  3011  	startTime := time.Now()
  3012  	defer func() {
  3013  		globals.MiddlewarePutContainerUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3014  		globals.MiddlewarePutContainerBytes.Add(uint64(len(newMetadata)))
  3015  		if err != nil {
  3016  			globals.MiddlewarePutContainerErrors.Add(1)
  3017  		}
  3018  	}()
  3019  
  3020  	vS.jobRWMutex.RLock()
  3021  	defer vS.jobRWMutex.RUnlock()
  3022  
  3023  	// Yes, it's a heavy lock to hold on the root inode. However, we
  3024  	// might need to add a new directory entry there, so there's not
  3025  	// much else we can do.
  3026  	rootInodeLock, err := vS.inodeVolumeHandle.GetWriteLock(inode.RootDirInodeNumber, nil)
  3027  	if nil != err {
  3028  		return
  3029  	}
  3030  	defer rootInodeLock.Unlock()
  3031  
  3032  	containerInodeNumber, err = vS.inodeVolumeHandle.Lookup(inode.RootDirInodeNumber, containerName)
  3033  	if err != nil && blunder.IsNot(err, blunder.NotFoundError) {
  3034  		return
  3035  	} else if err != nil {
  3036  		// No such container, so we create it
  3037  		err = validateBaseName(containerName)
  3038  		if err != nil {
  3039  			return
  3040  		}
  3041  
  3042  		newDirInodeNumber, err = vS.inodeVolumeHandle.CreateDir(inode.PosixModePerm, 0, 0)
  3043  		if err != nil {
  3044  			logger.ErrorWithError(err)
  3045  			return
  3046  		}
  3047  
  3048  		newDirInodeLock, err = vS.inodeVolumeHandle.GetWriteLock(newDirInodeNumber, nil)
        		if err != nil {
        			return
        		}
  3049  		defer newDirInodeLock.Unlock()
  3050  
  3051  		err = vS.inodeVolumeHandle.PutStream(newDirInodeNumber, MiddlewareStream, newMetadata)
  3052  		if err != nil {
  3053  			logger.ErrorWithError(err)
  3054  			return
  3055  		}
  3056  
  3057  		err = vS.inodeVolumeHandle.Link(inode.RootDirInodeNumber, containerName, newDirInodeNumber, false)
  3058  
  3059  		return
  3060  	}
  3061  
  3062  	containerInodeLock, err = vS.inodeVolumeHandle.GetWriteLock(containerInodeNumber, nil)
  3063  	if err != nil {
  3064  		return
  3065  	}
  3066  	defer containerInodeLock.Unlock()
  3067  
  3068  	// Existing container: just update the metadata
  3069  	existingMetadata, err = vS.inodeVolumeHandle.GetStream(containerInodeNumber, MiddlewareStream)
  3070  
  3071  	// GetStream() will return an error if there is no "middleware" stream
  3072  	if err != nil && blunder.IsNot(err, blunder.StreamNotFound) {
  3073  		return
  3074  	} else if err != nil {
  3075  		existingMetadata = []byte{}
  3076  	}
  3077  
  3078  	// Only change it if the caller sent the current value
  3079  	if !bytes.Equal(existingMetadata, oldMetadata) {
  3080  		err = blunder.NewError(blunder.TryAgainError, "Metadata differs - actual: %v request: %v", existingMetadata, oldMetadata)
  3081  		return
  3082  	}
  3083  	err = vS.inodeVolumeHandle.PutStream(containerInodeNumber, MiddlewareStream, newMetadata)
  3084  
  3085  	return
  3086  }
  3087  
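        // Mkdir creates a new directory inode owned by userID/groupID and links it
        // into the parent directory inodeNumber as basename. Note the ordering: the
        // inode is created before the parent is locked and access-checked, so every
        // failure path after CreateDir() must Destroy() the orphaned inode.
        //
        // A minimal caller sketch (hypothetical names):
        //
        //	newDir, err := volume.Mkdir(uid, gid, nil, parentInode, "subdir", inode.PosixModePerm)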
  3088  func (vS *volumeStruct) Mkdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (newDirInodeNumber inode.InodeNumber, err error) {
  3089  	startTime := time.Now()
  3090  	defer func() {
  3091  		globals.MkdirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3092  		if err != nil {
  3093  			globals.MkdirErrors.Add(1)
  3094  		}
  3095  	}()
  3096  
  3097  	vS.jobRWMutex.RLock()
  3098  	defer vS.jobRWMutex.RUnlock()
  3099  
  3100  	// Make sure the new directory's basename is not too long
  3101  	err = validateBaseName(basename)
  3102  	if err != nil {
  3103  		return 0, err
  3104  	}
  3105  
  3106  	newDirInodeNumber, err = vS.inodeVolumeHandle.CreateDir(filePerm, userID, groupID)
  3107  	if err != nil {
  3108  		logger.ErrorWithError(err)
  3109  		return 0, err
  3110  	}
  3111  
  3112  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3113  	if err != nil {
  3114  		return
  3115  	}
  3116  	err = inodeLock.WriteLock()
  3117  	if err != nil {
  3118  		return
  3119  	}
  3120  	defer inodeLock.Unlock()
  3121  
  3122  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3123  		inode.NoOverride) {
  3124  
  3125  		destroyErr := vS.inodeVolumeHandle.Destroy(newDirInodeNumber)
  3126  		if destroyErr != nil {
  3127  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(F_OK) in fs.Mkdir", newDirInodeNumber)
  3128  		}
  3129  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3130  		return 0, err
  3131  	}
  3132  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
  3133  		inode.NoOverride) {
  3134  
  3135  		destroyErr := vS.inodeVolumeHandle.Destroy(newDirInodeNumber)
  3136  		if destroyErr != nil {
  3137  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(W_OK|X_OK) in fs.Mkdir", newDirInodeNumber)
  3138  		}
  3139  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3140  		return 0, err
  3141  	}
  3142  
  3143  	err = vS.inodeVolumeHandle.Link(inodeNumber, basename, newDirInodeNumber, false)
  3144  	if err != nil {
  3145  		destroyErr := vS.inodeVolumeHandle.Destroy(newDirInodeNumber)
  3146  		if destroyErr != nil {
  3147  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Link() in fs.Mkdir", newDirInodeNumber)
  3148  		}
  3149  		return 0, err
  3150  	}
  3151  
  3152  	return newDirInodeNumber, nil
  3153  }
  3154  
  3155  func (vS *volumeStruct) RemoveXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (err error) {
  3156  	startTime := time.Now()
  3157  	defer func() {
  3158  		globals.RemoveXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3159  		if err != nil {
  3160  			globals.RemoveXAttrErrors.Add(1)
  3161  		}
  3162  	}()
  3163  
  3164  	vS.jobRWMutex.RLock()
  3165  	defer vS.jobRWMutex.RUnlock()
  3166  
  3167  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3168  	if err != nil {
  3169  		return
  3170  	}
  3171  	err = inodeLock.WriteLock()
  3172  	if err != nil {
  3173  		return
  3174  	}
  3175  	defer inodeLock.Unlock()
  3176  
  3177  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3178  		inode.NoOverride) {
  3179  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3180  		return
  3181  	}
  3182  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  3183  		inode.OwnerOverride) {
  3184  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3185  		return
  3186  	}
  3187  
  3188  	err = vS.inodeVolumeHandle.DeleteStream(inodeNumber, streamName)
  3189  	if err != nil {
  3190  		logger.ErrorfWithError(err, "Failed to delete XAttr %v of inode %v", streamName, inodeNumber)
  3191  	}
  3192  
  3193  	vS.untrackInFlightFileInodeData(inodeNumber, false)
  3194  
  3195  	return
  3196  }
  3197  
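        // Rename moves srcDirInodeNumber/srcBasename to dstDirInodeNumber/dstBasename.
        // It takes exclusive locks on the source directory and entry, the destination
        // directory, and (when present) the destination entry, retrying the whole
        // sequence with backoff whenever resolvePath() reports contention; the actual
        // move is then a single call to inode.Move().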
  3198  func (vS *volumeStruct) Rename(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (err error) {
  3199  	var (
  3200  		dirEntryBasename      string
  3201  		dirEntryInodeNumber   inode.InodeNumber
  3202  		dirInodeNumber        inode.InodeNumber
  3203  		heldLocks             *heldLocksStruct
  3204  		retryRequired         bool
  3205  		tryLockBackoffContext *tryLockBackoffContextStruct
  3206  	)
  3207  
  3208  	startTime := time.Now()
  3209  	defer func() {
  3210  		globals.RenameUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3211  		if err != nil {
  3212  			globals.RenameErrors.Add(1)
  3213  		}
  3214  	}()
  3215  
  3216  	vS.jobRWMutex.RLock()
  3217  	defer vS.jobRWMutex.RUnlock()
  3218  
  3219  	err = validateBaseName(srcBasename)
  3220  	if nil != err {
  3221  		return
  3222  	}
  3223  
  3224  	err = validateBaseName(dstBasename)
  3225  	if nil != err {
  3226  		return
  3227  	}
  3228  
  3229  	// Retry until done or failure (starting with ZERO backoff)
  3230  
  3231  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  3232  
  3233  Restart:
  3234  
  3235  	// Perform backoff before each attempt (zero backoff on the first pass, growing on each restart)
  3236  
  3237  	tryLockBackoffContext.backoff()
  3238  
  3239  	// Construct fresh heldLocks for this restart
  3240  
  3241  	heldLocks = newHeldLocks()
  3242  
  3243  	// Acquire WriteLock on {srcDirInodeNumber,srcBasename} & perform Access Check
  3244  
  3245  	dirInodeNumber, _, dirEntryBasename, _, retryRequired, err =
  3246  		vS.resolvePath(
  3247  			srcDirInodeNumber,
  3248  			srcBasename,
  3249  			heldLocks,
  3250  			resolvePathRequireExclusiveLockOnDirEntryInode|
  3251  				resolvePathRequireExclusiveLockOnDirInode)
  3252  
  3253  	if nil != err {
  3254  		heldLocks.free()
  3255  		err = blunder.AddError(err, blunder.NotFoundError)
  3256  		return
  3257  	}
  3258  
  3259  	if retryRequired {
  3260  		heldLocks.free()
  3261  		goto Restart
  3262  	}
  3263  
  3264  	if (dirInodeNumber != srcDirInodeNumber) || (dirEntryBasename != srcBasename) {
  3265  		heldLocks.free()
  3266  		err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
  3267  		return
  3268  	}
  3269  
  3270  	if !vS.inodeVolumeHandle.Access(srcDirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK, inode.NoOverride) {
  3271  		heldLocks.free()
  3272  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3273  		return
  3274  	}
  3275  	// Acquire WriteLock on dstDirInodeNumber & perform Access Check
  3276  
  3277  	_, dirEntryInodeNumber, _, _, retryRequired, err =
  3278  		vS.resolvePath(
  3279  			dstDirInodeNumber,
  3280  			".",
  3281  			heldLocks,
  3282  			resolvePathDirEntryInodeMustBeDirectory|
  3283  				resolvePathRequireExclusiveLockOnDirEntryInode)
  3284  
  3285  	if nil != err {
  3286  		heldLocks.free()
  3287  		err = blunder.AddError(err, blunder.NotFoundError)
  3288  		return
  3289  	}
  3290  
  3291  	if retryRequired {
  3292  		heldLocks.free()
  3293  		goto Restart
  3294  	}
  3295  
  3296  	if dirEntryInodeNumber != dstDirInodeNumber {
  3297  		heldLocks.free()
  3298  		err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
  3299  		return
  3300  	}
  3301  
  3302  	if !vS.inodeVolumeHandle.Access(dstDirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK, inode.NoOverride) {
  3303  		heldLocks.free()
  3304  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3305  		return
  3306  	}
  3307  
  3308  	// Acquire WriteLock on dstBasename if it exists
  3309  
  3310  	dirInodeNumber, _, dirEntryBasename, _, retryRequired, err =
  3311  		vS.resolvePath(
  3312  			dstDirInodeNumber,
  3313  			dstBasename,
  3314  			heldLocks,
  3315  			resolvePathRequireExclusiveLockOnDirEntryInode)
  3316  
  3317  	if nil == err {
  3318  		if retryRequired {
  3319  			heldLocks.free()
  3320  			goto Restart
  3321  		}
  3322  
  3323  		if (dirInodeNumber != dstDirInodeNumber) || (dirEntryBasename != dstBasename) {
  3324  			heldLocks.free()
  3325  			err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
  3326  			return
  3327  		}
  3328  	} else {
  3329  		// A lookup failure here is fine... it just means the Rename() isn't replacing an existing entry at the destination
  3330  	}
  3331  
  3332  	// Locks held & Access Checks succeeded... time to do the Move
  3333  
  3334  	err = vS.inodeVolumeHandle.Move(srcDirInodeNumber, srcBasename, dstDirInodeNumber, dstBasename)
  3335  
  3336  	heldLocks.free()
  3337  
  3338  	return // err returned from inode.Move() suffices here
  3339  }
  3340  
  3341  func (vS *volumeStruct) Read(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, length uint64, profiler *utils.Profiler) (buf []byte, err error) {
  3342  	startTime := time.Now()
  3343  	defer func() {
  3344  		globals.ReadUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3345  		globals.ReadBytes.Add(uint64(len(buf)))
  3346  		if err != nil {
  3347  			globals.ReadErrors.Add(1)
  3348  		}
  3349  	}()
  3350  
  3351  	vS.jobRWMutex.RLock()
  3352  	defer vS.jobRWMutex.RUnlock()
  3353  
  3354  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3355  	if err != nil {
  3356  		return
  3357  	}
  3358  	err = inodeLock.ReadLock()
  3359  	if err != nil {
  3360  		return
  3361  	}
  3362  	defer inodeLock.Unlock()
  3363  
  3364  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3365  		inode.NoOverride) {
  3366  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3367  		return
  3368  	}
  3369  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
  3370  		inode.OwnerOverride) {
  3371  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3372  		return
  3373  	}
  3374  
  3375  	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
  3376  	if err != nil {
  3377  		logger.ErrorfWithError(err, "couldn't get type for inode %v", inodeNumber)
  3378  		return buf, err
  3379  	}
  3380  	// Make sure the inode number is for a file inode
  3381  	if inodeType != inode.FileType {
  3382  		err = fmt.Errorf("%s: expected inode %v to be a file inode, got %v", utils.GetFnName(), inodeNumber, inodeType)
  3383  		logger.ErrorWithError(err)
  3384  		return buf, blunder.AddError(err, blunder.NotFileError)
  3385  	}
  3386  
  3387  	profiler.AddEventNow("before inode.Read()")
  3388  	buf, err = vS.inodeVolumeHandle.Read(inodeNumber, offset, length, profiler)
  3389  	profiler.AddEventNow("after inode.Read()")
  3390  	if uint64(len(buf)) > length {
  3391  		err = fmt.Errorf("%s: Buf length %v is greater than supplied length %v", utils.GetFnName(), uint64(len(buf)), length)
  3392  		logger.ErrorWithError(err)
  3393  		return buf, blunder.AddError(err, blunder.IOError)
  3394  	}
  3395  
  3396  	return buf, err
  3397  }
  3398  
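        // readdirHelper returns up to maxEntries directory entries following
        // prevReturned, plus a Stat for each entry (it backs both Readdir() and
        // ReaddirPlus()). Read locks are only attempted, never waited on: any lock
        // contention sends the whole operation back to Restart after a backoff.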
  3399  func (vS *volumeStruct) readdirHelper(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (dirEntries []inode.DirEntry, statEntries []Stat, numEntries uint64, areMoreEntries bool, err error) {
  3400  	var (
  3401  		dirEntryIndex         uint64
  3402  		dlmCallerID           dlm.CallerID
  3403  		inodeLock             *dlm.RWLockStruct
  3404  		inodeVolumeHandle     inode.VolumeHandle
  3405  		internalErr           error
  3406  		tryLockBackoffContext *tryLockBackoffContextStruct
  3407  	)
  3408  
  3409  	vS.jobRWMutex.RLock()
  3410  	defer vS.jobRWMutex.RUnlock()
  3411  
  3412  	dlmCallerID = dlm.GenerateCallerID()
  3413  	inodeVolumeHandle = vS.inodeVolumeHandle
  3414  
  3415  	tryLockBackoffContext = &tryLockBackoffContextStruct{}
  3416  
  3417  Restart:
  3418  
  3419  	tryLockBackoffContext.backoff()
  3420  
  3421  	inodeLock, err = inodeVolumeHandle.AttemptReadLock(inodeNumber, dlmCallerID)
  3422  	if nil != err {
  3423  		goto Restart
  3424  	}
  3425  
  3426  	if !inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, inode.NoOverride) {
  3427  		internalErr = inodeLock.Unlock()
  3428  		if nil != internalErr {
  3429  			logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
  3430  		}
  3431  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3432  		return
  3433  	}
  3434  	if !inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK, inode.OwnerOverride) {
  3435  		internalErr = inodeLock.Unlock()
  3436  		if nil != internalErr {
  3437  			logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
  3438  		}
  3439  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3440  		return
  3441  	}
  3442  
  3443  	dirEntries, areMoreEntries, err = inodeVolumeHandle.ReadDir(inodeNumber, maxEntries, 0, prevReturned...)
  3444  
  3445  	internalErr = inodeLock.Unlock()
  3446  	if nil != internalErr {
  3447  		logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
  3448  	}
  3449  
  3450  	if nil != err {
  3451  		return
  3452  	}
  3453  
  3454  	// Now go back and fill in statEntries (and each dirEntries[i].Type)
  3455  
  3456  	numEntries = uint64(len(dirEntries))
  3457  
  3458  	statEntries = make([]Stat, numEntries)
  3459  
  3460  	for dirEntryIndex = 0; dirEntryIndex < numEntries; dirEntryIndex++ {
  3461  		inodeLock, err = inodeVolumeHandle.AttemptReadLock(dirEntries[dirEntryIndex].InodeNumber, dlmCallerID)
  3462  		if nil != err {
  3463  			goto Restart
  3464  		}
  3465  
  3466  		statEntries[dirEntryIndex], err = vS.getstatHelperWhileLocked(dirEntries[dirEntryIndex].InodeNumber)
  3467  
  3468  		internalErr = inodeLock.Unlock()
  3469  		if nil != internalErr {
  3470  			logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
  3471  		}
  3472  
  3473  		if nil == err {
  3474  			dirEntries[dirEntryIndex].Type = inode.InodeType(statEntries[dirEntryIndex][StatFType])
  3475  		} else {
  3476  			logger.ErrorfWithError(err, "fs.readdirHelper(,,,inodeNumber:0x%016X,,...) couldn't `stat` %s:0x%016X... defaulting .Type to inode.DirType", inodeNumber, dirEntries[dirEntryIndex].Basename, dirEntries[dirEntryIndex].InodeNumber)
  3477  			dirEntries[dirEntryIndex].Type = inode.DirType
  3478  			err = nil
  3479  		}
  3480  	}
  3481  
  3482  	return
  3483  }
  3484  
  3485  func (vS *volumeStruct) Readdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (entries []inode.DirEntry, numEntries uint64, areMoreEntries bool, err error) {
  3486  	startTime := time.Now()
  3487  	defer func() {
  3488  		globals.ReaddirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3489  		globals.ReaddirEntries.Add(uint64(len(entries)))
  3490  		if err != nil {
  3491  			globals.ReaddirErrors.Add(1)
  3492  		}
  3493  	}()
  3494  
  3495  	entries, _, numEntries, areMoreEntries, err = vS.readdirHelper(userID, groupID, otherGroupIDs, inodeNumber, maxEntries, prevReturned...)
  3496  
  3497  	return
  3498  }
  3499  
  3500  func (vS *volumeStruct) ReaddirPlus(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (dirEntries []inode.DirEntry, statEntries []Stat, numEntries uint64, areMoreEntries bool, err error) {
  3501  	startTime := time.Now()
  3502  	defer func() {
  3503  		globals.ReaddirPlusUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
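        		// NB: this adds the number of entries returned, not a byte count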
  3504  		globals.ReaddirPlusBytes.Add(uint64(len(dirEntries)))
  3505  		if err != nil {
  3506  			globals.ReaddirPlusErrors.Add(1)
  3507  		}
  3508  	}()
  3509  
  3510  	dirEntries, statEntries, numEntries, areMoreEntries, err = vS.readdirHelper(userID, groupID, otherGroupIDs, inodeNumber, maxEntries, prevReturned...)
  3511  
  3512  	return
  3513  }
  3514  
  3515  func (vS *volumeStruct) Readsymlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (target string, err error) {
  3516  	startTime := time.Now()
  3517  	defer func() {
  3518  		globals.ReadsymlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3519  		if err != nil {
  3520  			globals.ReadsymlinkErrors.Add(1)
  3521  		}
  3522  	}()
  3523  
  3524  	vS.jobRWMutex.RLock()
  3525  	defer vS.jobRWMutex.RUnlock()
  3526  
  3527  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3528  	if err != nil {
  3529  		return
  3530  	}
  3531  	err = inodeLock.ReadLock()
  3532  	if err != nil {
  3533  		return
  3534  	}
  3535  	defer inodeLock.Unlock()
  3536  
  3537  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3538  		inode.NoOverride) {
  3539  
  3540  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3541  		return
  3542  	}
  3543  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
  3544  		inode.NoOverride) {
  3545  
  3546  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3547  		return
  3548  	}
  3549  
  3550  	target, err = vS.inodeVolumeHandle.GetSymlink(inodeNumber)
  3551  
  3552  	return target, err
  3553  }
  3554  
  3555  func (vS *volumeStruct) Resize(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, newSize uint64) (err error) {
  3556  	startTime := time.Now()
  3557  	defer func() {
  3558  		globals.ResizeUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3559  		if err != nil {
  3560  			globals.ResizeErrors.Add(1)
  3561  		}
  3562  	}()
  3563  
  3564  	vS.jobRWMutex.RLock()
  3565  	defer vS.jobRWMutex.RUnlock()
  3566  
  3567  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3568  	if err != nil {
  3569  		return
  3570  	}
  3571  	err = inodeLock.WriteLock()
  3572  	if err != nil {
  3573  		return
  3574  	}
  3575  	defer inodeLock.Unlock()
  3576  
  3577  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3578  		inode.NoOverride) {
  3579  
  3580  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3581  		return
  3582  	}
  3583  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  3584  		inode.OwnerOverride) {
  3585  
  3586  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3587  		return
  3588  	}
  3589  
  3590  	err = vS.inodeVolumeHandle.SetSize(inodeNumber, newSize)
  3591  	vS.untrackInFlightFileInodeData(inodeNumber, false)
  3592  
  3593  	return err
  3594  }
  3595  
  3596  func (vS *volumeStruct) Rmdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error) {
  3597  	startTime := time.Now()
  3598  	defer func() {
  3599  		globals.RmdirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3600  		if err != nil {
  3601  			globals.RmdirErrors.Add(1)
  3602  		}
  3603  	}()
  3604  
  3605  	vS.jobRWMutex.RLock()
  3606  	defer vS.jobRWMutex.RUnlock()
  3607  
  3608  	callerID := dlm.GenerateCallerID()
  3609  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, callerID)
  3610  	if err != nil {
  3611  		return
  3612  	}
  3613  	err = inodeLock.WriteLock()
  3614  	if err != nil {
  3615  		return
  3616  	}
  3617  	defer inodeLock.Unlock()
  3618  
  3619  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3620  		inode.NoOverride) {
  3621  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3622  		return
  3623  	}
  3624  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
  3625  		inode.NoOverride) {
  3626  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3627  		return
  3628  	}
  3629  
  3630  	basenameInodeNumber, err := vS.inodeVolumeHandle.Lookup(inodeNumber, basename)
  3631  	if nil != err {
  3632  		return
  3633  	}
  3634  
  3635  	basenameInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(basenameInodeNumber, callerID)
  3636  	if err != nil {
  3637  		return
  3638  	}
  3639  	err = basenameInodeLock.WriteLock()
  3640  	if err != nil {
  3641  		return
  3642  	}
  3643  	defer basenameInodeLock.Unlock()
  3644  
  3645  	// no permissions are required on the target directory
  3646  
  3647  	err = vS.rmdirActual(inodeNumber, basename, basenameInodeNumber)
  3648  	return
  3649  }
  3650  
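        // rmdirActual assumes the caller already holds write locks on both the parent
        // and the target. It verifies the target is a directory containing only "."
        // and ".." (NumDirEntries == 2), then unlinks it from the parent and destroys
        // the inode.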
  3651  func (vS *volumeStruct) rmdirActual(inodeNumber inode.InodeNumber,
  3652  	basename string, basenameInodeNumber inode.InodeNumber) (err error) {
  3653  
  3654  	basenameInodeType, err := vS.inodeVolumeHandle.GetType(basenameInodeNumber)
  3655  	if nil != err {
  3656  		return
  3657  	}
  3658  
  3659  	if inode.DirType != basenameInodeType {
  3660  		err = fmt.Errorf("Rmdir() called on non-Directory")
  3661  		err = blunder.AddError(err, blunder.NotDirError)
  3662  		return
  3663  	}
  3664  
  3665  	dirEntries, err := vS.inodeVolumeHandle.NumDirEntries(basenameInodeNumber)
  3666  	if nil != err {
  3667  		return
  3668  	}
  3669  
  3670  	if 2 != dirEntries {
  3671  		err = fmt.Errorf("Directory not empty")
  3672  		err = blunder.AddError(err, blunder.NotEmptyError)
  3673  		return
  3674  	}
  3675  
  3676  	err = vS.inodeVolumeHandle.Unlink(inodeNumber, basename, false)
  3677  	if nil != err {
  3678  		return
  3679  	}
  3680  
  3681  	err = vS.inodeVolumeHandle.Destroy(basenameInodeNumber)
  3682  	if nil != err {
  3683  		return
  3684  	}
  3685  
  3686  	return
  3687  }
  3688  
  3689  func (vS *volumeStruct) Setstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, stat Stat) (err error) {
  3690  	startTime := time.Now()
  3691  	defer func() {
  3692  		globals.SetstatUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3693  		if err != nil {
  3694  			globals.SetstatErrors.Add(1)
  3695  		}
  3696  	}()
  3697  
  3698  	vS.jobRWMutex.RLock()
  3699  	defer vS.jobRWMutex.RUnlock()
  3700  
  3701  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3702  	if err != nil {
  3703  		return
  3704  	}
  3705  	err = inodeLock.WriteLock()
  3706  	if err != nil {
  3707  		return
  3708  	}
  3709  	defer inodeLock.Unlock()
  3710  
  3711  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.P_OK,
  3712  		inode.NoOverride) {
  3713  		err = blunder.NewError(blunder.NotPermError, "EPERM")
  3714  		return
  3715  	}
  3716  
  3717  	// perform all permissions checks before making any changes
  3718  	//
  3719  	// changing the filesize requires write permission
  3720  	_, ok := stat[StatSize]
  3721  	if ok {
  3722  		if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  3723  			inode.OwnerOverride) {
  3724  			err = blunder.NewError(blunder.NotPermError, "EPERM")
  3725  			return
  3726  		}
  3727  	}
  3728  
  3729  	// most other attributes can only be changed by the owner of the file
  3730  	ownerOnly := []StatKey{StatCTime, StatCRTime, StatMTime, StatATime, StatMode, StatUserID, StatGroupID}
  3731  	for _, key := range ownerOnly {
  3732  		_, ok := stat[key]
  3733  		if ok {
  3734  			if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.P_OK,
  3735  				inode.NoOverride) {
  3736  				err = blunder.NewError(blunder.NotPermError, "EPERM")
  3737  				return
  3738  			}
  3739  			break
  3740  		}
  3741  	}
  3742  
  3743  	// the superuser (root) is the only one that can change the owner of the file to a
  3744  	// different user, but the owner of the file can perform a no-op "change" in
  3745  	// ownership
  3746  	newUserID, settingUserID := stat[StatUserID]
  3747  	if settingUserID && userID != inode.InodeRootUserID {
  3748  		if userID != inode.InodeUserID(newUserID) {
  3749  			err = blunder.NewError(blunder.NotPermError, "EPERM")
  3750  			return
  3751  		}
  3752  	}
  3753  
  3754  	// the group can only be changed to the current group or another group the owner
  3755  	// is in (unless it's the superuser asking)
  3756  	newGroupID, settingGroupID := stat[StatGroupID]
  3757  	if settingGroupID && groupID != inode.InodeGroupID(newGroupID) && userID != inode.InodeRootUserID {
  3758  
  3759  		err = blunder.NewError(blunder.NotPermError, "EPERM")
  3760  		for _, otherID := range otherGroupIDs {
  3761  			if inode.InodeGroupID(newGroupID) == otherID {
  3762  				err = nil
  3763  				break
  3764  			}
  3765  		}
  3766  		if err != nil {
  3767  			return
  3768  		}
  3769  	}
  3770  
  3771  	// sanity checks for invalid/illegal values
  3772  	if settingUserID {
  3773  		// Since we are using a uint64 to convey a uint32 value, make sure we didn't get something too big
  3774  		if newUserID > uint64(math.MaxUint32) {
  3775  			err = fmt.Errorf("%s: userID is too large - value is %v, max is %v.", utils.GetFnName(), newUserID, uint64(math.MaxUint32))
  3776  			err = blunder.AddError(err, blunder.InvalidUserIDError)
  3777  			return
  3778  		}
  3779  	}
  3780  
  3781  	if settingGroupID {
  3782  		// Since we are using a uint64 to convey a uint32 value, make sure we didn't get something too big
  3783  		if newGroupID > uint64(math.MaxUint32) {
  3784  			err = fmt.Errorf("%s: groupID is too large - value is %v, max is %v.", utils.GetFnName(), newGroupID, uint64(math.MaxUint32))
  3785  			err = blunder.AddError(err, blunder.InvalidGroupIDError)
  3786  			return
  3787  		}
  3788  	}
  3789  
  3790  	filePerm, settingFilePerm := stat[StatMode]
  3791  	if settingFilePerm {
  3792  		// Since we are using a uint64 to convey a 12 bit value, make sure we didn't get something too big
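        		// (12 bits = the setuid/setgid/sticky bits plus rwxrwxrwx, i.e. at most 07777)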
  3793  		if filePerm >= 1<<12 {
  3794  			err = fmt.Errorf("%s: filePerm is too large - value is %v, max is %v.", utils.GetFnName(),
  3795  				filePerm, (1<<12)-1)
  3796  			err = blunder.AddError(err, blunder.InvalidFileModeError)
  3797  			return
  3798  		}
  3799  	}
  3800  
  3801  	// get to work setting things
  3802  	//
  3803  	// Set permissions, if present in the map
  3804  	if settingFilePerm {
  3805  		err = vS.inodeVolumeHandle.SetPermMode(inodeNumber, inode.InodeMode(filePerm))
  3806  		if err != nil {
  3807  			logger.ErrorWithError(err)
  3808  			return err
  3809  		}
  3810  	}
  3811  
  3812  	// set owner and/or group owner, if present in the map
  3813  	err = nil
  3814  	if settingUserID && settingGroupID {
  3815  		err = vS.inodeVolumeHandle.SetOwnerUserIDGroupID(inodeNumber, inode.InodeUserID(newUserID),
  3816  			inode.InodeGroupID(newGroupID))
  3817  	} else if settingUserID {
  3818  		err = vS.inodeVolumeHandle.SetOwnerUserID(inodeNumber, inode.InodeUserID(newUserID))
  3819  	} else if settingGroupID {
  3820  		err = vS.inodeVolumeHandle.SetOwnerGroupID(inodeNumber, inode.InodeGroupID(newGroupID))
  3821  	}
  3822  	if err != nil {
  3823  		logger.ErrorWithError(err)
  3824  		return
  3825  	}
  3826  
  3827  	// Set crtime, if present in the map
  3828  	crtime, ok := stat[StatCRTime]
  3829  	if ok {
  3830  		newCreationTime := time.Unix(0, int64(crtime))
  3831  		err = vS.inodeVolumeHandle.SetCreationTime(inodeNumber, newCreationTime)
  3832  		if err != nil {
  3833  			logger.ErrorWithError(err)
  3834  			return err
  3835  		}
  3836  	}
  3837  
  3838  	// Set mtime, if present in the map
  3839  	mtime, ok := stat[StatMTime]
  3840  	if ok {
  3841  		newModificationTime := time.Unix(0, int64(mtime))
  3842  		err = vS.inodeVolumeHandle.SetModificationTime(inodeNumber, newModificationTime)
  3843  		if err != nil {
  3844  			logger.ErrorWithError(err)
  3845  			return err
  3846  		}
  3847  	}
  3848  
  3849  	// Set atime, if present in the map
  3850  	atime, ok := stat[StatATime]
  3851  	if ok {
  3852  		newAccessTime := time.Unix(0, int64(atime))
  3853  		err = vS.inodeVolumeHandle.SetAccessTime(inodeNumber, newAccessTime)
  3854  		if err != nil {
  3855  			logger.ErrorWithError(err)
  3856  			return err
  3857  		}
  3858  	}
  3859  
  3860  	// ctime is used to reliably determine whether the contents of a file
  3861  	// have changed so it cannot be altered by a client (some security
  3862  	// software depends on this)
  3863  	ctime, ok := stat[StatCTime]
  3864  	if ok {
  3865  		requestedCTime := time.Unix(0, int64(ctime))
  3866  		logger.Infof("%s: ignoring attempt to change ctime to %v on volume '%s' inode %v",
  3867  			utils.GetFnName(), requestedCTime, vS.volumeName, inodeNumber)
  3868  	}
  3869  
  3870  	// Set size, if present in the map
  3871  	size, ok := stat[StatSize]
  3872  	if ok {
  3873  		err = vS.inodeVolumeHandle.SetSize(inodeNumber, size)
  3874  		if err != nil {
  3875  			logger.ErrorWithError(err)
  3876  			return err
  3877  		}
  3878  	}
  3879  
  3880  	return
  3881  }
  3882  
  3883  func (vS *volumeStruct) SetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string, value []byte, flags int) (err error) {
  3884  	startTime := time.Now()
  3885  	defer func() {
  3886  		globals.SetXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3887  		if err != nil {
  3888  			globals.SetXAttrErrors.Add(1)
  3889  		}
  3890  	}()
  3891  
  3892  	vS.jobRWMutex.RLock()
  3893  	defer vS.jobRWMutex.RUnlock()
  3894  
  3895  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  3896  	if err != nil {
  3897  		return
  3898  	}
  3899  	err = inodeLock.WriteLock()
  3900  	if err != nil {
  3901  		return
  3902  	}
  3903  	defer inodeLock.Unlock()
  3904  
  3905  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  3906  		inode.NoOverride) {
  3907  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  3908  		return
  3909  	}
  3910  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  3911  		inode.OwnerOverride) {
  3912  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  3913  		return
  3914  	}
  3915  
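        	// flags follows setxattr(2) semantics: SetXAttrCreate fails if the stream
        	// already exists, SetXAttrReplace fails if it does not, and
        	// SetXAttrCreateOrReplace imposes no precondition.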
  3916  	switch flags {
  3917  	case SetXAttrCreateOrReplace:
  3918  		// nothing to check
  3919  	case SetXAttrCreate:
  3920  		_, err = vS.GetXAttr(userID, groupID, otherGroupIDs, inodeNumber, streamName)
  3921  		if err == nil {
  3922  			return blunder.AddError(err, blunder.FileExistsError)
  3923  		}
  3924  	case SetXAttrReplace:
  3925  		_, err = vS.GetXAttr(userID, groupID, otherGroupIDs, inodeNumber, streamName)
  3926  		if err != nil {
  3927  			return blunder.AddError(err, blunder.StreamNotFound)
  3928  		}
  3929  	default:
  3930  		return blunder.AddError(err, blunder.InvalidArgError)
  3931  	}
  3932  
  3933  	err = vS.inodeVolumeHandle.PutStream(inodeNumber, streamName, value)
  3934  	if err != nil {
  3935  		logger.ErrorfWithError(err, "Failed to set XAttr %v to inode %v", streamName, inodeNumber)
  3936  	}
  3937  
  3938  	vS.untrackInFlightFileInodeData(inodeNumber, false)
  3939  
  3940  	return
  3941  }
  3942  
  3943  func (vS *volumeStruct) StatVfs() (statVFS StatVFS, err error) {
  3944  	startTime := time.Now()
  3945  	defer func() {
  3946  		globals.StatVfsUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3947  		if err != nil {
  3948  			globals.StatVfsErrors.Add(1)
  3949  		}
  3950  	}()
  3951  
  3952  	vS.jobRWMutex.RLock()
  3953  	defer vS.jobRWMutex.RUnlock()
  3954  
  3955  	statVFS = make(map[StatVFSKey]uint64)
  3956  
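        	// The reported values come from per-volume configuration rather than live
        	// usage: free and available blocks/inodes are simply reported equal to
        	// their totals.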
  3957  	statVFS[StatVFSFilesystemID] = vS.inodeVolumeHandle.GetFSID()
  3958  	statVFS[StatVFSBlockSize] = vS.reportedBlockSize
  3959  	statVFS[StatVFSFragmentSize] = vS.reportedFragmentSize
  3960  	statVFS[StatVFSTotalBlocks] = vS.reportedNumBlocks
  3961  	statVFS[StatVFSFreeBlocks] = vS.reportedNumBlocks
  3962  	statVFS[StatVFSAvailBlocks] = vS.reportedNumBlocks
  3963  	statVFS[StatVFSTotalInodes] = vS.reportedNumInodes
  3964  	statVFS[StatVFSFreeInodes] = vS.reportedNumInodes
  3965  	statVFS[StatVFSAvailInodes] = vS.reportedNumInodes
  3966  	statVFS[StatVFSMountFlags] = 0
  3967  	statVFS[StatVFSMaxFilenameLen] = FileNameMax
  3968  
  3969  	return statVFS, nil
  3970  }
  3971  
  3972  func (vS *volumeStruct) Symlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, target string) (symlinkInodeNumber inode.InodeNumber, err error) {
  3973  	startTime := time.Now()
  3974  	defer func() {
  3975  		globals.SymlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  3976  		if err != nil {
  3977  			globals.SymlinkErrors.Add(1)
  3978  		}
  3979  	}()
  3980  
  3981  	vS.jobRWMutex.RLock()
  3982  	defer vS.jobRWMutex.RUnlock()
  3983  
  3984  	err = validateBaseName(basename)
  3985  	if err != nil {
  3986  		return
  3987  	}
  3988  
  3989  	err = validateFullPath(target)
  3990  	if err != nil {
  3991  		return
  3992  	}
  3993  
  3994  	// Mode for symlinks defaults to rwxrwxrwx, i.e. inode.PosixModePerm
  3995  	symlinkInodeNumber, err = vS.inodeVolumeHandle.CreateSymlink(target, inode.PosixModePerm, userID, groupID)
  3996  	if err != nil {
  3997  		return
  3998  	}
  3999  
  4000  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  4001  	if err != nil {
  4002  		return
  4003  	}
  4004  	err = inodeLock.WriteLock()
  4005  	if err != nil {
  4006  		return
  4007  	}
  4008  	defer inodeLock.Unlock()
  4009  
  4010  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  4011  		inode.NoOverride) {
  4012  
  4013  		destroyErr := vS.inodeVolumeHandle.Destroy(symlinkInodeNumber)
  4014  		if destroyErr != nil {
  4015  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(F_OK) in fs.Symlink", symlinkInodeNumber)
  4016  		}
  4017  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  4018  		return
  4019  	}
  4020  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
  4021  		inode.NoOverride) {
  4022  
  4023  		destroyErr := vS.inodeVolumeHandle.Destroy(symlinkInodeNumber)
  4024  		if destroyErr != nil {
  4025  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(W_OK|X_OK) in fs.Symlink", symlinkInodeNumber)
  4026  		}
  4027  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  4028  		return
  4029  	}
  4030  
  4031  	err = vS.inodeVolumeHandle.Link(inodeNumber, basename, symlinkInodeNumber, false)
  4032  	if err != nil {
  4033  		destroyErr := vS.inodeVolumeHandle.Destroy(symlinkInodeNumber)
  4034  		if destroyErr != nil {
  4035  			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Link() in fs.Symlink", symlinkInodeNumber)
  4036  		}
  4037  		return
  4038  	}
  4039  
  4040  	return
  4041  }
  4042  
  4043  func (vS *volumeStruct) Unlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error) {
  4044  	startTime := time.Now()
  4045  	defer func() {
  4046  		globals.UnlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  4047  		if err != nil {
  4048  			globals.UnlinkErrors.Add(1)
  4049  		}
  4050  	}()
  4051  
  4052  	vS.jobRWMutex.RLock()
  4053  	defer vS.jobRWMutex.RUnlock()
  4054  
  4055  	callerID := dlm.GenerateCallerID()
  4056  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, callerID)
  4057  	if err != nil {
  4058  		return
  4059  	}
  4060  	err = inodeLock.WriteLock()
  4061  	if err != nil {
  4062  		return
  4063  	}
  4064  	defer inodeLock.Unlock()
  4065  
  4066  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  4067  		inode.NoOverride) {
  4068  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  4069  		return
  4070  	}
  4071  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
  4072  		inode.NoOverride) {
  4073  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  4074  		return
  4075  	}
  4076  
  4077  	basenameInodeNumber, err := vS.inodeVolumeHandle.Lookup(inodeNumber, basename)
  4078  	if nil != err {
  4079  		return
  4080  	}
  4081  
  4082  	basenameInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(basenameInodeNumber, callerID)
  4083  	if err != nil {
  4084  		return
  4085  	}
  4086  	err = basenameInodeLock.WriteLock()
  4087  	if err != nil {
  4088  		return
  4089  	}
  4090  	defer basenameInodeLock.Unlock()
  4091  
  4092  	err = vS.unlinkActual(inodeNumber, basename, basenameInodeNumber)
  4093  	return
  4094  }
  4095  
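        // unlinkActual assumes the caller holds write locks on both the parent and
        // the target. It refuses directories (Rmdir() handles those), removes the
        // directory entry, and destroys the target inode once its link count drops
        // to zero.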
  4096  func (vS *volumeStruct) unlinkActual(inodeNumber inode.InodeNumber,
  4097  	basename string, basenameInodeNumber inode.InodeNumber) (err error) {
  4098  
  4099  	basenameInodeType, err := vS.inodeVolumeHandle.GetType(basenameInodeNumber)
  4100  	if nil != err {
  4101  		return
  4102  	}
  4103  
  4104  	if inode.DirType == basenameInodeType {
  4105  		err = fmt.Errorf("Unlink() called on a Directory")
  4106  		err = blunder.AddError(err, blunder.IsDirError)
  4107  		return
  4108  	}
  4109  
  4110  	err = vS.inodeVolumeHandle.Unlink(inodeNumber, basename, false)
  4111  	if nil != err {
  4112  		return
  4113  	}
  4114  
  4115  	basenameLinkCount, err := vS.inodeVolumeHandle.GetLinkCount(basenameInodeNumber)
  4116  	if nil != err {
  4117  		return
  4118  	}
  4119  
  4120  	if 0 == basenameLinkCount {
  4121  		vS.untrackInFlightFileInodeData(basenameInodeNumber, false)
  4122  		err = vS.inodeVolumeHandle.Destroy(basenameInodeNumber)
  4123  		if nil != err {
  4124  			return
  4125  		}
  4126  	}
  4127  
  4128  	return
  4129  }
  4130  
  4131  func (vS *volumeStruct) VolumeName() (volumeName string) {
  4132  	startTime := time.Now()
  4133  
  4134  	volumeName = vS.volumeName
  4135  	globals.VolumeNameUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  4136  	return
  4137  }
  4138  
  4139  func (vS *volumeStruct) Write(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, buf []byte, profiler *utils.Profiler) (size uint64, err error) {
  4140  	startTime := time.Now()
  4141  	defer func() {
  4142  		globals.WriteUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
  4143  		globals.WriteBytes.Add(size)
  4144  		if err != nil {
  4145  			globals.WriteErrors.Add(1)
  4146  		}
  4147  	}()
  4148  
  4149  	vS.jobRWMutex.RLock()
  4150  	defer vS.jobRWMutex.RUnlock()
  4151  
  4152  	logger.Tracef("fs.Write(): starting volume '%s' inode %v offset %v len %v",
  4153  		vS.volumeName, inodeNumber, offset, len(buf))
  4154  
  4155  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  4156  	if err != nil {
  4157  		return
  4158  	}
  4159  	err = inodeLock.WriteLock()
  4160  	if err != nil {
  4161  		return
  4162  	}
  4163  	defer inodeLock.Unlock()
  4164  
  4165  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  4166  		inode.NoOverride) {
  4167  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  4168  		return
  4169  	}
  4170  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  4171  		inode.OwnerOverride) {
  4172  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  4173  		return
  4174  	}
  4175  
  4176  	profiler.AddEventNow("before inode.Write()")
  4177  	err = vS.inodeVolumeHandle.Write(inodeNumber, offset, buf, profiler)
  4178  	profiler.AddEventNow("after inode.Write()")
  4179  	// write to Swift presumably succeeds or fails as a whole
  4180  	if err != nil {
  4181  		return 0, err
  4182  	}
  4183  
  4184  	logger.Tracef("fs.Write(): tracking write volume '%s' inode %v", vS.volumeName, inodeNumber)
  4185  	vS.trackInFlightFileInodeData(inodeNumber)
  4186  	size = uint64(len(buf))
  4187  
  4188  	return
  4189  }
  4190  
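        // Wrote records data that was written directly to the backing object store
        // (presumably by middleware) rather than through Write(): any in-flight data
        // for the inode is flushed and untracked first, then inode.Wrote() maps the
        // given fileOffset ranges onto containerName/objectName at the corresponding
        // objectOffset/length.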
  4191  func (vS *volumeStruct) Wrote(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, containerName string, objectName string, fileOffset []uint64, objectOffset []uint64, length []uint64) (err error) {
  4192  	vS.jobRWMutex.RLock()
  4193  	defer vS.jobRWMutex.RUnlock()
  4194  
  4195  	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
  4196  	if err != nil {
  4197  		return
  4198  	}
  4199  	err = inodeLock.WriteLock()
  4200  	if err != nil {
  4201  		return
  4202  	}
  4203  	defer inodeLock.Unlock()
  4204  
  4205  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
  4206  		inode.NoOverride) {
  4207  		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
  4208  		return
  4209  	}
  4210  	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
  4211  		inode.OwnerOverride) {
  4212  		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
  4213  		return
  4214  	}
  4215  
  4216  	err = vS.inodeVolumeHandle.Flush(inodeNumber, false)
  4217  	vS.untrackInFlightFileInodeData(inodeNumber, false)
        	if err != nil {
        		return // skip inode.Wrote() if the preceding Flush() failed
        	}
  4218  
  4219  	err = vS.inodeVolumeHandle.Wrote(inodeNumber, containerName, objectName, fileOffset, objectOffset, length, true)
  4220  
  4221  	return // err, as set by inode.Wrote(), is sufficient
  4222  }
  4223  
  4224  func validateBaseName(baseName string) (err error) {
  4225  	// Make sure the file baseName is not too long
  4226  	baseLen := len(baseName)
  4227  	if baseLen > FileNameMax {
  4228  		err = fmt.Errorf("%s: basename is too long. Length %v, max %v", utils.GetFnName(), baseLen, FileNameMax)
  4229  		logger.ErrorWithError(err)
  4230  		return blunder.AddError(err, blunder.NameTooLongError)
  4231  	}
  4232  	return
  4233  }
  4234  
  4235  func validateFullPath(fullPath string) (err error) {
  4236  	pathLen := len(fullPath)
  4237  	if pathLen > FilePathMax {
  4238  		err = fmt.Errorf("%s: fullpath is too long. Length %v, max %v", utils.GetFnName(), pathLen, FilePathMax)
  4239  		logger.ErrorWithError(err)
  4240  		return blunder.AddError(err, blunder.NameTooLongError)
  4241  	}
  4242  	return
  4243  }
  4244  
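        // revSplitPath splits a slash-delimited path into its segments in reverse
        // order; an empty or root path yields an empty slice. For example:
        //
        //	revSplitPath("/a/b/c") // returns []string{"c", "b", "a"}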
  4245  func revSplitPath(fullpath string) []string {
  4246  	// TrimPrefix avoids empty [0] element in pathSegments
  4247  	trimmed := strings.TrimPrefix(fullpath, "/")
  4248  	if trimmed == "" {
  4249  		// path.Clean("") = ".", which is not useful
  4250  		return []string{}
  4251  	}
  4252  
  4253  	segments := strings.Split(path.Clean(trimmed), "/")
  4254  	slen := len(segments)
  4255  	for i := 0; i < slen/2; i++ {
  4256  		segments[i], segments[slen-i-1] = segments[slen-i-1], segments[i]
  4257  	}
  4258  	return segments
  4259  }
  4260  
  4261  // Utility function to unlink, but not destroy, a particular file or empty subdirectory.
  4262  //
  4263  // This function checks that the directory is empty.
  4264  //
  4265  // The caller of this function must hold appropriate locks.
  4266  //
  4267  // obstacleInodeNumber must refer to an existing file or directory
  4268  // that is (a) already part of the directory tree and (b) not the root
  4269  // directory.
  4270  func (vS *volumeStruct) removeObstacleToObjectPut(callerID dlm.CallerID, dirInodeNumber inode.InodeNumber, obstacleName string, obstacleInodeNumber inode.InodeNumber) error {
  4271  	statResult, err := vS.getstatHelper(obstacleInodeNumber, callerID)
  4272  	if err != nil {
  4273  		return err
  4274  	}
  4275  
  4276  	fileType := inode.InodeType(statResult[StatFType])
  4277  	if fileType == inode.FileType || fileType == inode.SymlinkType {
  4278  		// Files and symlinks can always, barring errors, be unlinked
  4279  		err = vS.inodeVolumeHandle.Unlink(dirInodeNumber, obstacleName, false)
  4280  		if err != nil {
  4281  			return err
  4282  		}
  4283  	} else if fileType == inode.DirType {
  4284  		numEntries, err := vS.inodeVolumeHandle.NumDirEntries(obstacleInodeNumber)
  4285  		if err != nil {
  4286  			return err
  4287  		}
  4288  		if numEntries >= 3 {
  4289  			// We're looking at a pre-existing, user-visible directory
  4290  			// that's linked into the directory structure, so we've
  4291  			// got at least two entries, namely "." and ".."
  4292  			//
  4293  			// If there's a third, then the directory is non-empty.
  4294  			return blunder.NewError(blunder.NotEmptyError, "%s is a non-empty directory", obstacleName)
  4295  
  4296  		} else {
  4297  			// We don't want to call Rmdir() here since
  4298  			// that function (a) grabs locks, (b) checks
  4299  			// that it's a directory and is empty, then
  4300  			// (c) calls Unlink() and Destroy().
  4301  			//
  4302  			// We already have the locks and we've already
  4303  			// checked that it's empty, so let's just get
  4304  			// down to it.
  4305  			err = vS.inodeVolumeHandle.Unlink(dirInodeNumber, obstacleName, false)
  4306  			if err != nil {
  4307  				return err
  4308  			}
  4309  		}
  4310  	}
  4311  	return nil
  4312  }
  4313  
  4314  // Utility function to append entries to reply
  4315  func appendReadPlanEntries(readPlan []inode.ReadPlanStep, readRangeOut *[]inode.ReadPlanStep) (numEntries uint64) {
  4316  	for i := range readPlan {
  4317  		entry := inode.ReadPlanStep{ObjectPath: readPlan[i].ObjectPath, Offset: readPlan[i].Offset, Length: readPlan[i].Length}
  4318  		*readRangeOut = append(*readRangeOut, entry)
  4319  		numEntries++
  4320  	}
  4321  	return
  4322  }