github.com/swiftstack/ProxyFS@v0.0.0-20210203235616-4017c267d62f/inode/inode.go (about)

     1  // Copyright (c) 2015-2021, NVIDIA CORPORATION.
     2  // SPDX-License-Identifier: Apache-2.0
     3  
     4  package inode
     5  
     6  import (
     7  	"container/list"
     8  	"encoding/json"
     9  	"fmt"
    10  	"runtime/debug"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/ansel1/merry"
    16  	"github.com/swiftstack/cstruct"
    17  	"github.com/swiftstack/sortedmap"
    18  
    19  	"github.com/swiftstack/ProxyFS/blunder"
    20  	"github.com/swiftstack/ProxyFS/dlm"
    21  	"github.com/swiftstack/ProxyFS/evtlog"
    22  	"github.com/swiftstack/ProxyFS/halter"
    23  	"github.com/swiftstack/ProxyFS/headhunter"
    24  	"github.com/swiftstack/ProxyFS/logger"
    25  	"github.com/swiftstack/ProxyFS/stats"
    26  	"github.com/swiftstack/ProxyFS/swiftclient"
    27  	"github.com/swiftstack/ProxyFS/trackedlock"
    28  	"github.com/swiftstack/ProxyFS/utils"
    29  )
    30  
// Shorthand for inode internal API debug log id; global to the package
var int_inode_debug = logger.DbgInodeInternal

const (
	// NOTE(review): not referenced in the visible part of this file; presumably
	// a byte count for a speculative first read of an inode - confirm at use site.
	optimisticInodeFetchBytes = 2048
)

// CorruptionDetected is the first field of an inode record; it is unpacked in
// cstruct.LittleEndian form ahead of Version (see FetchOnDiskInode).
type CorruptionDetected bool

// Version is the second field of an inode record; it identifies the layout of
// the bytes that follow it. Only V1 is accepted by fetchOnDiskInode.
type Version uint64

const (
	V1                               Version = iota + 1 // use type/struct onDiskInodeV1Struct
	// Passed as the second argument of sortedmap.OldBPlusTree when a V1 inode's
	// payload B+Tree is opened in fetchOnDiskInode.
	onDiskInodeV1PayloadObjectOffset uint64  = 0
)
    45  
// onDiskInodeV1Struct is the Version V1 body of an inode record. fetchOnDiskInode
// decodes it with json.Unmarshal from the bytes that follow the record's
// CorruptionDetected and Version fields, and it is embedded in
// inMemoryInodeStruct.
type onDiskInodeV1Struct struct { // Preceded "on disk" by CorruptionDetected then Version both in cstruct.LittleEndian form
	InodeNumber
	InodeType
	LinkCount           uint64
	Size                uint64
	CreationTime        time.Time
	ModificationTime    time.Time
	AccessTime          time.Time
	AttrChangeTime      time.Time
	NumWrites           uint64
	Mode                InodeMode
	UserID              InodeUserID
	GroupID             InodeGroupID
	StreamMap           map[string][]byte
	PayloadObjectNumber uint64            // DirInode:     B+Tree Root with Key == dir_entry_name, Value = InodeNumber
	PayloadObjectLength uint64            // FileInode:    B+Tree Root with Key == fileOffset, Value = fileExtent
	SymlinkTarget       string            // SymlinkInode: target path of symbolic link
	LogSegmentMap       map[uint64]uint64 // FileInode:    Key == LogSegment#, Value = file user data byte count
}
    65  
// inFlightLogSegmentStruct is the value type stored (by pointer) in
// inMemoryInodeStruct.inFlightLogSegmentMap, keyed by logSegmentNumber. It
// carries links for an open-log-segment LRU, a back pointer to the owning file
// inode, the account/container/object names, and an embedded
// swiftclient.ChunkedPutContext.
type inFlightLogSegmentStruct struct { //               Used as (by reference) Value for inMemoryInodeStruct.inFlightLogSegmentMap
	logSegmentNumber          uint64 //                 Used as (by value)     Key   for inMemoryInodeStruct.inFlightLogSegmentMap
	openLogSegmentLRUNext     *inFlightLogSegmentStruct
	openLogSegmentLRUPrev     *inFlightLogSegmentStruct
	fileInode                 *inMemoryInodeStruct
	accountName               string
	containerName             string
	objectName                string
	openLogSegmentListElement list.Element
	swiftclient.ChunkedPutContext
}
    77  
// inMemoryInodeStruct is the in-memory form of an inode: the embedded on-disk
// fields (onDiskInodeV1Struct) plus the links that place it on the owning
// volume's inodeCacheLRU list, a dirty flag, the owning volume, the snapshot ID
// decoded from its inode number, its payload B+Tree and (for file inodes) the
// in-flight log segment bookkeeping.
type inMemoryInodeStruct struct {
	trackedlock.Mutex //                                             Used to synchronize with background fileInodeFlusherDaemon
	sync.WaitGroup    //                                             FileInode Flush requests wait on this
	inodeCacheLRUNext *inMemoryInodeStruct
	inodeCacheLRUPrev *inMemoryInodeStruct
	dirty             bool
	volume            *volumeStruct
	snapShotID        uint64
	payload           interface{} //                                 DirInode:  B+Tree with Key == dir_entry_name, Value = InodeNumber
	//                                                               FileInode: B+Tree with Key == fileOffset, Value = *fileExtent
	openLogSegment           *inFlightLogSegmentStruct            // FileInode only... also in inFlightLogSegmentMap
	inFlightLogSegmentMap    map[uint64]*inFlightLogSegmentStruct // FileInode: key == logSegmentNumber
	inFlightLogSegmentErrors map[uint64]error                     // FileInode: key == logSegmentNumber; value == err (if non nil)
	onDiskInodeV1Struct                                           // Real on-disk inode information embedded here
}
    93  
    94  func (vS *volumeStruct) DumpKey(key sortedmap.Key) (keyAsString string, err error) {
    95  	keyAsInodeNumber, ok := key.(InodeNumber)
    96  	if !ok {
    97  		err = fmt.Errorf("inode.volumeStruct.DumpKey() could not parse key as a InodeNumber")
    98  		return
    99  	}
   100  
   101  	keyAsString = fmt.Sprintf("0x%016X", keyAsInodeNumber)
   102  
   103  	err = nil
   104  	return
   105  }
   106  
   107  func (vS *volumeStruct) DumpValue(value sortedmap.Value) (valueAsString string, err error) {
   108  	valueAsInMemoryInodeStructPtr, ok := value.(*inMemoryInodeStruct)
   109  	if !ok {
   110  		err = fmt.Errorf("inode.volumeStruct.DumpValue() could not parse value as a *inMemoryInodeStruct")
   111  		return
   112  	}
   113  
   114  	valueAsString = fmt.Sprintf("%016p", valueAsInMemoryInodeStructPtr)
   115  
   116  	err = nil
   117  	return
   118  }
   119  
   120  func compareInodeNumber(key1 sortedmap.Key, key2 sortedmap.Key) (result int, err error) {
   121  	key1InodeNumber, ok := key1.(InodeNumber)
   122  	if !ok {
   123  		err = fmt.Errorf("compareInodeNumber(non-InodeNumber,) not supported")
   124  		return
   125  	}
   126  	key2InodeNumber, ok := key2.(InodeNumber)
   127  	if !ok {
   128  		err = fmt.Errorf("compareInodeNumber(InodeNumber, non-InodeNumber) not supported")
   129  		return
   130  	}
   131  
   132  	if key1InodeNumber < key2InodeNumber {
   133  		result = -1
   134  	} else if key1InodeNumber == key2InodeNumber {
   135  		result = 0
   136  	} else { // key1InodeNumber > key2InodeNumber
   137  		result = 1
   138  	}
   139  
   140  	err = nil
   141  
   142  	return
   143  }
   144  
   145  func setRWMode(rwMode RWModeType) (err error) {
   146  	if rwMode != globals.rwMode {
   147  		switch rwMode {
   148  		case RWModeNormal:
   149  			stats.IncrementOperations(&stats.ReconCheckTriggeredNormalMode)
   150  		case RWModeNoWrite:
   151  			stats.IncrementOperations(&stats.ReconCheckTriggeredNoWriteMode)
   152  		case RWModeReadOnly:
   153  			stats.IncrementOperations(&stats.ReconCheckTriggeredReadOnlyMode)
   154  		default:
   155  			err = fmt.Errorf("SetRWMode(rwMode==%d) not allowed... must be one of RWModeNormal(%d), RWModeNoWrite(%d), or RWModeReadOnly(%d)", rwMode, RWModeNormal, RWModeNoWrite, RWModeReadOnly)
   156  			return
   157  		}
   158  
   159  		globals.rwMode = rwMode
   160  	}
   161  
   162  	err = nil
   163  	return
   164  }
   165  
   166  func enforceRWMode(enforceNoWriteMode bool) (err error) {
   167  	var (
   168  		rwModeCopy RWModeType
   169  	)
   170  
   171  	rwModeCopy = globals.rwMode
   172  
   173  	if rwModeCopy == RWModeReadOnly {
   174  		err = blunder.NewError(globals.readOnlyThresholdErrno, globals.readOnlyThresholdErrnoString)
   175  	} else if enforceNoWriteMode && (rwModeCopy == RWModeNoWrite) {
   176  		err = blunder.NewError(globals.noWriteThresholdErrno, globals.noWriteThresholdErrnoString)
   177  	} else {
   178  		err = nil
   179  	}
   180  
   181  	return
   182  }
   183  
   184  func (vS *volumeStruct) FetchOnDiskInode(inodeNumber InodeNumber) (corruptionDetected CorruptionDetected, version Version, onDiskInode []byte, err error) {
   185  	var (
   186  		bytesConsumedByCorruptionDetected uint64
   187  		bytesConsumedByVersion            uint64
   188  		inodeRec                          []byte
   189  		ok                                bool
   190  	)
   191  
   192  	corruptionDetected = CorruptionDetected(false)
   193  	version = Version(0)
   194  	onDiskInode = make([]byte, 0)
   195  
   196  	inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber))
   197  	if nil != err {
   198  		err = fmt.Errorf("headhunter.GetInodeRec() failed: %v", err)
   199  		return
   200  	}
   201  	if !ok {
   202  		err = fmt.Errorf("headhunter.GetInodeRec() returned !ok")
   203  		return
   204  	}
   205  
   206  	bytesConsumedByCorruptionDetected, err = cstruct.Unpack(inodeRec, &corruptionDetected, cstruct.LittleEndian)
   207  	if nil != err {
   208  		err = fmt.Errorf("cstruct.Unpack(,&corruptionDetected,) failed: %v", err)
   209  		return
   210  	}
   211  	if corruptionDetected {
   212  		return
   213  	}
   214  
   215  	bytesConsumedByVersion, err = cstruct.Unpack(inodeRec[bytesConsumedByCorruptionDetected:], &version, cstruct.LittleEndian)
   216  	if nil != err {
   217  		err = fmt.Errorf("cstruct.Unpack(,&version,) failed: %v", err)
   218  		return
   219  	}
   220  
   221  	onDiskInode = inodeRec[bytesConsumedByCorruptionDetected+bytesConsumedByVersion:]
   222  
   223  	return
   224  }
   225  
   226  func (vS *volumeStruct) fetchOnDiskInode(inodeNumber InodeNumber) (inMemoryInode *inMemoryInodeStruct, ok bool, err error) {
   227  	var (
   228  		bytesConsumedByCorruptionDetected uint64
   229  		bytesConsumedByVersion            uint64
   230  		corruptionDetected                CorruptionDetected
   231  		inodeRec                          []byte
   232  		onDiskInodeV1                     *onDiskInodeV1Struct
   233  		snapShotID                        uint64
   234  		snapShotIDType                    headhunter.SnapShotIDType
   235  		version                           Version
   236  	)
   237  
   238  	snapShotIDType, snapShotID, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
   239  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
   240  		logger.Fatalf("fetchOnDiskInode for headhunter.SnapShotIDTypeDotSnapShot not allowed")
   241  	}
   242  
   243  	inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber))
   244  	if nil != err {
   245  		stackStr := string(debug.Stack())
   246  		err = fmt.Errorf("%s: unable to get inodeRec for inode %d: %v stack: %s",
   247  			utils.GetFnName(), inodeNumber, err, stackStr)
   248  		err = blunder.AddError(err, blunder.NotFoundError)
   249  		return
   250  	}
   251  	if !ok {
   252  		return
   253  	}
   254  
   255  	bytesConsumedByCorruptionDetected, err = cstruct.Unpack(inodeRec, &corruptionDetected, cstruct.LittleEndian)
   256  	if nil != err {
   257  		err = fmt.Errorf("%s: unable to parse inodeRec.CorruptionDetected for inode %d: %v", utils.GetFnName(), inodeNumber, err)
   258  		err = blunder.AddError(err, blunder.CorruptInodeError)
   259  		return
   260  	}
   261  	if corruptionDetected {
   262  		err = fmt.Errorf("%s: inode %d has been marked corrupted", utils.GetFnName(), inodeNumber)
   263  		err = blunder.AddError(err, blunder.CorruptInodeError)
   264  		return
   265  	}
   266  
   267  	bytesConsumedByVersion, err = cstruct.Unpack(inodeRec[bytesConsumedByCorruptionDetected:], &version, cstruct.LittleEndian)
   268  	if nil != err {
   269  		err = fmt.Errorf("%s: unable to get inodeRec.Version for inode %d: %v", utils.GetFnName(), inodeNumber, err)
   270  		err = blunder.AddError(err, blunder.CorruptInodeError)
   271  		return
   272  	}
   273  	if V1 != version {
   274  		err = fmt.Errorf("%s: inodeRec.Version for inode %d (%v) not supported", utils.GetFnName(), inodeNumber, version)
   275  		err = blunder.AddError(err, blunder.CorruptInodeError)
   276  		return
   277  	}
   278  
   279  	onDiskInodeV1 = &onDiskInodeV1Struct{StreamMap: make(map[string][]byte)}
   280  
   281  	err = json.Unmarshal(inodeRec[bytesConsumedByCorruptionDetected+bytesConsumedByVersion:], onDiskInodeV1)
   282  	if nil != err {
   283  		err = fmt.Errorf("%s: inodeRec.<body> for inode %d json.Unmarshal() failed: %v", utils.GetFnName(), inodeNumber, err)
   284  		err = blunder.AddError(err, blunder.CorruptInodeError)
   285  		return
   286  	}
   287  
   288  	inMemoryInode = &inMemoryInodeStruct{
   289  		inodeCacheLRUNext:        nil,
   290  		inodeCacheLRUPrev:        nil,
   291  		dirty:                    false,
   292  		volume:                   vS,
   293  		snapShotID:               snapShotID,
   294  		openLogSegment:           nil,
   295  		inFlightLogSegmentMap:    make(map[uint64]*inFlightLogSegmentStruct),
   296  		inFlightLogSegmentErrors: make(map[uint64]error),
   297  		onDiskInodeV1Struct:      *onDiskInodeV1,
   298  	}
   299  
   300  	inMemoryInode.onDiskInodeV1Struct.InodeNumber = inodeNumber
   301  
   302  	switch inMemoryInode.InodeType {
   303  	case DirType:
   304  		if 0 == inMemoryInode.PayloadObjectNumber {
   305  			inMemoryInode.payload =
   306  				sortedmap.NewBPlusTree(
   307  					vS.maxEntriesPerDirNode,
   308  					sortedmap.CompareString,
   309  					&dirInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
   310  					globals.dirEntryCache)
   311  		} else {
   312  			inMemoryInode.payload, err =
   313  				sortedmap.OldBPlusTree(
   314  					inMemoryInode.PayloadObjectNumber,
   315  					onDiskInodeV1PayloadObjectOffset,
   316  					inMemoryInode.PayloadObjectLength,
   317  					sortedmap.CompareString,
   318  					&dirInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
   319  					globals.dirEntryCache)
   320  			if nil != err {
   321  				err = fmt.Errorf("%s: sortedmap.OldBPlusTree(inodeRec.<body>.PayloadObjectNumber) for DirType inode %d failed: %v", utils.GetFnName(), inodeNumber, err)
   322  				err = blunder.AddError(err, blunder.CorruptInodeError)
   323  				return
   324  			}
   325  		}
   326  	case FileType:
   327  		if 0 == inMemoryInode.PayloadObjectNumber {
   328  			inMemoryInode.payload =
   329  				sortedmap.NewBPlusTree(
   330  					vS.maxExtentsPerFileNode,
   331  					sortedmap.CompareUint64,
   332  					&fileInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
   333  					globals.fileExtentMapCache)
   334  		} else {
   335  			inMemoryInode.payload, err =
   336  				sortedmap.OldBPlusTree(
   337  					inMemoryInode.PayloadObjectNumber,
   338  					onDiskInodeV1PayloadObjectOffset,
   339  					inMemoryInode.PayloadObjectLength,
   340  					sortedmap.CompareUint64,
   341  					&fileInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
   342  					globals.fileExtentMapCache)
   343  			if nil != err {
   344  				err = fmt.Errorf("%s: sortedmap.OldBPlusTree(inodeRec.<body>.PayloadObjectNumber) for FileType inode %d failed: %v", utils.GetFnName(), inodeNumber, err)
   345  				err = blunder.AddError(err, blunder.CorruptInodeError)
   346  				return
   347  			}
   348  		}
   349  	case SymlinkType:
   350  		// Nothing special here
   351  	default:
   352  		err = fmt.Errorf("%s: inodeRec.InodeType for inode %d (%v) not supported", utils.GetFnName(), inodeNumber, inMemoryInode.InodeType)
   353  		err = blunder.AddError(err, blunder.CorruptInodeError)
   354  		return
   355  	}
   356  
   357  	err = nil
   358  	return
   359  }
   360  
   361  func (vS *volumeStruct) inodeCacheFetchWhileLocked(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) {
   362  	var (
   363  		inodeAsValue sortedmap.Value
   364  	)
   365  
   366  	inodeAsValue, ok, err = vS.inodeCache.GetByKey(inodeNumber)
   367  	if nil != err {
   368  		return
   369  	}
   370  
   371  	if ok {
   372  		inode, ok = inodeAsValue.(*inMemoryInodeStruct)
   373  		if ok {
   374  			vS.inodeCacheTouchWhileLocked(inode)
   375  			err = nil
   376  		} else {
   377  			ok = false
   378  			err = fmt.Errorf("inodeCache[inodeNumber==0x%016X] contains a value not mappable to a *inMemoryInodeStruct", inodeNumber)
   379  		}
   380  	}
   381  
   382  	return
   383  }
   384  
   385  func (vS *volumeStruct) inodeCacheFetch(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) {
   386  	vS.Lock()
   387  	inode, ok, err = vS.inodeCacheFetchWhileLocked(inodeNumber)
   388  	vS.Unlock()
   389  	return
   390  }
   391  
   392  func (vS *volumeStruct) inodeCacheInsertWhileLocked(inode *inMemoryInodeStruct) (ok bool, err error) {
   393  	ok, err = vS.inodeCache.Put(inode.InodeNumber, inode)
   394  	if (nil != err) || !ok {
   395  		return
   396  	}
   397  
   398  	// Place inode at the MRU end of inodeCacheLRU
   399  
   400  	if 0 == vS.inodeCacheLRUItems {
   401  		vS.inodeCacheLRUHead = inode
   402  		vS.inodeCacheLRUTail = inode
   403  		vS.inodeCacheLRUItems = 1
   404  	} else {
   405  		inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail
   406  		inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode
   407  
   408  		vS.inodeCacheLRUTail = inode
   409  		vS.inodeCacheLRUItems++
   410  	}
   411  
   412  	return
   413  }
   414  
   415  func (vS *volumeStruct) inodeCacheInsert(inode *inMemoryInodeStruct) (ok bool, err error) {
   416  	vS.Lock()
   417  	ok, err = vS.inodeCacheInsertWhileLocked(inode)
   418  	vS.Unlock()
   419  	return
   420  }
   421  
   422  func (vS *volumeStruct) inodeCacheTouchWhileLocked(inode *inMemoryInodeStruct) {
   423  	// Move inode to the MRU end of inodeCacheLRU
   424  
   425  	if inode != vS.inodeCacheLRUTail {
   426  		if inode == vS.inodeCacheLRUHead {
   427  			vS.inodeCacheLRUHead = inode.inodeCacheLRUNext
   428  			vS.inodeCacheLRUHead.inodeCacheLRUPrev = nil
   429  
   430  			inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail
   431  			inode.inodeCacheLRUNext = nil
   432  
   433  			vS.inodeCacheLRUTail.inodeCacheLRUNext = inode
   434  			vS.inodeCacheLRUTail = inode
   435  		} else {
   436  			inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode.inodeCacheLRUNext
   437  			inode.inodeCacheLRUNext.inodeCacheLRUPrev = inode.inodeCacheLRUPrev
   438  
   439  			inode.inodeCacheLRUNext = nil
   440  			inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail
   441  
   442  			vS.inodeCacheLRUTail.inodeCacheLRUNext = inode
   443  			vS.inodeCacheLRUTail = inode
   444  		}
   445  	}
   446  }
   447  
// inodeCacheTouch moves inode to the MRU end of the volume's inodeCacheLRU
// list; it takes the volume lock around inodeCacheTouchWhileLocked.
func (vS *volumeStruct) inodeCacheTouch(inode *inMemoryInodeStruct) {
	vS.Lock()
	vS.inodeCacheTouchWhileLocked(inode)
	vS.Unlock()
}
   453  
   454  // The inode cache discard thread calls this routine when the ticker goes off.
   455  func (vS *volumeStruct) inodeCacheDiscard() (discarded uint64, dirty uint64, locked uint64, lruItems uint64) {
   456  	inodesToDrop := uint64(0)
   457  
   458  	vS.Lock()
   459  
   460  	if (vS.inodeCacheLRUItems * globals.inodeSize) > vS.inodeCacheLRUMaxBytes {
   461  		// Check, at most, 1.25 * (minimum_number_to_drop)
   462  		inodesToDrop = (vS.inodeCacheLRUItems * globals.inodeSize) - vS.inodeCacheLRUMaxBytes
   463  		inodesToDrop = inodesToDrop / globals.inodeSize
   464  		inodesToDrop += inodesToDrop / 4
   465  		for (inodesToDrop > 0) && ((vS.inodeCacheLRUItems * globals.inodeSize) > vS.inodeCacheLRUMaxBytes) {
   466  			inodesToDrop--
   467  
   468  			ic := vS.inodeCacheLRUHead
   469  
   470  			// Create a DLM lock object
   471  			id := dlm.GenerateCallerID()
   472  			inodeRWLock, _ := vS.InitInodeLock(ic.InodeNumber, id)
   473  			err := inodeRWLock.TryWriteLock()
   474  
   475  			// Inode is locked; skip it
   476  			if err != nil {
   477  				// Move inode to tail of LRU
   478  				vS.inodeCacheTouchWhileLocked(ic)
   479  				locked++
   480  				continue
   481  			}
   482  
   483  			if ic.dirty {
   484  				// The inode is busy - drop the DLM lock and move to tail
   485  				inodeRWLock.Unlock()
   486  				dirty++
   487  				vS.inodeCacheTouchWhileLocked(ic)
   488  				continue
   489  			}
   490  
   491  			var ok bool
   492  
   493  			discarded++
   494  			ok, err = vS.inodeCacheDropWhileLocked(ic)
   495  			if err != nil || !ok {
   496  				pStr := fmt.Errorf("The inodes was not found in the inode cache - ok: %v err: %v", ok, err)
   497  				panic(pStr)
   498  			}
   499  
   500  			inodeRWLock.Unlock()
   501  
   502  			// NOTE: vS.inodeCacheDropWhileLocked() removed the inode from the LRU list so
   503  			// the head is now different
   504  		}
   505  	}
   506  	lruItems = vS.inodeCacheLRUItems
   507  	vS.Unlock()
   508  	//logger.Infof("discard: %v dirty: %v locked: %v LRUitems: %v", discarded, dirty, locked, lruItems)
   509  	return
   510  }
   511  
   512  func (vS *volumeStruct) inodeCacheDropWhileLocked(inode *inMemoryInodeStruct) (ok bool, err error) {
   513  	ok, err = vS.inodeCache.DeleteByKey(inode.InodeNumber)
   514  	if (nil != err) || !ok {
   515  		return
   516  	}
   517  
   518  	if inode == vS.inodeCacheLRUHead {
   519  		if inode == vS.inodeCacheLRUTail {
   520  			vS.inodeCacheLRUHead = nil
   521  			vS.inodeCacheLRUTail = nil
   522  			vS.inodeCacheLRUItems = 0
   523  		} else {
   524  			vS.inodeCacheLRUHead = inode.inodeCacheLRUNext
   525  			vS.inodeCacheLRUHead.inodeCacheLRUPrev = nil
   526  			vS.inodeCacheLRUItems--
   527  
   528  			inode.inodeCacheLRUNext = nil
   529  		}
   530  	} else {
   531  		if inode == vS.inodeCacheLRUTail {
   532  			vS.inodeCacheLRUTail = inode.inodeCacheLRUPrev
   533  			vS.inodeCacheLRUTail.inodeCacheLRUNext = nil
   534  			vS.inodeCacheLRUItems--
   535  
   536  			inode.inodeCacheLRUPrev = nil
   537  		} else {
   538  			inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode.inodeCacheLRUNext
   539  			inode.inodeCacheLRUNext.inodeCacheLRUPrev = inode.inodeCacheLRUPrev
   540  			vS.inodeCacheLRUItems--
   541  
   542  			inode.inodeCacheLRUNext = nil
   543  			inode.inodeCacheLRUPrev = nil
   544  		}
   545  	}
   546  
   547  	return
   548  }
   549  
   550  func (vS *volumeStruct) inodeCacheDrop(inode *inMemoryInodeStruct) (ok bool, err error) {
   551  	vS.Lock()
   552  	ok, err = vS.inodeCacheDropWhileLocked(inode)
   553  	vS.Unlock()
   554  	return
   555  }
   556  
   557  func (vS *volumeStruct) fetchInode(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) {
   558  	for {
   559  		inode, ok, err = vS.inodeCacheFetch(inodeNumber)
   560  		if nil != err {
   561  			return
   562  		}
   563  
   564  		if ok {
   565  			return
   566  		}
   567  
   568  		inode, ok, err = vS.fetchOnDiskInode(inodeNumber)
   569  		if nil != err {
   570  			return
   571  		}
   572  		if !ok {
   573  			err = fmt.Errorf("%s.fetchInode(0x%016X) not found", vS.volumeName, inodeNumber)
   574  			return
   575  		}
   576  
   577  		ok, err = vS.inodeCacheInsert(inode)
   578  		if nil != err {
   579  			return
   580  		}
   581  
   582  		if ok {
   583  			return
   584  		}
   585  
   586  		// If we reach here, somebody beat us to it... just restart the fetch...
   587  	}
   588  }
   589  
   590  // Fetch inode with inode type checking
   591  func (vS *volumeStruct) fetchInodeType(inodeNumber InodeNumber, expectedType InodeType) (inode *inMemoryInodeStruct, err error) {
   592  	inode, ok, err := vS.fetchInode(inodeNumber)
   593  	if nil != err {
   594  		return
   595  	}
   596  	if !ok {
   597  		err = fmt.Errorf("%s: expected inode %d volume '%s' to be type %v, but it was unallocated",
   598  			utils.GetFnName(), inode.InodeNumber, vS.volumeName, expectedType)
   599  		err = blunder.AddError(err, blunder.NotFoundError)
   600  		return
   601  	}
   602  	if inode.InodeType == expectedType {
   603  		// success
   604  		return
   605  	}
   606  
   607  	err = fmt.Errorf("%s: expected inode %d volume '%s' to be type %v, got %v",
   608  		utils.GetFnName(), inode.InodeNumber, vS.volumeName, expectedType, inode.InodeType)
   609  
   610  	var errVal blunder.FsError
   611  	switch expectedType {
   612  	case DirType:
   613  		errVal = blunder.NotDirError
   614  	case FileType:
   615  		errVal = blunder.NotFileError
   616  	case SymlinkType:
   617  		errVal = blunder.NotSymlinkError
   618  	default:
   619  		panic(fmt.Sprintf("unknown inode type=%v!", expectedType))
   620  	}
   621  	err = blunder.AddError(err, errVal)
   622  
   623  	return
   624  }
   625  
   626  func (vS *volumeStruct) makeInMemoryInodeWithThisInodeNumber(inodeType InodeType, fileMode InodeMode, userID InodeUserID, groupID InodeGroupID, inodeNumber InodeNumber, volumeLocked bool) (inMemoryInode *inMemoryInodeStruct) {
   627  	var (
   628  		birthTime      time.Time
   629  		nonce          uint64
   630  		snapShotID     uint64
   631  		snapShotIDType headhunter.SnapShotIDType
   632  	)
   633  
   634  	snapShotIDType, snapShotID, nonce = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
   635  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
   636  		logger.Fatalf("makeInMemoryInodeWithThisInodeNumber for headhunter.SnapShotIDTypeDotSnapShot not allowed")
   637  	}
   638  
   639  	birthTime = time.Now()
   640  
   641  	inMemoryInode = &inMemoryInodeStruct{
   642  		inodeCacheLRUNext:        nil,
   643  		inodeCacheLRUPrev:        nil,
   644  		dirty:                    true,
   645  		volume:                   vS,
   646  		snapShotID:               snapShotID,
   647  		openLogSegment:           nil,
   648  		inFlightLogSegmentMap:    make(map[uint64]*inFlightLogSegmentStruct),
   649  		inFlightLogSegmentErrors: make(map[uint64]error),
   650  		onDiskInodeV1Struct: onDiskInodeV1Struct{
   651  			InodeNumber:      InodeNumber(nonce),
   652  			InodeType:        inodeType,
   653  			CreationTime:     birthTime,
   654  			ModificationTime: birthTime,
   655  			AccessTime:       birthTime,
   656  			AttrChangeTime:   birthTime,
   657  			NumWrites:        0,
   658  			Mode:             fileMode,
   659  			UserID:           userID,
   660  			GroupID:          groupID,
   661  			StreamMap:        make(map[string][]byte),
   662  			LogSegmentMap:    make(map[uint64]uint64),
   663  		},
   664  	}
   665  
   666  	return
   667  }
   668  
   669  func (vS *volumeStruct) makeInMemoryInode(inodeType InodeType, fileMode InodeMode, userID InodeUserID, groupID InodeGroupID) (inMemoryInode *inMemoryInodeStruct, err error) {
   670  	inodeNumberAsUint64 := vS.headhunterVolumeHandle.FetchNonce()
   671  
   672  	inMemoryInode = vS.makeInMemoryInodeWithThisInodeNumber(inodeType, fileMode, userID, groupID, InodeNumber(inodeNumberAsUint64), false)
   673  
   674  	return
   675  }
   676  
   677  func (vS *volumeStruct) PatchInode(inodeNumber InodeNumber, inodeType InodeType, linkCount uint64, mode InodeMode, userID InodeUserID, groupID InodeGroupID, parentInodeNumber InodeNumber, symlinkTarget string) (err error) {
   678  	var (
   679  		callerID                              dlm.CallerID
   680  		inode                                 *inMemoryInodeStruct
   681  		inodeNumberDecodedAsInodeNumber       InodeNumber
   682  		inodeNumberDecodedAsUint64            uint64
   683  		inodeRWLock                           *dlm.RWLockStruct
   684  		modeAdornedWithInodeType              InodeMode
   685  		ok                                    bool
   686  		parentInodeNumberDecodedAsInodeNumber InodeNumber
   687  		parentInodeNumberDecodedAsUint64      uint64
   688  		payload                               sortedmap.BPlusTree
   689  		snapShotIDType                        headhunter.SnapShotIDType
   690  	)
   691  
   692  	snapShotIDType, _, inodeNumberDecodedAsUint64 = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
   693  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
   694  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) must provide a non-SnapShot inodeNumber", inodeNumber)
   695  		return
   696  	}
   697  	inodeNumberDecodedAsInodeNumber = InodeNumber(inodeNumberDecodedAsUint64)
   698  
   699  	switch inodeType {
   700  	case DirType:
   701  		if 2 != linkCount {
   702  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,linkCount==%v,,,,,) must set linkCount to 2", inodeNumber, linkCount)
   703  			return
   704  		}
   705  		if InodeNumber(0) == parentInodeNumber {
   706  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0,) must provide a non-zero parentInodeNumber", inodeNumber)
   707  			return
   708  		}
   709  		if (RootDirInodeNumber == inodeNumber) && (RootDirInodeNumber != parentInodeNumber) {
   710  			err = fmt.Errorf("PatchInode(inodeNumber==RootDirInodeNumber,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) must provide RootDirInode's parent as also RootDirInodeNumber", parentInodeNumber)
   711  			return
   712  		}
   713  		snapShotIDType, _, parentInodeNumberDecodedAsUint64 = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
   714  		if headhunter.SnapShotIDTypeLive != snapShotIDType {
   715  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) must provide a non-SnapShot parentInodeNumber", inodeNumber, parentInodeNumber)
   716  			return
   717  		}
   718  		parentInodeNumberDecodedAsInodeNumber = InodeNumber(parentInodeNumberDecodedAsUint64)
   719  	case FileType:
   720  		if 0 == linkCount {
   721  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==FileType,linkCount==0,,,,,) must provide a non-zero linkCount", inodeNumber)
   722  			return
   723  		}
   724  	case SymlinkType:
   725  		if 0 == linkCount {
   726  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==SymlinkType,linkCount==0,,,,,) must provide a non-zero linkCount", inodeNumber)
   727  			return
   728  		}
   729  		if "" == symlinkTarget {
   730  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==SymlinkType,,,,,,symlinkTarget==\"\") must provide a non-empty symlinkTarget", inodeNumber)
   731  			return
   732  		}
   733  	default:
   734  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==%v,,,,,,) must provide a inodeType of DirType(%v), FileType(%v), or SymlinkType(%v)", inodeNumber, inodeType, DirType, FileType, SymlinkType)
   735  		return
   736  	}
   737  
   738  	modeAdornedWithInodeType, err = determineMode(mode, inodeType)
   739  	if nil != err {
   740  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==%v,,mode==0o%011o,,,,) failed: %v", inodeNumber, inodeType, mode, err)
   741  		return
   742  	}
   743  
   744  	vS.Lock()
   745  
   746  	callerID = dlm.GenerateCallerID()
   747  	inodeRWLock, _ = vS.InitInodeLock(inodeNumber, callerID)
   748  	err = inodeRWLock.TryWriteLock()
   749  	if nil != err {
   750  		vS.Unlock()
   751  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) couldn't create a *dlm.RWLockStruct: %v", inodeNumber, err)
   752  		return
   753  	}
   754  
   755  	inode, ok, err = vS.inodeCacheFetchWhileLocked(inodeNumber)
   756  	if nil != err {
   757  		_ = inodeRWLock.Unlock()
   758  		vS.Unlock()
   759  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) couldn't search inodeCache for pre-existing inode: %v", inodeNumber, err)
   760  		return
   761  	}
   762  	if ok {
   763  		if inode.dirty {
   764  			_ = inodeRWLock.Unlock()
   765  			vS.Unlock()
   766  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) of dirty Inode is not allowed", inodeNumber)
   767  			return
   768  		}
   769  		ok, err = vS.inodeCacheDropWhileLocked(inode)
   770  		if nil != err {
   771  			_ = inodeRWLock.Unlock()
   772  			vS.Unlock()
   773  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) drop of pre-existing inode from inodeCache failed: %v", inodeNumber, err)
   774  			return
   775  		}
   776  		if !ok {
   777  			_ = inodeRWLock.Unlock()
   778  			vS.Unlock()
   779  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) drop of pre-existing inode from inodeCache returned !ok", inodeNumber)
   780  			return
   781  		}
   782  	}
   783  
   784  	inode = vS.makeInMemoryInodeWithThisInodeNumber(inodeType, modeAdornedWithInodeType, userID, groupID, inodeNumberDecodedAsInodeNumber, true)
   785  
   786  	inode.dirty = true
   787  
   788  	inode.onDiskInodeV1Struct.LinkCount = linkCount
   789  
   790  	switch inodeType {
   791  	case DirType:
   792  		payload = sortedmap.NewBPlusTree(
   793  			vS.maxEntriesPerDirNode,
   794  			sortedmap.CompareString,
   795  			&dirInodeCallbacks{treeNodeLoadable{inode: inode}},
   796  			globals.dirEntryCache)
   797  
   798  		ok, err = payload.Put(".", inodeNumberDecodedAsInodeNumber)
   799  		if nil != err {
   800  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) failed to insert \".\" dirEntry: %v", inodeNumber, err)
   801  			panic(err)
   802  		}
   803  		if !ok {
   804  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) insert \".\" dirEntry got a !ok", inodeNumber)
   805  			panic(err)
   806  		}
   807  
   808  		ok, err = payload.Put("..", parentInodeNumberDecodedAsInodeNumber)
   809  		if nil != err {
   810  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) failed to insert \"..\" dirEntry: %v", inodeNumber, parentInodeNumber, err)
   811  			panic(err)
   812  		}
   813  		if !ok {
   814  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) insert \"..\" dirEntry got a !ok", inodeNumber, parentInodeNumber)
   815  			panic(err)
   816  		}
   817  
   818  		inode.payload = payload
   819  		inode.onDiskInodeV1Struct.SymlinkTarget = ""
   820  	case FileType:
   821  		payload = sortedmap.NewBPlusTree(
   822  			vS.maxExtentsPerFileNode,
   823  			sortedmap.CompareUint64,
   824  			&fileInodeCallbacks{treeNodeLoadable{inode: inode}},
   825  			globals.fileExtentMapCache)
   826  
   827  		inode.payload = payload
   828  		inode.onDiskInodeV1Struct.SymlinkTarget = ""
   829  	case SymlinkType:
   830  		inode.payload = nil
   831  		inode.onDiskInodeV1Struct.SymlinkTarget = symlinkTarget
   832  	}
   833  
   834  	ok, err = vS.inodeCacheInsertWhileLocked(inode)
   835  	if nil != err {
   836  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) failed to insert inode in inodeCache: %v", inodeNumber, err)
   837  		panic(err)
   838  	}
   839  	if !ok {
   840  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) insert of inode in inodeCache got a !ok", inodeNumber)
   841  		panic(err)
   842  	}
   843  
   844  	_ = inodeRWLock.Unlock()
   845  
   846  	vS.Unlock()
   847  
   848  	err = vS.flushInode(inode)
   849  	if nil != err {
   850  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) failed to flush: %v", inodeNumber, err)
   851  		panic(err)
   852  	}
   853  
   854  	return
   855  }
   856  
   857  func (inMemoryInode *inMemoryInodeStruct) convertToOnDiskInodeV1() (onDiskInodeV1 *onDiskInodeV1Struct, err error) {
   858  	onDiskInode := inMemoryInode.onDiskInodeV1Struct
   859  
   860  	if (DirType == inMemoryInode.InodeType) || (FileType == inMemoryInode.InodeType) {
   861  		content := inMemoryInode.payload.(sortedmap.BPlusTree)
   862  		payloadObjectNumber, payloadObjectOffset, payloadObjectLength, flushErr := content.Flush(false)
   863  		if nil != flushErr {
   864  			panic(flushErr)
   865  		}
   866  		pruneErr := content.Prune()
   867  		if nil != pruneErr {
   868  			panic(pruneErr)
   869  		}
   870  		if onDiskInodeV1PayloadObjectOffset != payloadObjectOffset {
   871  			flushErr = fmt.Errorf("Logic Error: content.Flush() should have returned payloadObjectOffset == %v", onDiskInodeV1PayloadObjectOffset)
   872  			panic(flushErr)
   873  		}
   874  		onDiskInode.PayloadObjectNumber = payloadObjectNumber
   875  		onDiskInode.PayloadObjectLength = payloadObjectLength
   876  	}
   877  
   878  	// maps are refernce types, so this needs to be copied manually
   879  
   880  	onDiskInode.StreamMap = make(map[string][]byte)
   881  	for key, value := range inMemoryInode.StreamMap {
   882  		valueCopy := make([]byte, len(value))
   883  		copy(valueCopy, value)
   884  		onDiskInode.StreamMap[key] = valueCopy
   885  	}
   886  
   887  	onDiskInode.LogSegmentMap = make(map[uint64]uint64)
   888  	for logSegmentNumber, logSegmentBytesUsed := range inMemoryInode.LogSegmentMap {
   889  		onDiskInode.LogSegmentMap[logSegmentNumber] = logSegmentBytesUsed
   890  	}
   891  
   892  	return &onDiskInode, nil
   893  }
   894  
   895  func (vS *volumeStruct) flushInode(inode *inMemoryInodeStruct) (err error) {
   896  	err = vS.flushInodes([]*inMemoryInodeStruct{inode})
   897  	return
   898  }
   899  
   900  func (vS *volumeStruct) flushInodeNumber(inodeNumber InodeNumber) (err error) {
   901  	err = vS.flushInodeNumbers([]InodeNumber{inodeNumber})
   902  	return
   903  }
   904  
   905  // REVIEW: Need to clearly explain what "flush" means (i.e. "to HH", not "to disk")
   906  
// flushInodes pushes the state of the supplied in-memory inodes out to
// HeadHunter.
//
// For each inode, in order:
//   - file inodes have their data flushed via doFileInodeDataFlush(), and any
//     LogSegmentMap entries whose valid-byte count is zero are removed from the
//     map and remembered for deletion;
//   - directory and file inodes have their payload B+Tree flushed, and the
//     inode's PayloadObjectNumber/PayloadObjectLength are updated if Flush()
//     reports a larger object number;
//   - inodes marked dirty are converted to their on-disk V1 form, marshaled
//     as JSON, prefixed with globals.inodeRecDefaultPreambleBuf and queued.
//
// Queued records are then written with a single PutInodeRecs() call, after
// which the dirty flag is cleared on every inode passed in. Finally, the
// remembered empty log segments are removed via DeleteLogSegmentRec();
// failures there are only logged as warnings.
//
// Any other failure is logged, tagged with blunder.InodeFlushError and
// returned immediately; changes already applied to in-memory inodes earlier
// in the loop are not rolled back.
func (vS *volumeStruct) flushInodes(inodes []*inMemoryInodeStruct) (err error) {
	var (
		dirtyInodeNumbers         []uint64
		dirtyInodeRecBytes        []byte
		dirtyInodeRecs            [][]byte
		emptyLogSegments          []uint64
		emptyLogSegmentsThisInode []uint64
		inode                     *inMemoryInodeStruct
		logSegmentNumber          uint64
		logSegmentValidBytes      uint64
		onDiskInodeV1             *onDiskInodeV1Struct
		onDiskInodeV1Buf          []byte
		payloadAsBPlusTree        sortedmap.BPlusTree
		payloadObjectLength       uint64
		payloadObjectNumber       uint64
		toFlushInodeNumbers       []uint64
	)

	halter.Trigger(halter.InodeFlushInodesEntry)
	defer halter.Trigger(halter.InodeFlushInodesExit)

	// Collect the inode numbers up front purely for the entry/exit event log records
	toFlushInodeNumbers = make([]uint64, 0, len(inodes))
	for _, inode = range inodes {
		toFlushInodeNumbers = append(toFlushInodeNumbers, uint64(inode.InodeNumber))
	}

	evtlog.Record(evtlog.FormatFlushInodesEntry, vS.volumeName, toFlushInodeNumbers)

	// Assemble slice of "dirty" inodes while flushing them
	dirtyInodeNumbers = make([]uint64, 0, len(inodes))
	dirtyInodeRecs = make([][]byte, 0, len(inodes))
	emptyLogSegments = make([]uint64, 0)

	for _, inode = range inodes {
		if FileType == inode.InodeType {
			err = vS.doFileInodeDataFlush(inode)
			if nil != err {
				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
				logger.ErrorWithError(err)
				err = blunder.AddError(err, blunder.InodeFlushError)
				return
			}
			// Find log segments with no valid bytes left; they are collected first
			// and removed from the map in a separate pass
			emptyLogSegmentsThisInode = make([]uint64, 0)
			for logSegmentNumber, logSegmentValidBytes = range inode.LogSegmentMap {
				if 0 == logSegmentValidBytes {
					emptyLogSegmentsThisInode = append(emptyLogSegmentsThisInode, logSegmentNumber)
				}
			}
			for _, logSegmentNumber = range emptyLogSegmentsThisInode {
				delete(inode.LogSegmentMap, logSegmentNumber)
			}
			emptyLogSegments = append(emptyLogSegments, emptyLogSegmentsThisInode...)
		}
		if SymlinkType != inode.InodeType {
			// (FileType == inode.InodeType) || (DirType == inode.InodeType)
			payloadAsBPlusTree = inode.payload.(sortedmap.BPlusTree)
			payloadObjectNumber, _, payloadObjectLength, err = payloadAsBPlusTree.Flush(false)
			if nil != err {
				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
				logger.ErrorWithError(err)
				err = blunder.AddError(err, blunder.InodeFlushError)
				return
			}
			// A larger object number from Flush() is treated as "the payload moved";
			// that is only expected for an inode already marked dirty
			if payloadObjectNumber > inode.PayloadObjectNumber {
				if !inode.dirty {
					err = fmt.Errorf("Logic error: inode.dirty should have been true")
					evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
					logger.ErrorWithError(err)
					err = blunder.AddError(err, blunder.InodeFlushError)
					return
				}
				// REVIEW: What if cache pressure flushed before we got here?
				//         Is it possible that Number doesn't get updated?

				// Only trace when replacing a previously recorded payload object
				if inode.PayloadObjectNumber != 0 {
					logger.Tracef("flushInodes(): volume '%s' %v inode %d: updating Payload"+
						" from Object %016X to %016X bytes %d to %d",
						vS.volumeName, inode.InodeType, inode.InodeNumber,
						inode.PayloadObjectNumber, payloadObjectNumber,
						inode.PayloadObjectLength, payloadObjectLength)
				}
				inode.PayloadObjectNumber = payloadObjectNumber
				inode.PayloadObjectLength = payloadObjectLength

				evtlog.Record(evtlog.FormatFlushInodesDirOrFilePayloadObjectNumberUpdated, vS.volumeName, uint64(inode.InodeNumber), payloadObjectNumber)
			}
		}
		if inode.dirty {
			onDiskInodeV1, err = inode.convertToOnDiskInodeV1()
			if nil != err {
				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
				logger.ErrorWithError(err)
				err = blunder.AddError(err, blunder.InodeFlushError)
				return
			}
			onDiskInodeV1Buf, err = json.Marshal(onDiskInodeV1)
			if nil != err {
				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
				logger.ErrorWithError(err)
				err = blunder.AddError(err, blunder.InodeFlushError)
				return
			}
			// The stored record is the default preamble followed by the JSON body
			dirtyInodeRecBytes = make([]byte, 0, len(globals.inodeRecDefaultPreambleBuf)+len(onDiskInodeV1Buf))
			dirtyInodeRecBytes = append(dirtyInodeRecBytes, globals.inodeRecDefaultPreambleBuf...)
			dirtyInodeRecBytes = append(dirtyInodeRecBytes, onDiskInodeV1Buf...)
			dirtyInodeNumbers = append(dirtyInodeNumbers, uint64(inode.InodeNumber))
			dirtyInodeRecs = append(dirtyInodeRecs, dirtyInodeRecBytes)
		}
	}

	// Go update HeadHunter (if necessary)
	if 0 < len(dirtyInodeNumbers) {
		err = vS.headhunterVolumeHandle.PutInodeRecs(dirtyInodeNumbers, dirtyInodeRecs)
		if nil != err {
			evtlog.Record(evtlog.FormatFlushInodesErrorOnHeadhunterPut, vS.volumeName, err.Error())
			logger.ErrorWithError(err)
			err = blunder.AddError(err, blunder.InodeFlushError)
			return
		}
		// Clearing the flag on every inode is safe: the ones that were not
		// queued above already had dirty == false
		for _, inode = range inodes {
			inode.dirty = false
		}
	}

	// Now do phase one of garbage collection
	if 0 < len(emptyLogSegments) {
		for _, logSegmentNumber = range emptyLogSegments {
			err = vS.headhunterVolumeHandle.DeleteLogSegmentRec(logSegmentNumber)
			if nil != err {
				// Best effort: warn and keep going; err is reset to nil below
				logger.WarnfWithError(err, "couldn't delete garbage log segment")
			}
		}
	}

	evtlog.Record(evtlog.FormatFlushInodesExit, vS.volumeName, toFlushInodeNumbers)

	err = nil
	return
}
  1046  
  1047  func (vS *volumeStruct) flushInodeNumbers(inodeNumbers []InodeNumber) (err error) {
  1048  	var (
  1049  		inode       *inMemoryInodeStruct
  1050  		inodes      []*inMemoryInodeStruct
  1051  		inodeNumber InodeNumber
  1052  		ok          bool
  1053  	)
  1054  
  1055  	// Fetch referenced inodes
  1056  	inodes = make([]*inMemoryInodeStruct, 0, len(inodeNumbers))
  1057  	for _, inodeNumber = range inodeNumbers {
  1058  		inode, ok, err = vS.fetchInode(inodeNumber)
  1059  		if nil != err {
  1060  			// the inode is locked so this should never happen (unless the inode
  1061  			// was evicted from the cache and it was corrupt when read from disk)
  1062  			// (err includes volume name and inode number)
  1063  			logger.ErrorfWithError(err, "%s: fetch of inode to flush failed", utils.GetFnName())
  1064  			err = blunder.AddError(err, blunder.InodeFlushError)
  1065  			return
  1066  		}
  1067  		if !ok {
  1068  			// this should never happen (see above)
  1069  			err = fmt.Errorf("%s: fetch of inode %d volume '%s' failed because it is unallocated",
  1070  				utils.GetFnName(), inodeNumber, vS.volumeName)
  1071  			logger.ErrorWithError(err)
  1072  			err = blunder.AddError(err, blunder.NotFoundError)
  1073  			return
  1074  		}
  1075  
  1076  		inodes = append(inodes, inode)
  1077  	}
  1078  
  1079  	err = vS.flushInodes(inodes)
  1080  
  1081  	return
  1082  }
  1083  
  1084  func accountNameToVolumeName(accountName string) (volumeName string, ok bool) {
  1085  	var (
  1086  		volume *volumeStruct
  1087  	)
  1088  
  1089  	globals.Lock()
  1090  
  1091  	volume, ok = globals.accountMap[accountName]
  1092  	if ok {
  1093  		volumeName = volume.volumeName
  1094  	}
  1095  
  1096  	globals.Unlock()
  1097  
  1098  	return
  1099  }
  1100  
  1101  func volumeNameToAccountName(volumeName string) (accountName string, ok bool) {
  1102  	var (
  1103  		volume *volumeStruct
  1104  	)
  1105  
  1106  	globals.Lock()
  1107  
  1108  	volume, ok = globals.volumeMap[volumeName]
  1109  	if ok {
  1110  		accountName = volume.accountName
  1111  	}
  1112  
  1113  	globals.Unlock()
  1114  
  1115  	return
  1116  }
  1117  
  1118  func volumeNameToActivePeerPrivateIPAddr(volumeName string) (activePeerPrivateIPAddr string, ok bool) {
  1119  	var (
  1120  		volume *volumeStruct
  1121  	)
  1122  
  1123  	globals.Lock()
  1124  
  1125  	volume, ok = globals.volumeMap[volumeName]
  1126  
  1127  	if ok {
  1128  		activePeerPrivateIPAddr = volume.volumeGroup.activePeerPrivateIPAddr
  1129  	}
  1130  
  1131  	globals.Unlock()
  1132  
  1133  	return
  1134  }
  1135  
  1136  func fetchVolumeHandle(volumeName string) (volumeHandle VolumeHandle, err error) {
  1137  	globals.Lock()
  1138  	volume, ok := globals.volumeMap[volumeName]
  1139  	globals.Unlock()
  1140  
  1141  	if !ok {
  1142  		err = fmt.Errorf("%s: volumeName \"%v\" not found", utils.GetFnName(), volumeName)
  1143  		err = blunder.AddError(err, blunder.NotFoundError)
  1144  		return
  1145  	}
  1146  
  1147  	volumeHandle = volume
  1148  
  1149  	volume.Lock()         // REVIEW: Once Tracker https://www.pivotaltracker.com/story/show/133377567
  1150  	defer volume.Unlock() //         is resolved, these two lines should be removed
  1151  
  1152  	if !volume.served {
  1153  		err = fmt.Errorf("%s: volumeName \"%v\" not served", utils.GetFnName(), volumeName)
  1154  		err = blunder.AddError(err, blunder.NotActiveError)
  1155  		return
  1156  	}
  1157  
  1158  	_, ok, err = volume.headhunterVolumeHandle.GetInodeRec(uint64(RootDirInodeNumber))
  1159  	if nil != err {
  1160  		// disk corruption of the inode btree (or software error)
  1161  		err = fmt.Errorf("%s: unable to lookup root inode for volume '%s': %v",
  1162  			utils.GetFnName(), volume.volumeName, err)
  1163  		err = blunder.AddError(err, blunder.NotFoundError)
  1164  	}
  1165  	if !ok {
  1166  		// First access didn't find root dir... so create it
  1167  		_, err = volume.createRootOrSubDir(PosixModePerm, 0, 0, true)
  1168  		if nil != err {
  1169  			err = fmt.Errorf("%s: unable to create root inode for volume '%s': %v",
  1170  				utils.GetFnName(), volume.volumeName, err)
  1171  			err = blunder.AddError(err, blunder.NotFoundError)
  1172  		}
  1173  	}
  1174  
  1175  	// If we get this far, return values are already set as desired
  1176  
  1177  	err = nil
  1178  
  1179  	return
  1180  }
  1181  
  1182  func (vS *volumeStruct) provisionPhysicalContainer(physicalContainerLayout *physicalContainerLayoutStruct) (err error) {
  1183  	if 0 == (physicalContainerLayout.containerNameSliceLoopCount % physicalContainerLayout.maxObjectsPerContainer) {
  1184  		// We need to provision a new PhysicalContainer in this PhysicalContainerLayout
  1185  
  1186  		physicalContainerNameSuffix := vS.headhunterVolumeHandle.FetchNonce()
  1187  
  1188  		newContainerName := fmt.Sprintf("%s%s", physicalContainerLayout.containerNamePrefix, utils.Uint64ToHexStr(physicalContainerNameSuffix))
  1189  
  1190  		storagePolicyHeaderValues := []string{vS.defaultPhysicalContainerLayout.containerStoragePolicy}
  1191  		newContainerHeaders := make(map[string][]string)
  1192  		newContainerHeaders["X-Storage-Policy"] = storagePolicyHeaderValues
  1193  
  1194  		err = swiftclient.ContainerPut(vS.accountName, newContainerName, newContainerHeaders)
  1195  		if nil != err {
  1196  			return
  1197  		}
  1198  
  1199  		physicalContainerLayout.containerNameSlice[physicalContainerLayout.containerNameSliceNextIndex] = newContainerName
  1200  	}
  1201  
  1202  	err = nil
  1203  	return
  1204  }
  1205  
  1206  func (vS *volumeStruct) provisionObject() (containerName string, objectNumber uint64, err error) {
  1207  	objectNumber = vS.headhunterVolumeHandle.FetchNonce()
  1208  
  1209  	vS.Lock()
  1210  
  1211  	err = vS.provisionPhysicalContainer(vS.defaultPhysicalContainerLayout)
  1212  	if nil != err {
  1213  		vS.Unlock()
  1214  		return
  1215  	}
  1216  
  1217  	containerName = vS.defaultPhysicalContainerLayout.containerNameSlice[vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex]
  1218  
  1219  	vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex++
  1220  
  1221  	if vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex == vS.defaultPhysicalContainerLayout.containersPerPeer {
  1222  		vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex = 0
  1223  		vS.defaultPhysicalContainerLayout.containerNameSliceLoopCount++
  1224  	}
  1225  
  1226  	vS.Unlock()
  1227  
  1228  	err = nil
  1229  	return
  1230  }
  1231  
  1232  func (vS *volumeStruct) Access(inodeNumber InodeNumber, userID InodeUserID, groupID InodeGroupID, otherGroupIDs []InodeGroupID, accessMode InodeMode, override AccessOverride) (accessReturn bool) {
  1233  	var (
  1234  		adjustedInodeNumber InodeNumber
  1235  		err                 error
  1236  		groupIDCheck        bool
  1237  		ok                  bool
  1238  		otherGroupID        InodeGroupID
  1239  		ourInode            *inMemoryInodeStruct
  1240  		ourInodeGroupID     InodeGroupID
  1241  		ourInodeMode        InodeMode
  1242  		ourInodeUserID      InodeUserID
  1243  		snapShotIDType      headhunter.SnapShotIDType
  1244  	)
  1245  
  1246  	snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1247  
  1248  	switch snapShotIDType {
  1249  	case headhunter.SnapShotIDTypeLive:
  1250  		adjustedInodeNumber = inodeNumber
  1251  	case headhunter.SnapShotIDTypeSnapShot:
  1252  		adjustedInodeNumber = inodeNumber
  1253  	case headhunter.SnapShotIDTypeDotSnapShot:
  1254  		adjustedInodeNumber = RootDirInodeNumber
  1255  	default:
  1256  		logger.Fatalf("headhunter.SnapShotU64Decode(inodeNumber == 0x%016X) returned unknown snapShotIDType: %v", inodeNumber, snapShotIDType)
  1257  	}
  1258  	if (headhunter.SnapShotIDTypeLive != snapShotIDType) && (0 != (W_OK & accessMode)) {
  1259  		err = blunder.NewError(blunder.InvalidArgError, "Access() where accessMode includes W_OK of non-LiveView inodeNumber not allowed")
  1260  		return
  1261  	}
  1262  
  1263  	ourInode, ok, err = vS.fetchInode(adjustedInodeNumber)
  1264  	if nil != err {
  1265  		// this indicates disk corruption or software bug
  1266  		// (err includes volume name and inode number)
  1267  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1268  
  1269  		// if we can't fetch the inode we can't access it
  1270  		accessReturn = false
  1271  		return
  1272  	}
  1273  	if !ok {
  1274  		// disk corruption or client requested a free inode
  1275  		logger.Infof("%s: fetch of inode %d volume '%s' failed because it is unallocated",
  1276  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1277  
  1278  		// if the inode is free then we can't access it
  1279  		accessReturn = false
  1280  		return
  1281  	}
  1282  
  1283  	ourInodeUserID = ourInode.UserID
  1284  	ourInodeGroupID = ourInode.GroupID
  1285  
  1286  	if headhunter.SnapShotIDTypeLive == snapShotIDType {
  1287  		ourInodeMode = ourInode.Mode
  1288  	} else {
  1289  		ourInodeMode = ourInode.Mode // TODO: Make it read-only...
  1290  	}
  1291  
  1292  	if F_OK == accessMode {
  1293  		// the inode exists so its F_OK
  1294  		accessReturn = true
  1295  		return
  1296  	}
  1297  
  1298  	if P_OK == accessMode {
  1299  		accessReturn = (InodeRootUserID == userID) || (userID == ourInodeUserID)
  1300  		return
  1301  	}
  1302  
  1303  	if accessMode != (accessMode & (R_OK | W_OK | X_OK)) {
  1304  		// Default to false if P_OK bit set along with any others)
  1305  		accessReturn = false
  1306  		return
  1307  	}
  1308  
  1309  	// Only the LiveView is ever writeable... even by the root user
  1310  	if (accessMode&W_OK != 0) && (headhunter.SnapShotIDTypeLive != snapShotIDType) {
  1311  		accessReturn = false
  1312  		return
  1313  	}
  1314  
  1315  	// The root user (if not squashed) can do anything except exec files
  1316  	// that are not executable by any user
  1317  	if userID == InodeRootUserID {
  1318  		if (accessMode&X_OK != 0) && (ourInodeMode&(X_OK<<6|X_OK<<3|X_OK) == 0) {
  1319  			accessReturn = false
  1320  		} else {
  1321  			accessReturn = true
  1322  		}
  1323  		return
  1324  	}
  1325  
  1326  	// We check against permissions for the user, group, and other.  The
  1327  	// first match wins (not the first permission granted).  If the user is
  1328  	// the owner of the file then those permission bits determine what
  1329  	// happens.  In other words, if the permission bits deny read permission
  1330  	// to the owner of a file but allow read permission for group and other,
  1331  	// then everyone except the owner of the file can read it.
  1332  	//
  1333  	// On a local file system, the owner of a file is *not* allowed to write
  1334  	// to the file unless it was opened for writing and the permission bits
  1335  	// allowed it *or* the process created the file and opened it for
  1336  	// writing at the same time.  However, NFS does not have an open state
  1337  	// (there's no file descriptor that tracks permissions when the the file
  1338  	// was opened) so we check for write permission on every write.  This
  1339  	// breaks things like tar when it tries to unpack a file which has
  1340  	// permission 0444 (read only).  On a local file system that works, but
  1341  	// it doesn't work for NFS unless we bend the rules a bit for the owner
  1342  	// of the file and allow the owner to write to the file even if
  1343  	// appropriate permissions are lacking.  (This is only done for the user
  1344  	// that owns the file, not the group that owns the file. Note that the
  1345  	// owner can always change the permissions to allow writing so its not a
  1346  	// security risk, but the owning group cannot).
  1347  	//
  1348  	// Note that the NFS client will typically call Access() when an app
  1349  	// wants to open the file and fail an open request for writing that if
  1350  	// the permission bits do not allow it.
  1351  	//
  1352  	// Similar rules apply to Read() and Truncate() (for ftruncate(2)), but
  1353  	// not for execute permission.  Also, this only applies to regular files
  1354  	// but we'll rely on the caller for that.
  1355  	if userID == ourInodeUserID {
  1356  		if override == OwnerOverride && (accessMode&X_OK == 0) {
  1357  			accessReturn = true
  1358  		} else {
  1359  			accessReturn = (((ourInodeMode >> 6) & accessMode) == accessMode)
  1360  		}
  1361  		return
  1362  	}
  1363  
  1364  	groupIDCheck = (groupID == ourInodeGroupID)
  1365  	if !groupIDCheck {
  1366  		for _, otherGroupID = range otherGroupIDs {
  1367  			if otherGroupID == ourInodeGroupID {
  1368  				groupIDCheck = true
  1369  				break
  1370  			}
  1371  		}
  1372  	}
  1373  	if groupIDCheck {
  1374  		accessReturn = ((((ourInodeMode >> 3) & 07) & accessMode) == accessMode)
  1375  		return
  1376  	}
  1377  
  1378  	accessReturn = ((((ourInodeMode >> 0) & 07) & accessMode) == accessMode)
  1379  	return
  1380  }
  1381  
  1382  func (vS *volumeStruct) ProvisionObject() (objectPath string, err error) {
  1383  	err = enforceRWMode(true)
  1384  	if nil != err {
  1385  		return
  1386  	}
  1387  
  1388  	containerName, objectNumber, err := vS.provisionObject()
  1389  	if nil != err {
  1390  		return
  1391  	}
  1392  
  1393  	objectPath = fmt.Sprintf("/v1/%s/%s/%016X", vS.accountName, containerName, objectNumber)
  1394  
  1395  	err = nil
  1396  	return
  1397  }
  1398  
  1399  func (vS *volumeStruct) Purge(inodeNumber InodeNumber) (err error) {
  1400  	var (
  1401  		inode *inMemoryInodeStruct
  1402  		ok    bool
  1403  	)
  1404  
  1405  	err = enforceRWMode(false)
  1406  	if nil != err {
  1407  		return
  1408  	}
  1409  
  1410  	inode, ok, err = vS.inodeCacheFetch(inodeNumber)
  1411  	if (nil != err) || !ok {
  1412  		return
  1413  	}
  1414  
  1415  	if inode.dirty {
  1416  		err = fmt.Errorf("Inode dirty... cannot be purged")
  1417  		return
  1418  	}
  1419  
  1420  	ok, err = vS.inodeCacheDrop(inode)
  1421  	if nil != err {
  1422  		return
  1423  	}
  1424  	if !ok {
  1425  		err = fmt.Errorf("inodeCacheDrop(inode) failed")
  1426  	}
  1427  
  1428  	return
  1429  }
  1430  
  1431  func (vS *volumeStruct) Destroy(inodeNumber InodeNumber) (err error) {
  1432  	logger.Tracef("inode.Destroy(): volume '%s' inode %d", vS.volumeName, inodeNumber)
  1433  
  1434  	err = enforceRWMode(false)
  1435  	if nil != err {
  1436  		return
  1437  	}
  1438  
  1439  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1440  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1441  		err = fmt.Errorf("Destroy() on non-LiveView inodeNumber not allowed")
  1442  		return
  1443  	}
  1444  
  1445  	ourInode, ok, err := vS.fetchInode(inodeNumber)
  1446  	if nil != err {
  1447  		// the inode is locked so this should never happen (unless the inode
  1448  		// was evicted from the cache and it was corrupt when read from disk)
  1449  		// (err includes volume name and inode number)
  1450  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1451  		return
  1452  	}
  1453  	if !ok {
  1454  		// this should never happen (see above)
  1455  		err = fmt.Errorf("%s: cannot destroy inode %d volume '%s' because it is unallocated",
  1456  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1457  		err = blunder.AddError(err, blunder.NotFoundError)
  1458  		logger.ErrorWithError(err)
  1459  		return
  1460  	}
  1461  
  1462  	ok, err = vS.inodeCacheDrop(ourInode)
  1463  	if nil != err {
  1464  		logger.ErrorfWithError(err, "%s: inodeCacheDrop() of inode failed: %v", utils.GetFnName(), err)
  1465  		return
  1466  	}
  1467  	if !ok {
  1468  		logger.ErrorfWithError(err, "%s: inodeCacheDrop() of inode returned !ok", utils.GetFnName())
  1469  		return
  1470  	}
  1471  
  1472  	if ourInode.InodeType == FileType {
  1473  		_ = vS.doFileInodeDataFlush(ourInode)
  1474  	}
  1475  
  1476  	err = vS.headhunterVolumeHandle.DeleteInodeRec(uint64(inodeNumber))
  1477  	if nil != err {
  1478  		logger.ErrorWithError(err)
  1479  		return
  1480  	}
  1481  
  1482  	if DirType == ourInode.InodeType {
  1483  		logger.Tracef("inode.Destroy(): volume '%s' inode %d: discarding dirmap payload Object %016X  len %d",
  1484  			vS.volumeName, inodeNumber, ourInode.PayloadObjectNumber, ourInode.PayloadObjectLength)
  1485  
  1486  		dirMapping := ourInode.payload.(sortedmap.BPlusTree)
  1487  
  1488  		err = dirMapping.Discard()
  1489  		if nil != err {
  1490  			logger.ErrorWithError(err)
  1491  			return
  1492  		}
  1493  
  1494  		stats.IncrementOperations(&stats.DirDestroyOps)
  1495  
  1496  	} else if FileType == ourInode.InodeType {
  1497  		logger.Tracef("inode.Destroy(): volume '%s' inode %d: discarding extmap payload Object %016X  len %d",
  1498  			vS.volumeName, inodeNumber, ourInode.PayloadObjectNumber, ourInode.PayloadObjectLength)
  1499  
  1500  		extents := ourInode.payload.(sortedmap.BPlusTree)
  1501  
  1502  		err = extents.Discard()
  1503  		if nil != err {
  1504  			logger.ErrorWithError(err)
  1505  			return
  1506  		}
  1507  
  1508  		for logSegmentNumber := range ourInode.LogSegmentMap {
  1509  			deleteSegmentErr := vS.headhunterVolumeHandle.DeleteLogSegmentRec(logSegmentNumber)
  1510  			if nil != deleteSegmentErr {
  1511  				logger.WarnfWithError(deleteSegmentErr, "couldn't delete destroy'd log segment")
  1512  				return
  1513  			}
  1514  			stats.IncrementOperations(&stats.GcLogSegDeleteOps)
  1515  		}
  1516  		stats.IncrementOperations(&stats.GcLogSegOps)
  1517  
  1518  		stats.IncrementOperations(&stats.FileDestroyOps)
  1519  	} else { // SymlinkType == ourInode.InodeType
  1520  		stats.IncrementOperations(&stats.SymlinkDestroyOps)
  1521  	}
  1522  
  1523  	return
  1524  }
  1525  
  1526  func (vS *volumeStruct) GetMetadata(inodeNumber InodeNumber) (metadata *MetadataStruct, err error) {
  1527  	var (
  1528  		inode          *inMemoryInodeStruct
  1529  		ok             bool
  1530  		pos            int
  1531  		snapShotIDType headhunter.SnapShotIDType
  1532  	)
  1533  
  1534  	snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1535  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  1536  		// For /<SnapShotDirName>, start with metadata from /
  1537  		inode, ok, err = vS.fetchInode(RootDirInodeNumber)
  1538  	} else {
  1539  		inode, ok, err = vS.fetchInode(inodeNumber)
  1540  	}
  1541  
  1542  	if nil != err {
  1543  		// this indicates disk corruption or software error
  1544  		// (err includes volume name and inode number)
  1545  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1546  		return
  1547  	}
  1548  	if !ok {
  1549  		// disk corruption or client request for unallocated inode
  1550  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1551  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1552  		err = blunder.AddError(err, blunder.NotFoundError)
  1553  		logger.InfoWithError(err)
  1554  		return
  1555  	}
  1556  
  1557  	metadata = &MetadataStruct{
  1558  		InodeType:            inode.InodeType,
  1559  		LinkCount:            inode.LinkCount,
  1560  		Size:                 inode.Size,
  1561  		CreationTime:         inode.CreationTime,
  1562  		ModificationTime:     inode.ModificationTime,
  1563  		AccessTime:           inode.AccessTime,
  1564  		AttrChangeTime:       inode.AttrChangeTime,
  1565  		NumWrites:            inode.NumWrites,
  1566  		InodeStreamNameSlice: make([]string, len(inode.StreamMap)),
  1567  		Mode:                 inode.Mode,
  1568  		UserID:               inode.UserID,
  1569  		GroupID:              inode.GroupID,
  1570  	}
  1571  
  1572  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  1573  		// For /<SnapShotDirName>, simply remove Write Access... and skip InodeStreamNameSlice
  1574  		metadata.Mode &= metadata.Mode & ^(W_OK<<6 | W_OK<<3 | W_OK<<0)
  1575  	} else {
  1576  		if headhunter.SnapShotIDTypeSnapShot == snapShotIDType {
  1577  			// For inodes in a SnapShot, simply remove Write Access
  1578  			metadata.Mode &= metadata.Mode & ^(W_OK<<6 | W_OK<<3 | W_OK<<0)
  1579  		}
  1580  		pos = 0
  1581  		for inodeStreamName := range inode.StreamMap {
  1582  			metadata.InodeStreamNameSlice[pos] = inodeStreamName
  1583  			pos++
  1584  		}
  1585  	}
  1586  
  1587  	stats.IncrementOperations(&stats.InodeGetMetadataOps)
  1588  	return
  1589  }
  1590  
  1591  func (vS *volumeStruct) GetType(inodeNumber InodeNumber) (inodeType InodeType, err error) {
  1592  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1593  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  1594  		inodeType = DirType
  1595  		err = nil
  1596  		return
  1597  	}
  1598  
  1599  	inode, ok, err := vS.fetchInode(inodeNumber)
  1600  	if nil != err {
  1601  		// this indicates disk corruption or software error
  1602  		// (err includes volume name and inode number)
  1603  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1604  		return
  1605  	}
  1606  	if !ok {
  1607  		// disk corruption or client request for unallocated inode
  1608  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1609  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1610  		logger.InfoWithError(err)
  1611  		err = blunder.AddError(err, blunder.NotFoundError)
  1612  		return
  1613  	}
  1614  
  1615  	inodeType = inode.InodeType
  1616  
  1617  	stats.IncrementOperations(&stats.InodeGetTypeOps)
  1618  	return
  1619  }
  1620  
  1621  func (vS *volumeStruct) GetLinkCount(inodeNumber InodeNumber) (linkCount uint64, err error) {
  1622  	var (
  1623  		adjustLinkCountForSnapShotSubDirInRootDirInode bool
  1624  		inode                                          *inMemoryInodeStruct
  1625  		ok                                             bool
  1626  		snapShotCount                                  uint64
  1627  		snapShotIDType                                 headhunter.SnapShotIDType
  1628  	)
  1629  
  1630  	if RootDirInodeNumber == inodeNumber {
  1631  		// Account for .. in /<SnapShotDirName> if any SnapShot's exist
  1632  		snapShotCount = vS.headhunterVolumeHandle.SnapShotCount()
  1633  		adjustLinkCountForSnapShotSubDirInRootDirInode = (0 != snapShotCount)
  1634  	} else {
  1635  		snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1636  		if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  1637  			// linkCount == 1 (/<SnapShotDirName>'s '.') + 1 (/'s reference to <SnapShotDirName>) + # SnapShot's (/..' in each SnapShot's /)
  1638  			snapShotCount = vS.headhunterVolumeHandle.SnapShotCount()
  1639  			linkCount = 1 + 1 + snapShotCount
  1640  			err = nil
  1641  			return
  1642  		}
  1643  		adjustLinkCountForSnapShotSubDirInRootDirInode = false
  1644  	}
  1645  
  1646  	inode, ok, err = vS.fetchInode(inodeNumber)
  1647  	if nil != err {
  1648  		// this indicates disk corruption or software error
  1649  		// (err includes volume name and inode number)
  1650  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1651  		return
  1652  	}
  1653  	if !ok {
  1654  		// disk corruption or client request for unallocated inode
  1655  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1656  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1657  		logger.InfoWithError(err)
  1658  		err = blunder.AddError(err, blunder.NotFoundError)
  1659  		return
  1660  	}
  1661  
  1662  	if adjustLinkCountForSnapShotSubDirInRootDirInode {
  1663  		linkCount = inode.LinkCount + 1
  1664  	} else {
  1665  		linkCount = inode.LinkCount
  1666  	}
  1667  
  1668  	return
  1669  }
  1670  
  1671  // SetLinkCount is used to adjust the LinkCount property to match current reference count during FSCK TreeWalk.
  1672  func (vS *volumeStruct) SetLinkCount(inodeNumber InodeNumber, linkCount uint64) (err error) {
  1673  	err = enforceRWMode(false)
  1674  	if nil != err {
  1675  		return
  1676  	}
  1677  
  1678  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1679  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1680  		err = fmt.Errorf("SetLinkCount() on non-LiveView inodeNumber not allowed")
  1681  		return
  1682  	}
  1683  
  1684  	inode, ok, err := vS.fetchInode(inodeNumber)
  1685  	if err != nil {
  1686  		// this indicates disk corruption or software error
  1687  		// (err includes volume name and inode number)
  1688  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1689  		return
  1690  	}
  1691  	if !ok {
  1692  		// disk corruption or client request for unallocated inode
  1693  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1694  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1695  		logger.InfoWithError(err)
  1696  		err = blunder.AddError(err, blunder.NotFoundError)
  1697  		return
  1698  	}
  1699  
  1700  	inode.dirty = true
  1701  	inode.LinkCount = linkCount
  1702  
  1703  	err = vS.flushInode(inode)
  1704  	if err != nil {
  1705  		logger.ErrorWithError(err)
  1706  		return err
  1707  	}
  1708  
  1709  	return
  1710  }
  1711  
  1712  func (vS *volumeStruct) SetCreationTime(inodeNumber InodeNumber, CreationTime time.Time) (err error) {
  1713  	err = enforceRWMode(false)
  1714  	if nil != err {
  1715  		return
  1716  	}
  1717  
  1718  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1719  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1720  		err = fmt.Errorf("SetCreationTime() on non-LiveView inodeNumber not allowed")
  1721  		return
  1722  	}
  1723  
  1724  	inode, ok, err := vS.fetchInode(inodeNumber)
  1725  	if err != nil {
  1726  		// the inode is locked so this should never happen (unless the inode
  1727  		// was evicted from the cache and it was corrupt when read from disk)
  1728  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1729  		return err
  1730  	}
  1731  	if !ok {
  1732  		// this should never happen (see above)
  1733  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1734  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1735  		logger.ErrorWithError(err)
  1736  		err = blunder.AddError(err, blunder.NotFoundError)
  1737  		return err
  1738  	}
  1739  
  1740  	inode.dirty = true
  1741  	inode.AttrChangeTime = time.Now()
  1742  	inode.CreationTime = CreationTime
  1743  
  1744  	err = vS.flushInode(inode)
  1745  	if err != nil {
  1746  		logger.ErrorWithError(err)
  1747  		return err
  1748  	}
  1749  	return
  1750  }
  1751  
  1752  func (vS *volumeStruct) SetModificationTime(inodeNumber InodeNumber, ModificationTime time.Time) (err error) {
  1753  	err = enforceRWMode(false)
  1754  	if nil != err {
  1755  		return
  1756  	}
  1757  
  1758  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1759  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1760  		err = fmt.Errorf("SetModificationTime() on non-LiveView inodeNumber not allowed")
  1761  		return
  1762  	}
  1763  
  1764  	inode, ok, err := vS.fetchInode(inodeNumber)
  1765  	if err != nil {
  1766  		// the inode is locked so this should never happen (unless the inode
  1767  		// was evicted from the cache and it was corrupt when read from disk)
  1768  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1769  		return err
  1770  	}
  1771  	if !ok {
  1772  		// this should never happen (see above)
  1773  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1774  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1775  		logger.ErrorWithError(err)
  1776  		err = blunder.AddError(err, blunder.NotFoundError)
  1777  		return err
  1778  	}
  1779  
  1780  	inode.dirty = true
  1781  	inode.AttrChangeTime = time.Now()
  1782  	inode.ModificationTime = ModificationTime
  1783  
  1784  	err = vS.flushInode(inode)
  1785  	if err != nil {
  1786  		logger.ErrorWithError(err)
  1787  		return err
  1788  	}
  1789  
  1790  	return
  1791  }
  1792  
  1793  func (vS *volumeStruct) SetAccessTime(inodeNumber InodeNumber, accessTime time.Time) (err error) {
  1794  	err = enforceRWMode(false)
  1795  	if nil != err {
  1796  		return
  1797  	}
  1798  
  1799  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1800  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1801  		err = fmt.Errorf("SetAccessTime() on non-LiveView inodeNumber not allowed")
  1802  		return
  1803  	}
  1804  
  1805  	inode, ok, err := vS.fetchInode(inodeNumber)
  1806  	if err != nil {
  1807  		// the inode is locked so this should never happen (unless the inode
  1808  		// was evicted from the cache and it was corrupt when read from disk)
  1809  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1810  		return err
  1811  	}
  1812  	if !ok {
  1813  		// this should never happen (see above)
  1814  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1815  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1816  		logger.ErrorWithError(err)
  1817  		err = blunder.AddError(err, blunder.NotFoundError)
  1818  		return err
  1819  	}
  1820  
  1821  	inode.dirty = true
  1822  	inode.AttrChangeTime = time.Now()
  1823  	inode.AccessTime = accessTime
  1824  
  1825  	err = vS.flushInode(inode)
  1826  	if err != nil {
  1827  		logger.ErrorWithError(err)
  1828  		return err
  1829  	}
  1830  
  1831  	return
  1832  }
  1833  
  1834  func determineMode(filePerm InodeMode, inodeType InodeType) (fileMode InodeMode, err error) {
  1835  	// Caller should only be setting the file perm bits, but samba seems to send file type
  1836  	// bits as well. Since we need to work with whatever samba does, let's just silently
  1837  	// mask off the other bits.
  1838  	if filePerm&^PosixModePerm != 0 {
  1839  		logger.Tracef("inode.determineMode(): invalid file mode 0x%x (max 0x%x); removing file type bits.", uint32(filePerm), uint32(PosixModePerm))
  1840  	}
  1841  
  1842  	// Build fileMode starting with the file permission bits
  1843  	fileMode = filePerm & PosixModePerm
  1844  
  1845  	// Add the file type to the mode.
  1846  	switch inodeType {
  1847  	case DirType:
  1848  		fileMode |= PosixModeDir
  1849  	case FileType:
  1850  		fileMode |= PosixModeFile
  1851  	case SymlinkType:
  1852  		fileMode |= PosixModeSymlink
  1853  	default:
  1854  		err = fmt.Errorf("%s: unrecognized inode type %v", utils.GetFnName(), inodeType)
  1855  		err = blunder.AddError(err, blunder.InvalidInodeTypeError)
  1856  		return
  1857  	}
  1858  
  1859  	err = nil
  1860  	return
  1861  }
  1862  
  1863  func (vS *volumeStruct) SetPermMode(inodeNumber InodeNumber, filePerm InodeMode) (err error) {
  1864  	err = enforceRWMode(false)
  1865  	if nil != err {
  1866  		return
  1867  	}
  1868  
  1869  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1870  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1871  		err = fmt.Errorf("SetPermMode() on non-LiveView inodeNumber not allowed")
  1872  		return
  1873  	}
  1874  
  1875  	inode, ok, err := vS.fetchInode(inodeNumber)
  1876  	if err != nil {
  1877  		// the inode is locked so this should never happen (unless the inode
  1878  		// was evicted from the cache and it was corrupt when read from disk)
  1879  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1880  		return err
  1881  	}
  1882  	if !ok {
  1883  		// this should never happen (see above)
  1884  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1885  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1886  		logger.ErrorWithError(err)
  1887  		err = blunder.AddError(err, blunder.NotFoundError)
  1888  		return err
  1889  	}
  1890  
  1891  	// Create file mode out of file permissions plus inode type
  1892  	fileMode, err := determineMode(filePerm, inode.InodeType)
  1893  	if err != nil {
  1894  		return err
  1895  	}
  1896  
  1897  	inode.dirty = true
  1898  	inode.Mode = fileMode
  1899  
  1900  	updateTime := time.Now()
  1901  	inode.AttrChangeTime = updateTime
  1902  
  1903  	err = vS.flushInode(inode)
  1904  	if err != nil {
  1905  		logger.ErrorWithError(err)
  1906  		return err
  1907  	}
  1908  
  1909  	return
  1910  }
  1911  
  1912  func (vS *volumeStruct) SetOwnerUserID(inodeNumber InodeNumber, userID InodeUserID) (err error) {
  1913  	err = enforceRWMode(false)
  1914  	if nil != err {
  1915  		return
  1916  	}
  1917  
  1918  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1919  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1920  		err = fmt.Errorf("SetOwnerUserID() on non-LiveView inodeNumber not allowed")
  1921  		return
  1922  	}
  1923  
  1924  	inode, ok, err := vS.fetchInode(inodeNumber)
  1925  	if err != nil {
  1926  		// the inode is locked so this should never happen (unless the inode
  1927  		// was evicted from the cache and it was corrupt when read from disk)
  1928  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1929  		return err
  1930  	}
  1931  	if !ok {
  1932  		// this should never happen (see above)
  1933  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1934  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1935  		logger.ErrorWithError(err)
  1936  		err = blunder.AddError(err, blunder.NotFoundError)
  1937  		return err
  1938  	}
  1939  
  1940  	inode.dirty = true
  1941  	inode.UserID = userID
  1942  
  1943  	updateTime := time.Now()
  1944  	inode.AttrChangeTime = updateTime
  1945  
  1946  	err = vS.flushInode(inode)
  1947  	if err != nil {
  1948  		logger.ErrorWithError(err)
  1949  		return err
  1950  	}
  1951  
  1952  	return
  1953  }
  1954  
  1955  func (vS *volumeStruct) SetOwnerUserIDGroupID(inodeNumber InodeNumber, userID InodeUserID, groupID InodeGroupID) (err error) {
  1956  	err = enforceRWMode(false)
  1957  	if nil != err {
  1958  		return
  1959  	}
  1960  
  1961  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1962  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1963  		err = fmt.Errorf("SetOwnerUserIDGroupID() on non-LiveView inodeNumber not allowed")
  1964  		return
  1965  	}
  1966  
  1967  	inode, ok, err := vS.fetchInode(inodeNumber)
  1968  	if err != nil {
  1969  		// the inode is locked so this should never happen (unless the inode
  1970  		// was evicted from the cache and it was corrupt when read from disk)
  1971  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1972  		return err
  1973  	}
  1974  	if !ok {
  1975  		// this should never happen (see above)
  1976  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1977  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1978  		logger.ErrorWithError(err)
  1979  		err = blunder.AddError(err, blunder.NotFoundError)
  1980  		return err
  1981  	}
  1982  
  1983  	inode.dirty = true
  1984  	inode.UserID = userID
  1985  	inode.GroupID = groupID
  1986  
  1987  	updateTime := time.Now()
  1988  	inode.AttrChangeTime = updateTime
  1989  
  1990  	err = vS.flushInode(inode)
  1991  	if err != nil {
  1992  		logger.ErrorWithError(err)
  1993  		return err
  1994  	}
  1995  
  1996  	return
  1997  }
  1998  
  1999  func (vS *volumeStruct) SetOwnerGroupID(inodeNumber InodeNumber, groupID InodeGroupID) (err error) {
  2000  	err = enforceRWMode(false)
  2001  	if nil != err {
  2002  		return
  2003  	}
  2004  
  2005  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2006  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  2007  		err = fmt.Errorf("SetOwnerGroupID() on non-LiveView inodeNumber not allowed")
  2008  		return
  2009  	}
  2010  
  2011  	inode, ok, err := vS.fetchInode(inodeNumber)
  2012  	if err != nil {
  2013  		// the inode is locked so this should never happen (unless the inode
  2014  		// was evicted from the cache and it was corrupt when read from disk)
  2015  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  2016  		return err
  2017  	}
  2018  	if !ok {
  2019  		// this should never happen (see above)
  2020  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2021  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2022  		logger.ErrorWithError(err)
  2023  		err = blunder.AddError(err, blunder.NotFoundError)
  2024  		return err
  2025  	}
  2026  
  2027  	inode.dirty = true
  2028  	inode.GroupID = groupID
  2029  
  2030  	updateTime := time.Now()
  2031  	inode.AttrChangeTime = updateTime
  2032  
  2033  	err = vS.flushInode(inode)
  2034  	if err != nil {
  2035  		logger.ErrorWithError(err)
  2036  		return err
  2037  	}
  2038  
  2039  	return
  2040  }
  2041  
  2042  func (vS *volumeStruct) GetStream(inodeNumber InodeNumber, inodeStreamName string) (buf []byte, err error) {
  2043  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2044  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  2045  		err = fmt.Errorf("No stream '%v'", inodeStreamName)
  2046  		return buf, blunder.AddError(err, blunder.StreamNotFound)
  2047  	}
  2048  
  2049  	inode, ok, err := vS.fetchInode(inodeNumber)
  2050  	if err != nil {
  2051  		// this indicates disk corruption or software error
  2052  		// (err includes volume name and inode number)
  2053  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2054  		return nil, err
  2055  	}
  2056  	if !ok {
  2057  		// disk corruption or client request for unallocated inode
  2058  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2059  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2060  		logger.InfoWithError(err)
  2061  		err = blunder.AddError(err, blunder.NotFoundError)
  2062  		return nil, err
  2063  	}
  2064  
  2065  	inodeStreamBuf, ok := inode.StreamMap[inodeStreamName]
  2066  
  2067  	if !ok {
  2068  		err = fmt.Errorf("No stream '%v'", inodeStreamName)
  2069  		return buf, blunder.AddError(err, blunder.StreamNotFound)
  2070  	}
  2071  
  2072  	buf = make([]byte, len(inodeStreamBuf))
  2073  
  2074  	copy(buf, inodeStreamBuf)
  2075  
  2076  	err = nil
  2077  
  2078  	return
  2079  }
  2080  
  2081  func (vS *volumeStruct) PutStream(inodeNumber InodeNumber, inodeStreamName string, buf []byte) (err error) {
  2082  	err = enforceRWMode(false)
  2083  	if nil != err {
  2084  		return
  2085  	}
  2086  
  2087  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2088  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  2089  		err = fmt.Errorf("PutStream() on non-LiveView inodeNumber not allowed")
  2090  		return
  2091  	}
  2092  
  2093  	inode, ok, err := vS.fetchInode(inodeNumber)
  2094  	if err != nil {
  2095  		// this indicates disk corruption or software error
  2096  		// (err includes volume name and inode number)
  2097  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2098  		return err
  2099  	}
  2100  	if !ok {
  2101  		// disk corruption or client request for unallocated inode
  2102  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2103  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2104  		logger.InfoWithError(err)
  2105  		err = blunder.AddError(err, blunder.NotFoundError)
  2106  		return err
  2107  	}
  2108  
  2109  	inodeStreamBuf := make([]byte, len(buf))
  2110  
  2111  	copy(inodeStreamBuf, buf)
  2112  
  2113  	inode.dirty = true
  2114  	inode.StreamMap[inodeStreamName] = inodeStreamBuf
  2115  
  2116  	updateTime := time.Now()
  2117  	inode.AttrChangeTime = updateTime
  2118  
  2119  	err = vS.flushInode(inode)
  2120  	if err != nil {
  2121  		logger.ErrorWithError(err)
  2122  		return err
  2123  	}
  2124  
  2125  	return
  2126  }
  2127  
  2128  func (vS *volumeStruct) DeleteStream(inodeNumber InodeNumber, inodeStreamName string) (err error) {
  2129  	err = enforceRWMode(false)
  2130  	if nil != err {
  2131  		return
  2132  	}
  2133  
  2134  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2135  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  2136  		err = fmt.Errorf("DeleteStream() on non-LiveView inodeNumber not allowed")
  2137  		return
  2138  	}
  2139  
  2140  	inode, ok, err := vS.fetchInode(inodeNumber)
  2141  	if err != nil {
  2142  		// this indicates disk corruption or software error
  2143  		// (err includes volume name and inode number)
  2144  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2145  		return
  2146  	}
  2147  	if !ok {
  2148  		// disk corruption or client request for unallocated inode
  2149  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2150  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2151  		logger.InfoWithError(err)
  2152  		err = blunder.AddError(err, blunder.NotFoundError)
  2153  		return
  2154  	}
  2155  
  2156  	inode.dirty = true
  2157  	delete(inode.StreamMap, inodeStreamName)
  2158  
  2159  	updateTime := time.Now()
  2160  	inode.AttrChangeTime = updateTime
  2161  
  2162  	err = vS.flushInode(inode)
  2163  	if err != nil {
  2164  		logger.ErrorWithError(err)
  2165  		return err
  2166  	}
  2167  
  2168  	return
  2169  }
  2170  
  2171  func (vS *volumeStruct) FetchLayoutReport(inodeNumber InodeNumber) (layoutReport sortedmap.LayoutReport, err error) {
  2172  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2173  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  2174  		layoutReport = make(sortedmap.LayoutReport)
  2175  		err = nil
  2176  		return
  2177  	}
  2178  
  2179  	inode, ok, err := vS.fetchInode(inodeNumber)
  2180  	if err != nil {
  2181  		// this indicates disk corruption or software error
  2182  		// (err includes volume name and inode number)
  2183  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2184  		return nil, err
  2185  	}
  2186  	if !ok {
  2187  		// disk corruption or client request for unallocated inode
  2188  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2189  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2190  		logger.InfoWithError(err)
  2191  		err = blunder.AddError(err, blunder.NotFoundError)
  2192  		return nil, err
  2193  	}
  2194  
  2195  	if SymlinkType == inode.InodeType {
  2196  		layoutReport = make(sortedmap.LayoutReport)
  2197  		err = nil
  2198  	} else {
  2199  		layoutReport, err = inode.payload.(sortedmap.BPlusTree).FetchLayoutReport()
  2200  	}
  2201  
  2202  	return
  2203  }
  2204  
  2205  func (vS *volumeStruct) FetchFragmentationReport(inodeNumber InodeNumber) (fragmentationReport FragmentationReport, err error) {
  2206  	err = fmt.Errorf("FetchFragmentationReport not yet implemented")
  2207  	return
  2208  }
  2209  
  2210  func (vS *volumeStruct) Optimize(inodeNumber InodeNumber, maxDuration time.Duration) (err error) {
  2211  	err = enforceRWMode(false)
  2212  	if nil != err {
  2213  		return
  2214  	}
  2215  
  2216  	err = fmt.Errorf("Optimize not yet implemented")
  2217  	return
  2218  }
  2219  
  2220  func validateFileExtents(snapShotID uint64, ourInode *inMemoryInodeStruct) (err error) {
  2221  	var (
  2222  		zero = uint64(0)
  2223  	)
  2224  
  2225  	readPlan, readPlanBytes, err := ourInode.volume.getReadPlanHelper(snapShotID, ourInode, &zero, nil)
  2226  	if err != nil {
  2227  		return err
  2228  	}
  2229  
  2230  	// We read the whole file, so these should match
  2231  	if readPlanBytes != ourInode.Size {
  2232  		return blunder.NewError(blunder.CorruptInodeError, "inode %v had recorded size %v bytes, but full read plan was only %v bytes", ourInode.InodeNumber, ourInode.Size, readPlanBytes)
  2233  	}
  2234  
  2235  	// Let's check that the read plan is consistent with what the inode's
  2236  	// internal log-segment map says about which segments should have how much data.
  2237  	//
  2238  	// Make a copy of the inode's LogSegmentMap map so we can decrement the
  2239  	// byte count for each segment as we walk the readPlan entries.
  2240  	remainingExpectedBytes := make(map[uint64]uint64)
  2241  	for segmentNumber, segmentBytesUsed := range ourInode.LogSegmentMap {
  2242  		remainingExpectedBytes[segmentNumber] += segmentBytesUsed
  2243  	}
  2244  	// Then we can compare with the actual read plan we got ...
  2245  	for _, readPlanStep := range readPlan {
  2246  
  2247  		// holes in a sparse file aren't counted
  2248  		if readPlanStep.LogSegmentNumber == 0 {
  2249  			continue
  2250  		}
  2251  		pathSegments := strings.Split(readPlanStep.ObjectPath, "/")
  2252  		logSegmentRepresentation := pathSegments[len(pathSegments)-1]
  2253  		logSegmentNumber, hexConvErr := utils.HexStrToUint64(logSegmentRepresentation)
  2254  		if hexConvErr != nil {
  2255  			return blunder.NewError(blunder.CorruptInodeError,
  2256  				"conversion of read plan object name to log segment number failed; "+
  2257  					"readPlanStep: %v  logSegmentString: '%v'  err: %v",
  2258  				readPlanStep, logSegmentRepresentation, hexConvErr)
  2259  		}
  2260  		remainingExpectedBytes[logSegmentNumber] -= readPlanStep.Length
  2261  	}
  2262  	// ... and fail validation if any log segment didn't match. We'll put the
  2263  	// mismatches in a separate map that we'll attach to the error in case a
  2264  	// consumer or logger wants it.
  2265  	logSegmentByteCountMismatches := make(map[uint64]uint64)
  2266  	for logSegmentNumber, remainingExpectedByteCount := range remainingExpectedBytes {
  2267  		if remainingExpectedByteCount != 0 {
  2268  			logSegmentByteCountMismatches[logSegmentNumber] = remainingExpectedByteCount
  2269  		}
  2270  	}
  2271  	if len(logSegmentByteCountMismatches) != 0 {
  2272  		rootErr := fmt.Errorf("inconsistency detected between log segment map and read plan for inode %v", ourInode.InodeNumber)
  2273  		return merry.WithValue(blunder.AddError(rootErr, blunder.CorruptInodeError), "logSegmentByteCountMismatches", logSegmentByteCountMismatches)
  2274  	}
  2275  
  2276  	// Having verified that our read plan is consistent with our internal log
  2277  	// segment map, we also want to check that it's consistent with the actual log
  2278  	// segment objects in Swift. First, we'll construct a map of object paths to
  2279  	// the largest offset we would need read up to in that object.
  2280  	objectPathToEndOffset := make(map[string]uint64)
  2281  
  2282  	for _, planStep := range readPlan {
  2283  
  2284  		// holes in a sparse file don't have objects
  2285  		if planStep.LogSegmentNumber == 0 {
  2286  			continue
  2287  		}
  2288  		stepEndOffset := planStep.Offset + planStep.Length
  2289  		endOffset, ok := objectPathToEndOffset[planStep.ObjectPath]
  2290  		if !ok || stepEndOffset > endOffset {
  2291  			objectPathToEndOffset[planStep.ObjectPath] = stepEndOffset
  2292  		}
  2293  	}
  2294  
  2295  	// then, HEAD each object to make sure that it has enough bytes.
  2296  	for objectPath, endOffset := range objectPathToEndOffset {
  2297  		accountName, containerName, objectName, err := utils.PathToAcctContObj(objectPath)
  2298  		if err != nil {
  2299  			logger.ErrorWithError(err)
  2300  			return err
  2301  		}
  2302  
  2303  		contentLength, err := swiftclient.ObjectContentLength(accountName, containerName, objectName)
  2304  		if err != nil {
  2305  			logger.ErrorWithError(err)
  2306  			return err
  2307  		}
  2308  
  2309  		if contentLength < endOffset {
  2310  			// REVIEW: it might be helpful to continue and make a combined report of all
  2311  			//         insufficiently long log segments, rather than erroring out immediately
  2312  			err = fmt.Errorf("expected %q to have at least %v bytes, content length was %v", objectPath, endOffset, contentLength)
  2313  			logger.ErrorWithError(err)
  2314  			return err
  2315  		}
  2316  
  2317  	}
  2318  
  2319  	return nil
  2320  }
  2321  
  2322  func (vS *volumeStruct) markCorrupted(inodeNumber InodeNumber) (err error) {
  2323  	var (
  2324  		inodeRec       []byte
  2325  		ok             bool
  2326  		snapShotIDType headhunter.SnapShotIDType
  2327  	)
  2328  
  2329  	snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2330  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  2331  		err = blunder.NewError(blunder.InvalidArgError, "markCorrupted() of non-LiveView inodeNumber not allowed")
  2332  		return
  2333  	}
  2334  
  2335  	inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber))
  2336  	if nil == err && ok && (len(globals.corruptionDetectedTrueBuf) <= len(inodeRec)) {
  2337  		// Just overwrite CorruptionDetected field with true
  2338  		_ = copy(inodeRec, globals.corruptionDetectedTrueBuf)
  2339  	} else {
  2340  		// Use a simple CorruptionDetected == true inodeRec
  2341  		inodeRec = globals.corruptionDetectedTrueBuf
  2342  	}
  2343  
  2344  	err = vS.headhunterVolumeHandle.PutInodeRec(uint64(inodeNumber), inodeRec)
  2345  
  2346  	return
  2347  }
  2348  
  2349  func (vS *volumeStruct) Validate(inodeNumber InodeNumber, deeply bool) (err error) {
  2350  	var (
  2351  		ok             bool
  2352  		ourInode       *inMemoryInodeStruct
  2353  		snapShotID     uint64
  2354  		snapShotIDType headhunter.SnapShotIDType
  2355  		tree           sortedmap.BPlusTree
  2356  	)
  2357  
  2358  	snapShotIDType, snapShotID, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2359  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  2360  		err = nil // Since /<SnapShotDirName> is emulated, always return success
  2361  		return
  2362  	}
  2363  
  2364  	// we don't want to use the in-memory cache for this; we'll need to fetch
  2365  	// the current real-world bits from disk.
  2366  
  2367  	// If this is a file inode, we flush to ensure that the inode is not dirty
  2368  	// (and that DLM locking therefore ensures we have exclusive access to the
  2369  	// inode and don't need to serialize this operation, as there can be no pending
  2370  	// time-based flush to race with).
  2371  
  2372  	err = vS.flushInodeNumber(inodeNumber)
  2373  	if nil != err {
  2374  		logger.ErrorfWithError(err, "couldn't flush inode %v", inodeNumber)
  2375  		err = blunder.AddError(err, blunder.CorruptInodeError)
  2376  		return
  2377  	}
  2378  
  2379  	err = vS.Purge(inodeNumber)
  2380  	if nil != err {
  2381  		logger.ErrorfWithError(err, "couldn't purge inode %v", inodeNumber)
  2382  		err = blunder.AddError(err, blunder.CorruptInodeError)
  2383  		return
  2384  	}
  2385  
  2386  	ourInode, ok, err = vS.fetchInode(inodeNumber)
  2387  	if nil != err {
  2388  		// this indicates diskj corruption or software error
  2389  		// (err includes volume name and inode number)
  2390  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2391  		err = blunder.AddError(err, blunder.CorruptInodeError)
  2392  		return
  2393  	}
  2394  	if !ok {
  2395  		// disk corruption or client request for unallocated inode
  2396  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2397  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2398  		logger.InfoWithError(err)
  2399  		err = blunder.AddError(err, blunder.NotFoundError)
  2400  		return
  2401  	}
  2402  
  2403  	switch ourInode.InodeType {
  2404  	case DirType, FileType:
  2405  		tree, ok = ourInode.payload.(sortedmap.BPlusTree)
  2406  		if !ok {
  2407  			err = fmt.Errorf("type conversion of inode %v payload to sortedmap.BPlusTree failed", ourInode.InodeNumber)
  2408  			err = blunder.AddError(err, blunder.CorruptInodeError)
  2409  			_ = vS.markCorrupted(inodeNumber)
  2410  			return
  2411  		}
  2412  		err = tree.Validate()
  2413  		if nil != err {
  2414  			err = blunder.AddError(err, blunder.CorruptInodeError)
  2415  			_ = vS.markCorrupted(inodeNumber)
  2416  			return
  2417  		}
  2418  		if FileType == ourInode.InodeType {
  2419  			if deeply {
  2420  				err = validateFileExtents(snapShotID, ourInode)
  2421  				if nil != err {
  2422  					err = blunder.AddError(err, blunder.CorruptInodeError)
  2423  					_ = vS.markCorrupted(inodeNumber)
  2424  					return
  2425  				}
  2426  			}
  2427  		}
  2428  	case SymlinkType:
  2429  		// Nothing to be done here
  2430  	default:
  2431  		err = fmt.Errorf("unrecognized inode type")
  2432  		err = blunder.AddError(err, blunder.CorruptInodeError)
  2433  		_ = vS.markCorrupted(inodeNumber)
  2434  		return
  2435  	}
  2436  
  2437  	err = nil
  2438  	return
  2439  }