github.com/rohankumardubey/proxyfs@v0.0.0-20210108201508-653efa9ab00e/inode/inode.go

     1  package inode
     2  
     3  import (
     4  	"container/list"
     5  	"encoding/json"
     6  	"fmt"
     7  	"runtime/debug"
     8  	"strings"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/ansel1/merry"
    13  	"github.com/swiftstack/cstruct"
    14  	"github.com/swiftstack/sortedmap"
    15  
    16  	"github.com/swiftstack/ProxyFS/blunder"
    17  	"github.com/swiftstack/ProxyFS/dlm"
    18  	"github.com/swiftstack/ProxyFS/evtlog"
    19  	"github.com/swiftstack/ProxyFS/halter"
    20  	"github.com/swiftstack/ProxyFS/headhunter"
    21  	"github.com/swiftstack/ProxyFS/logger"
    22  	"github.com/swiftstack/ProxyFS/stats"
    23  	"github.com/swiftstack/ProxyFS/swiftclient"
    24  	"github.com/swiftstack/ProxyFS/trackedlock"
    25  	"github.com/swiftstack/ProxyFS/utils"
    26  )
    27  
    28  // Shorthand for inode internal API debug log id; global to the package
    29  var int_inode_debug = logger.DbgInodeInternal
    30  
    31  const (
    32  	optimisticInodeFetchBytes = 2048
    33  )
    34  
    35  type CorruptionDetected bool
    36  type Version uint64
    37  
    38  const (
    39  	V1                               Version = iota + 1 // use type/struct onDiskInodeV1Struct
    40  	onDiskInodeV1PayloadObjectOffset uint64  = 0
    41  )
    42  
    43  type onDiskInodeV1Struct struct { // Preceded "on disk" by CorruptionDetected then Version both in cstruct.LittleEndian form
    44  	InodeNumber
    45  	InodeType
    46  	LinkCount           uint64
    47  	Size                uint64
    48  	CreationTime        time.Time
    49  	ModificationTime    time.Time
    50  	AccessTime          time.Time
    51  	AttrChangeTime      time.Time
    52  	NumWrites           uint64
    53  	Mode                InodeMode
    54  	UserID              InodeUserID
    55  	GroupID             InodeGroupID
    56  	StreamMap           map[string][]byte
    57  	PayloadObjectNumber uint64            // DirInode:     B+Tree Root with Key == dir_entry_name, Value = InodeNumber
    58  	PayloadObjectLength uint64            // FileInode:    B+Tree Root with Key == fileOffset, Value = fileExtent
    59  	SymlinkTarget       string            // SymlinkInode: target path of symbolic link
    60  	LogSegmentMap       map[uint64]uint64 // FileInode:    Key == LogSegment#, Value = file user data byte count
    61  }
    62  
    63  type inFlightLogSegmentStruct struct { //               Used as (by reference) Value for inMemoryInodeStruct.inFlightLogSegmentMap
    64  	logSegmentNumber          uint64 //                 Used as (by value)     Key   for inMemoryInodeStruct.inFlightLogSegmentMap
    65  	openLogSegmentLRUNext     *inFlightLogSegmentStruct
    66  	openLogSegmentLRUPrev     *inFlightLogSegmentStruct
    67  	fileInode                 *inMemoryInodeStruct
    68  	accountName               string
    69  	containerName             string
    70  	objectName                string
    71  	openLogSegmentListElement list.Element
    72  	swiftclient.ChunkedPutContext
    73  }
    74  
    75  type inMemoryInodeStruct struct {
    76  	trackedlock.Mutex //                                             Used to synchronize with background fileInodeFlusherDaemon
    77  	sync.WaitGroup    //                                             FileInode Flush requests wait on this
    78  	inodeCacheLRUNext *inMemoryInodeStruct
    79  	inodeCacheLRUPrev *inMemoryInodeStruct
    80  	dirty             bool
    81  	volume            *volumeStruct
    82  	snapShotID        uint64
    83  	payload           interface{} //                                 DirInode:  B+Tree with Key == dir_entry_name, Value = InodeNumber
    84  	//                                                               FileInode: B+Tree with Key == fileOffset, Value = *fileExtent
    85  	openLogSegment           *inFlightLogSegmentStruct            // FileInode only... also in inFlightLogSegmentMap
    86  	inFlightLogSegmentMap    map[uint64]*inFlightLogSegmentStruct // FileInode: key == logSegmentNumber
    87  	inFlightLogSegmentErrors map[uint64]error                     // FileInode: key == logSegmentNumber; value == err (if non nil)
    88  	onDiskInodeV1Struct                                           // Real on-disk inode information embedded here
    89  }
    90  
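        // DumpKey renders an inode cache key (an InodeNumber) as a 0x-prefixed,
        // zero-padded hex string for sortedmap debug dumps.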
    91  func (vS *volumeStruct) DumpKey(key sortedmap.Key) (keyAsString string, err error) {
    92  	keyAsInodeNumber, ok := key.(InodeNumber)
    93  	if !ok {
    94  		err = fmt.Errorf("inode.volumeStruct.DumpKey() could not parse key as an InodeNumber")
    95  		return
    96  	}
    97  
    98  	keyAsString = fmt.Sprintf("0x%016X", keyAsInodeNumber)
    99  
   100  	err = nil
   101  	return
   102  }
   103  
   104  func (vS *volumeStruct) DumpValue(value sortedmap.Value) (valueAsString string, err error) {
   105  	valueAsInMemoryInodeStructPtr, ok := value.(*inMemoryInodeStruct)
   106  	if !ok {
   107  		err = fmt.Errorf("inode.volumeStruct.DumpValue() could not parse value as a *inMemoryInodeStruct")
   108  		return
   109  	}
   110  
   111  	valueAsString = fmt.Sprintf("%016p", valueAsInMemoryInodeStructPtr)
   112  
   113  	err = nil
   114  	return
   115  }
   116  
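        // compareInodeNumber is the sortedmap comparison function for InodeNumber
        // keys, returning -1, 0, or 1 (or an error if either key is not an InodeNumber).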
   117  func compareInodeNumber(key1 sortedmap.Key, key2 sortedmap.Key) (result int, err error) {
   118  	key1InodeNumber, ok := key1.(InodeNumber)
   119  	if !ok {
   120  		err = fmt.Errorf("compareInodeNumber(non-InodeNumber,) not supported")
   121  		return
   122  	}
   123  	key2InodeNumber, ok := key2.(InodeNumber)
   124  	if !ok {
   125  		err = fmt.Errorf("compareInodeNumber(InodeNumber, non-InodeNumber) not supported")
   126  		return
   127  	}
   128  
   129  	if key1InodeNumber < key2InodeNumber {
   130  		result = -1
   131  	} else if key1InodeNumber == key2InodeNumber {
   132  		result = 0
   133  	} else { // key1InodeNumber > key2InodeNumber
   134  		result = 1
   135  	}
   136  
   137  	err = nil
   138  
   139  	return
   140  }
   141  
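        // setRWMode records the package-global read/write mode (Normal, NoWrite, or
        // ReadOnly) and, on each transition, increments the corresponding
        // ReconCheckTriggered* stat.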
   142  func setRWMode(rwMode RWModeType) (err error) {
   143  	if rwMode != globals.rwMode {
   144  		switch rwMode {
   145  		case RWModeNormal:
   146  			stats.IncrementOperations(&stats.ReconCheckTriggeredNormalMode)
   147  		case RWModeNoWrite:
   148  			stats.IncrementOperations(&stats.ReconCheckTriggeredNoWriteMode)
   149  		case RWModeReadOnly:
   150  			stats.IncrementOperations(&stats.ReconCheckTriggeredReadOnlyMode)
   151  		default:
   152  			err = fmt.Errorf("SetRWMode(rwMode==%d) not allowed... must be one of RWModeNormal(%d), RWModeNoWrite(%d), or RWModeReadOnly(%d)", rwMode, RWModeNormal, RWModeNoWrite, RWModeReadOnly)
   153  			return
   154  		}
   155  
   156  		globals.rwMode = rwMode
   157  	}
   158  
   159  	err = nil
   160  	return
   161  }
   162  
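        // enforceRWMode returns an errno-annotated error when the current read/write
        // mode forbids the requested operation: RWModeReadOnly rejects all callers,
        // while RWModeNoWrite is only enforced when enforceNoWriteMode is true.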
   163  func enforceRWMode(enforceNoWriteMode bool) (err error) {
   164  	var (
   165  		rwModeCopy RWModeType
   166  	)
   167  
   168  	rwModeCopy = globals.rwMode
   169  
   170  	if rwModeCopy == RWModeReadOnly {
   171  		err = blunder.NewError(globals.readOnlyThresholdErrno, globals.readOnlyThresholdErrnoString)
   172  	} else if enforceNoWriteMode && (rwModeCopy == RWModeNoWrite) {
   173  		err = blunder.NewError(globals.noWriteThresholdErrno, globals.noWriteThresholdErrnoString)
   174  	} else {
   175  		err = nil
   176  	}
   177  
   178  	return
   179  }
   180  
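        // FetchOnDiskInode returns the raw on-disk representation of an inode as stored
        // in headhunter: the CorruptionDetected flag, the on-disk Version, and the
        // remaining (still serialized) inode record bytes.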
   181  func (vS *volumeStruct) FetchOnDiskInode(inodeNumber InodeNumber) (corruptionDetected CorruptionDetected, version Version, onDiskInode []byte, err error) {
   182  	var (
   183  		bytesConsumedByCorruptionDetected uint64
   184  		bytesConsumedByVersion            uint64
   185  		inodeRec                          []byte
   186  		ok                                bool
   187  	)
   188  
   189  	corruptionDetected = CorruptionDetected(false)
   190  	version = Version(0)
   191  	onDiskInode = make([]byte, 0)
   192  
   193  	inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber))
   194  	if nil != err {
   195  		err = fmt.Errorf("headhunter.GetInodeRec() failed: %v", err)
   196  		return
   197  	}
   198  	if !ok {
   199  		err = fmt.Errorf("headhunter.GetInodeRec() returned !ok")
   200  		return
   201  	}
   202  
   203  	bytesConsumedByCorruptionDetected, err = cstruct.Unpack(inodeRec, &corruptionDetected, cstruct.LittleEndian)
   204  	if nil != err {
   205  		err = fmt.Errorf("cstruct.Unpack(,&corruptionDetected,) failed: %v", err)
   206  		return
   207  	}
   208  	if corruptionDetected {
   209  		return
   210  	}
   211  
   212  	bytesConsumedByVersion, err = cstruct.Unpack(inodeRec[bytesConsumedByCorruptionDetected:], &version, cstruct.LittleEndian)
   213  	if nil != err {
   214  		err = fmt.Errorf("cstruct.Unpack(,&version,) failed: %v", err)
   215  		return
   216  	}
   217  
   218  	onDiskInode = inodeRec[bytesConsumedByCorruptionDetected+bytesConsumedByVersion:]
   219  
   220  	return
   221  }
   222  
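        // fetchOnDiskInode reads an inode record from headhunter, validates its
        // CorruptionDetected flag and Version, JSON-decodes the V1 body, and wraps the
        // result in a fresh inMemoryInodeStruct, (re)constructing the payload B+Tree
        // for Dir and File inodes.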
   223  func (vS *volumeStruct) fetchOnDiskInode(inodeNumber InodeNumber) (inMemoryInode *inMemoryInodeStruct, ok bool, err error) {
   224  	var (
   225  		bytesConsumedByCorruptionDetected uint64
   226  		bytesConsumedByVersion            uint64
   227  		corruptionDetected                CorruptionDetected
   228  		inodeRec                          []byte
   229  		onDiskInodeV1                     *onDiskInodeV1Struct
   230  		snapShotID                        uint64
   231  		snapShotIDType                    headhunter.SnapShotIDType
   232  		version                           Version
   233  	)
   234  
   235  	snapShotIDType, snapShotID, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
   236  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
   237  		logger.Fatalf("fetchOnDiskInode for headhunter.SnapShotIDTypeDotSnapShot not allowed")
   238  	}
   239  
   240  	inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber))
   241  	if nil != err {
   242  		stackStr := string(debug.Stack())
   243  		err = fmt.Errorf("%s: unable to get inodeRec for inode %d: %v stack: %s",
   244  			utils.GetFnName(), inodeNumber, err, stackStr)
   245  		err = blunder.AddError(err, blunder.NotFoundError)
   246  		return
   247  	}
   248  	if !ok {
   249  		return
   250  	}
   251  
   252  	bytesConsumedByCorruptionDetected, err = cstruct.Unpack(inodeRec, &corruptionDetected, cstruct.LittleEndian)
   253  	if nil != err {
   254  		err = fmt.Errorf("%s: unable to parse inodeRec.CorruptionDetected for inode %d: %v", utils.GetFnName(), inodeNumber, err)
   255  		err = blunder.AddError(err, blunder.CorruptInodeError)
   256  		return
   257  	}
   258  	if corruptionDetected {
   259  		err = fmt.Errorf("%s: inode %d has been marked corrupted", utils.GetFnName(), inodeNumber)
   260  		err = blunder.AddError(err, blunder.CorruptInodeError)
   261  		return
   262  	}
   263  
   264  	bytesConsumedByVersion, err = cstruct.Unpack(inodeRec[bytesConsumedByCorruptionDetected:], &version, cstruct.LittleEndian)
   265  	if nil != err {
   266  		err = fmt.Errorf("%s: unable to get inodeRec.Version for inode %d: %v", utils.GetFnName(), inodeNumber, err)
   267  		err = blunder.AddError(err, blunder.CorruptInodeError)
   268  		return
   269  	}
   270  	if V1 != version {
   271  		err = fmt.Errorf("%s: inodeRec.Version for inode %d (%v) not supported", utils.GetFnName(), inodeNumber, version)
   272  		err = blunder.AddError(err, blunder.CorruptInodeError)
   273  		return
   274  	}
   275  
   276  	onDiskInodeV1 = &onDiskInodeV1Struct{StreamMap: make(map[string][]byte)}
   277  
   278  	err = json.Unmarshal(inodeRec[bytesConsumedByCorruptionDetected+bytesConsumedByVersion:], onDiskInodeV1)
   279  	if nil != err {
   280  		err = fmt.Errorf("%s: inodeRec.<body> for inode %d json.Unmarshal() failed: %v", utils.GetFnName(), inodeNumber, err)
   281  		err = blunder.AddError(err, blunder.CorruptInodeError)
   282  		return
   283  	}
   284  
   285  	inMemoryInode = &inMemoryInodeStruct{
   286  		inodeCacheLRUNext:        nil,
   287  		inodeCacheLRUPrev:        nil,
   288  		dirty:                    false,
   289  		volume:                   vS,
   290  		snapShotID:               snapShotID,
   291  		openLogSegment:           nil,
   292  		inFlightLogSegmentMap:    make(map[uint64]*inFlightLogSegmentStruct),
   293  		inFlightLogSegmentErrors: make(map[uint64]error),
   294  		onDiskInodeV1Struct:      *onDiskInodeV1,
   295  	}
   296  
   297  	inMemoryInode.onDiskInodeV1Struct.InodeNumber = inodeNumber
   298  
   299  	switch inMemoryInode.InodeType {
   300  	case DirType:
   301  		if 0 == inMemoryInode.PayloadObjectNumber {
   302  			inMemoryInode.payload =
   303  				sortedmap.NewBPlusTree(
   304  					vS.maxEntriesPerDirNode,
   305  					sortedmap.CompareString,
   306  					&dirInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
   307  					globals.dirEntryCache)
   308  		} else {
   309  			inMemoryInode.payload, err =
   310  				sortedmap.OldBPlusTree(
   311  					inMemoryInode.PayloadObjectNumber,
   312  					onDiskInodeV1PayloadObjectOffset,
   313  					inMemoryInode.PayloadObjectLength,
   314  					sortedmap.CompareString,
   315  					&dirInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
   316  					globals.dirEntryCache)
   317  			if nil != err {
   318  				err = fmt.Errorf("%s: sortedmap.OldBPlusTree(inodeRec.<body>.PayloadObjectNumber) for DirType inode %d failed: %v", utils.GetFnName(), inodeNumber, err)
   319  				err = blunder.AddError(err, blunder.CorruptInodeError)
   320  				return
   321  			}
   322  		}
   323  	case FileType:
   324  		if 0 == inMemoryInode.PayloadObjectNumber {
   325  			inMemoryInode.payload =
   326  				sortedmap.NewBPlusTree(
   327  					vS.maxExtentsPerFileNode,
   328  					sortedmap.CompareUint64,
   329  					&fileInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
   330  					globals.fileExtentMapCache)
   331  		} else {
   332  			inMemoryInode.payload, err =
   333  				sortedmap.OldBPlusTree(
   334  					inMemoryInode.PayloadObjectNumber,
   335  					onDiskInodeV1PayloadObjectOffset,
   336  					inMemoryInode.PayloadObjectLength,
   337  					sortedmap.CompareUint64,
   338  					&fileInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
   339  					globals.fileExtentMapCache)
   340  			if nil != err {
   341  				err = fmt.Errorf("%s: sortedmap.OldBPlusTree(inodeRec.<body>.PayloadObjectNumber) for FileType inode %d failed: %v", utils.GetFnName(), inodeNumber, err)
   342  				err = blunder.AddError(err, blunder.CorruptInodeError)
   343  				return
   344  			}
   345  		}
   346  	case SymlinkType:
   347  		// Nothing special here
   348  	default:
   349  		err = fmt.Errorf("%s: inodeRec.InodeType for inode %d (%v) not supported", utils.GetFnName(), inodeNumber, inMemoryInode.InodeType)
   350  		err = blunder.AddError(err, blunder.CorruptInodeError)
   351  		return
   352  	}
   353  
   354  	err = nil
   355  	return
   356  }
   357  
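        // inodeCacheFetchWhileLocked looks an inode up in the in-memory inode cache
        // and, on a hit, moves it to the MRU end of the cache LRU. The caller must
        // hold the volumeStruct lock.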
   358  func (vS *volumeStruct) inodeCacheFetchWhileLocked(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) {
   359  	var (
   360  		inodeAsValue sortedmap.Value
   361  	)
   362  
   363  	inodeAsValue, ok, err = vS.inodeCache.GetByKey(inodeNumber)
   364  	if nil != err {
   365  		return
   366  	}
   367  
   368  	if ok {
   369  		inode, ok = inodeAsValue.(*inMemoryInodeStruct)
   370  		if ok {
   371  			vS.inodeCacheTouchWhileLocked(inode)
   372  			err = nil
   373  		} else {
   374  			ok = false
   375  			err = fmt.Errorf("inodeCache[inodeNumber==0x%016X] contains a value not mappable to a *inMemoryInodeStruct", inodeNumber)
   376  		}
   377  	}
   378  
   379  	return
   380  }
   381  
   382  func (vS *volumeStruct) inodeCacheFetch(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) {
   383  	vS.Lock()
   384  	inode, ok, err = vS.inodeCacheFetchWhileLocked(inodeNumber)
   385  	vS.Unlock()
   386  	return
   387  }
   388  
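        // inodeCacheInsertWhileLocked adds an inode to the inode cache and appends it
        // to the MRU end of the cache LRU. The caller must hold the volumeStruct lock.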
   389  func (vS *volumeStruct) inodeCacheInsertWhileLocked(inode *inMemoryInodeStruct) (ok bool, err error) {
   390  	ok, err = vS.inodeCache.Put(inode.InodeNumber, inode)
   391  	if (nil != err) || !ok {
   392  		return
   393  	}
   394  
   395  	// Place inode at the MRU end of inodeCacheLRU
   396  
   397  	if 0 == vS.inodeCacheLRUItems {
   398  		vS.inodeCacheLRUHead = inode
   399  		vS.inodeCacheLRUTail = inode
   400  		vS.inodeCacheLRUItems = 1
   401  	} else {
   402  		inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail
   403  		inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode
   404  
   405  		vS.inodeCacheLRUTail = inode
   406  		vS.inodeCacheLRUItems++
   407  	}
   408  
   409  	return
   410  }
   411  
   412  func (vS *volumeStruct) inodeCacheInsert(inode *inMemoryInodeStruct) (ok bool, err error) {
   413  	vS.Lock()
   414  	ok, err = vS.inodeCacheInsertWhileLocked(inode)
   415  	vS.Unlock()
   416  	return
   417  }
   418  
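        // inodeCacheTouchWhileLocked moves an already-cached inode to the MRU end of
        // the cache LRU. The caller must hold the volumeStruct lock.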
   419  func (vS *volumeStruct) inodeCacheTouchWhileLocked(inode *inMemoryInodeStruct) {
   420  	// Move inode to the MRU end of inodeCacheLRU
   421  
   422  	if inode != vS.inodeCacheLRUTail {
   423  		if inode == vS.inodeCacheLRUHead {
   424  			vS.inodeCacheLRUHead = inode.inodeCacheLRUNext
   425  			vS.inodeCacheLRUHead.inodeCacheLRUPrev = nil
   426  
   427  			inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail
   428  			inode.inodeCacheLRUNext = nil
   429  
   430  			vS.inodeCacheLRUTail.inodeCacheLRUNext = inode
   431  			vS.inodeCacheLRUTail = inode
   432  		} else {
   433  			inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode.inodeCacheLRUNext
   434  			inode.inodeCacheLRUNext.inodeCacheLRUPrev = inode.inodeCacheLRUPrev
   435  
   436  			inode.inodeCacheLRUNext = nil
   437  			inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail
   438  
   439  			vS.inodeCacheLRUTail.inodeCacheLRUNext = inode
   440  			vS.inodeCacheLRUTail = inode
   441  		}
   442  	}
   443  }
   444  
   445  func (vS *volumeStruct) inodeCacheTouch(inode *inMemoryInodeStruct) {
   446  	vS.Lock()
   447  	vS.inodeCacheTouchWhileLocked(inode)
   448  	vS.Unlock()
   449  }
   450  
   451  // The inode cache discard thread calls this routine when the ticker goes off.
   452  func (vS *volumeStruct) inodeCacheDiscard() (discarded uint64, dirty uint64, locked uint64, lruItems uint64) {
   453  	inodesToDrop := uint64(0)
   454  
   455  	vS.Lock()
   456  
   457  	if (vS.inodeCacheLRUItems * globals.inodeSize) > vS.inodeCacheLRUMaxBytes {
   458  		// Check, at most, 1.25 * (minimum_number_to_drop)
   459  		inodesToDrop = (vS.inodeCacheLRUItems * globals.inodeSize) - vS.inodeCacheLRUMaxBytes
   460  		inodesToDrop = inodesToDrop / globals.inodeSize
   461  		inodesToDrop += inodesToDrop / 4
   462  		for (inodesToDrop > 0) && ((vS.inodeCacheLRUItems * globals.inodeSize) > vS.inodeCacheLRUMaxBytes) {
   463  			inodesToDrop--
   464  
   465  			ic := vS.inodeCacheLRUHead
   466  
   467  			// Create a DLM lock object
   468  			id := dlm.GenerateCallerID()
   469  			inodeRWLock, _ := vS.InitInodeLock(ic.InodeNumber, id)
   470  			err := inodeRWLock.TryWriteLock()
   471  
   472  			// Inode is locked; skip it
   473  			if err != nil {
   474  				// Move inode to tail of LRU
   475  				vS.inodeCacheTouchWhileLocked(ic)
   476  				locked++
   477  				continue
   478  			}
   479  
   480  			if ic.dirty {
   481  				// The inode is busy - drop the DLM lock and move to tail
   482  				inodeRWLock.Unlock()
   483  				dirty++
   484  				vS.inodeCacheTouchWhileLocked(ic)
   485  				continue
   486  			}
   487  
   488  			var ok bool
   489  
   490  			discarded++
   491  			ok, err = vS.inodeCacheDropWhileLocked(ic)
   492  			if err != nil || !ok {
   493  				pStr := fmt.Errorf("The inode was not found in the inode cache - ok: %v err: %v", ok, err)
   494  				panic(pStr)
   495  			}
   496  
   497  			inodeRWLock.Unlock()
   498  
   499  			// NOTE: vS.inodeCacheDropWhileLocked() removed the inode from the LRU list so
   500  			// the head is now different
   501  		}
   502  	}
   503  	lruItems = vS.inodeCacheLRUItems
   504  	vS.Unlock()
   505  	//logger.Infof("discard: %v dirty: %v locked: %v LRUitems: %v", discarded, dirty, locked, lruItems)
   506  	return
   507  }
   508  
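        // inodeCacheDropWhileLocked removes an inode from the inode cache and unlinks
        // it from the cache LRU. The caller must hold the volumeStruct lock.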
   509  func (vS *volumeStruct) inodeCacheDropWhileLocked(inode *inMemoryInodeStruct) (ok bool, err error) {
   510  	ok, err = vS.inodeCache.DeleteByKey(inode.InodeNumber)
   511  	if (nil != err) || !ok {
   512  		return
   513  	}
   514  
   515  	if inode == vS.inodeCacheLRUHead {
   516  		if inode == vS.inodeCacheLRUTail {
   517  			vS.inodeCacheLRUHead = nil
   518  			vS.inodeCacheLRUTail = nil
   519  			vS.inodeCacheLRUItems = 0
   520  		} else {
   521  			vS.inodeCacheLRUHead = inode.inodeCacheLRUNext
   522  			vS.inodeCacheLRUHead.inodeCacheLRUPrev = nil
   523  			vS.inodeCacheLRUItems--
   524  
   525  			inode.inodeCacheLRUNext = nil
   526  		}
   527  	} else {
   528  		if inode == vS.inodeCacheLRUTail {
   529  			vS.inodeCacheLRUTail = inode.inodeCacheLRUPrev
   530  			vS.inodeCacheLRUTail.inodeCacheLRUNext = nil
   531  			vS.inodeCacheLRUItems--
   532  
   533  			inode.inodeCacheLRUPrev = nil
   534  		} else {
   535  			inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode.inodeCacheLRUNext
   536  			inode.inodeCacheLRUNext.inodeCacheLRUPrev = inode.inodeCacheLRUPrev
   537  			vS.inodeCacheLRUItems--
   538  
   539  			inode.inodeCacheLRUNext = nil
   540  			inode.inodeCacheLRUPrev = nil
   541  		}
   542  	}
   543  
   544  	return
   545  }
   546  
   547  func (vS *volumeStruct) inodeCacheDrop(inode *inMemoryInodeStruct) (ok bool, err error) {
   548  	vS.Lock()
   549  	ok, err = vS.inodeCacheDropWhileLocked(inode)
   550  	vS.Unlock()
   551  	return
   552  }
   553  
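        // fetchInode returns the in-memory inode for inodeNumber, satisfying the
        // request from the inode cache when possible and otherwise loading it from
        // headhunter and inserting it into the cache; the loop restarts if another
        // goroutine inserts the same inode first.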
   554  func (vS *volumeStruct) fetchInode(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) {
   555  	for {
   556  		inode, ok, err = vS.inodeCacheFetch(inodeNumber)
   557  		if nil != err {
   558  			return
   559  		}
   560  
   561  		if ok {
   562  			return
   563  		}
   564  
   565  		inode, ok, err = vS.fetchOnDiskInode(inodeNumber)
   566  		if nil != err {
   567  			return
   568  		}
   569  		if !ok {
   570  			err = fmt.Errorf("%s.fetchInode(0x%016X) not found", vS.volumeName, inodeNumber)
   571  			return
   572  		}
   573  
   574  		ok, err = vS.inodeCacheInsert(inode)
   575  		if nil != err {
   576  			return
   577  		}
   578  
   579  		if ok {
   580  			return
   581  		}
   582  
   583  		// If we reach here, somebody beat us to it... just restart the fetch...
   584  	}
   585  }
   586  
   587  // Fetch inode with inode type checking
   588  func (vS *volumeStruct) fetchInodeType(inodeNumber InodeNumber, expectedType InodeType) (inode *inMemoryInodeStruct, err error) {
   589  	inode, ok, err := vS.fetchInode(inodeNumber)
   590  	if nil != err {
   591  		return
   592  	}
   593  	if !ok {
   594  		err = fmt.Errorf("%s: expected inode %d volume '%s' to be type %v, but it was unallocated",
   595  			utils.GetFnName(), inodeNumber, vS.volumeName, expectedType)
   596  		err = blunder.AddError(err, blunder.NotFoundError)
   597  		return
   598  	}
   599  	if inode.InodeType == expectedType {
   600  		// success
   601  		return
   602  	}
   603  
   604  	err = fmt.Errorf("%s: expected inode %d volume '%s' to be type %v, got %v",
   605  		utils.GetFnName(), inode.InodeNumber, vS.volumeName, expectedType, inode.InodeType)
   606  
   607  	var errVal blunder.FsError
   608  	switch expectedType {
   609  	case DirType:
   610  		errVal = blunder.NotDirError
   611  	case FileType:
   612  		errVal = blunder.NotFileError
   613  	case SymlinkType:
   614  		errVal = blunder.NotSymlinkError
   615  	default:
   616  		panic(fmt.Sprintf("unknown inode type=%v!", expectedType))
   617  	}
   618  	err = blunder.AddError(err, errVal)
   619  
   620  	return
   621  }
   622  
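        // makeInMemoryInodeWithThisInodeNumber constructs a new, dirty in-memory inode
        // for the supplied (already allocated) inode number, recording the nonce
        // portion of that number, stamping all four times with "now", and initializing
        // empty Stream and LogSegment maps.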
   623  func (vS *volumeStruct) makeInMemoryInodeWithThisInodeNumber(inodeType InodeType, fileMode InodeMode, userID InodeUserID, groupID InodeGroupID, inodeNumber InodeNumber, volumeLocked bool) (inMemoryInode *inMemoryInodeStruct) {
   624  	var (
   625  		birthTime      time.Time
   626  		nonce          uint64
   627  		snapShotID     uint64
   628  		snapShotIDType headhunter.SnapShotIDType
   629  	)
   630  
   631  	snapShotIDType, snapShotID, nonce = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
   632  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
   633  		logger.Fatalf("makeInMemoryInodeWithThisInodeNumber for headhunter.SnapShotIDTypeDotSnapShot not allowed")
   634  	}
   635  
   636  	birthTime = time.Now()
   637  
   638  	inMemoryInode = &inMemoryInodeStruct{
   639  		inodeCacheLRUNext:        nil,
   640  		inodeCacheLRUPrev:        nil,
   641  		dirty:                    true,
   642  		volume:                   vS,
   643  		snapShotID:               snapShotID,
   644  		openLogSegment:           nil,
   645  		inFlightLogSegmentMap:    make(map[uint64]*inFlightLogSegmentStruct),
   646  		inFlightLogSegmentErrors: make(map[uint64]error),
   647  		onDiskInodeV1Struct: onDiskInodeV1Struct{
   648  			InodeNumber:      InodeNumber(nonce),
   649  			InodeType:        inodeType,
   650  			CreationTime:     birthTime,
   651  			ModificationTime: birthTime,
   652  			AccessTime:       birthTime,
   653  			AttrChangeTime:   birthTime,
   654  			NumWrites:        0,
   655  			Mode:             fileMode,
   656  			UserID:           userID,
   657  			GroupID:          groupID,
   658  			StreamMap:        make(map[string][]byte),
   659  			LogSegmentMap:    make(map[uint64]uint64),
   660  		},
   661  	}
   662  
   663  	return
   664  }
   665  
   666  func (vS *volumeStruct) makeInMemoryInode(inodeType InodeType, fileMode InodeMode, userID InodeUserID, groupID InodeGroupID) (inMemoryInode *inMemoryInodeStruct, err error) {
   667  	inodeNumberAsUint64 := vS.headhunterVolumeHandle.FetchNonce()
   668  
   669  	inMemoryInode = vS.makeInMemoryInodeWithThisInodeNumber(inodeType, fileMode, userID, groupID, InodeNumber(inodeNumberAsUint64), false)
   670  
   671  	return
   672  }
   673  
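        // PatchInode forcibly (re)creates the inode identified by inodeNumber with the
        // supplied attributes: it refuses dirty cached inodes, drops any clean cached
        // copy, builds a fresh in-memory inode (including "." and ".." entries for
        // directories), and flushes it to headhunter.
        //
        // A hypothetical repair-style invocation (variable names assumed) might look like:
        //
        //	err := vS.PatchInode(dirInodeNumber, DirType, 2, PosixModePerm, userID, groupID, parentDirInodeNumber, "")
        //
        // where dirInodeNumber and parentDirInodeNumber must be live (non-SnapShot)
        // inode numbers.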
   674  func (vS *volumeStruct) PatchInode(inodeNumber InodeNumber, inodeType InodeType, linkCount uint64, mode InodeMode, userID InodeUserID, groupID InodeGroupID, parentInodeNumber InodeNumber, symlinkTarget string) (err error) {
   675  	var (
   676  		callerID                              dlm.CallerID
   677  		inode                                 *inMemoryInodeStruct
   678  		inodeNumberDecodedAsInodeNumber       InodeNumber
   679  		inodeNumberDecodedAsUint64            uint64
   680  		inodeRWLock                           *dlm.RWLockStruct
   681  		modeAdornedWithInodeType              InodeMode
   682  		ok                                    bool
   683  		parentInodeNumberDecodedAsInodeNumber InodeNumber
   684  		parentInodeNumberDecodedAsUint64      uint64
   685  		payload                               sortedmap.BPlusTree
   686  		snapShotIDType                        headhunter.SnapShotIDType
   687  	)
   688  
   689  	snapShotIDType, _, inodeNumberDecodedAsUint64 = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
   690  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
   691  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) must provide a non-SnapShot inodeNumber", inodeNumber)
   692  		return
   693  	}
   694  	inodeNumberDecodedAsInodeNumber = InodeNumber(inodeNumberDecodedAsUint64)
   695  
   696  	switch inodeType {
   697  	case DirType:
   698  		if 2 != linkCount {
   699  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,linkCount==%v,,,,,) must set linkCount to 2", inodeNumber, linkCount)
   700  			return
   701  		}
   702  		if InodeNumber(0) == parentInodeNumber {
   703  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0,) must provide a non-zero parentInodeNumber", inodeNumber)
   704  			return
   705  		}
   706  		if (RootDirInodeNumber == inodeNumber) && (RootDirInodeNumber != parentInodeNumber) {
   707  			err = fmt.Errorf("PatchInode(inodeNumber==RootDirInodeNumber,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) must provide RootDirInode's parent as also RootDirInodeNumber", parentInodeNumber)
   708  			return
   709  		}
   710  		snapShotIDType, _, parentInodeNumberDecodedAsUint64 = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(parentInodeNumber))
   711  		if headhunter.SnapShotIDTypeLive != snapShotIDType {
   712  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) must provide a non-SnapShot parentInodeNumber", inodeNumber, parentInodeNumber)
   713  			return
   714  		}
   715  		parentInodeNumberDecodedAsInodeNumber = InodeNumber(parentInodeNumberDecodedAsUint64)
   716  	case FileType:
   717  		if 0 == linkCount {
   718  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==FileType,linkCount==0,,,,,) must provide a non-zero linkCount", inodeNumber)
   719  			return
   720  		}
   721  	case SymlinkType:
   722  		if 0 == linkCount {
   723  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==SymlinkType,linkCount==0,,,,,) must provide a non-zero linkCount", inodeNumber)
   724  			return
   725  		}
   726  		if "" == symlinkTarget {
   727  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==SymlinkType,,,,,,symlinkTarget==\"\") must provide a non-empty symlinkTarget", inodeNumber)
   728  			return
   729  		}
   730  	default:
   731  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==%v,,,,,,) must provide an inodeType of DirType(%v), FileType(%v), or SymlinkType(%v)", inodeNumber, inodeType, DirType, FileType, SymlinkType)
   732  		return
   733  	}
   734  
   735  	modeAdornedWithInodeType, err = determineMode(mode, inodeType)
   736  	if nil != err {
   737  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==%v,,mode==0o%011o,,,,) failed: %v", inodeNumber, inodeType, mode, err)
   738  		return
   739  	}
   740  
   741  	vS.Lock()
   742  
   743  	callerID = dlm.GenerateCallerID()
   744  	inodeRWLock, _ = vS.InitInodeLock(inodeNumber, callerID)
   745  	err = inodeRWLock.TryWriteLock()
   746  	if nil != err {
   747  		vS.Unlock()
   748  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) couldn't create a *dlm.RWLockStruct: %v", inodeNumber, err)
   749  		return
   750  	}
   751  
   752  	inode, ok, err = vS.inodeCacheFetchWhileLocked(inodeNumber)
   753  	if nil != err {
   754  		_ = inodeRWLock.Unlock()
   755  		vS.Unlock()
   756  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) couldn't search inodeCache for pre-existing inode: %v", inodeNumber, err)
   757  		return
   758  	}
   759  	if ok {
   760  		if inode.dirty {
   761  			_ = inodeRWLock.Unlock()
   762  			vS.Unlock()
   763  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) of dirty Inode is not allowed", inodeNumber)
   764  			return
   765  		}
   766  		ok, err = vS.inodeCacheDropWhileLocked(inode)
   767  		if nil != err {
   768  			_ = inodeRWLock.Unlock()
   769  			vS.Unlock()
   770  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) drop of pre-existing inode from inodeCache failed: %v", inodeNumber, err)
   771  			return
   772  		}
   773  		if !ok {
   774  			_ = inodeRWLock.Unlock()
   775  			vS.Unlock()
   776  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) drop of pre-existing inode from inodeCache returned !ok", inodeNumber)
   777  			return
   778  		}
   779  	}
   780  
   781  	inode = vS.makeInMemoryInodeWithThisInodeNumber(inodeType, modeAdornedWithInodeType, userID, groupID, inodeNumberDecodedAsInodeNumber, true)
   782  
   783  	inode.dirty = true
   784  
   785  	inode.onDiskInodeV1Struct.LinkCount = linkCount
   786  
   787  	switch inodeType {
   788  	case DirType:
   789  		payload = sortedmap.NewBPlusTree(
   790  			vS.maxEntriesPerDirNode,
   791  			sortedmap.CompareString,
   792  			&dirInodeCallbacks{treeNodeLoadable{inode: inode}},
   793  			globals.dirEntryCache)
   794  
   795  		ok, err = payload.Put(".", inodeNumberDecodedAsInodeNumber)
   796  		if nil != err {
   797  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) failed to insert \".\" dirEntry: %v", inodeNumber, err)
   798  			panic(err)
   799  		}
   800  		if !ok {
   801  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) insert \".\" dirEntry got a !ok", inodeNumber)
   802  			panic(err)
   803  		}
   804  
   805  		ok, err = payload.Put("..", parentInodeNumberDecodedAsInodeNumber)
   806  		if nil != err {
   807  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) failed to insert \"..\" dirEntry: %v", inodeNumber, parentInodeNumber, err)
   808  			panic(err)
   809  		}
   810  		if !ok {
   811  			err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) insert \"..\" dirEntry got a !ok", inodeNumber, parentInodeNumber)
   812  			panic(err)
   813  		}
   814  
   815  		inode.payload = payload
   816  		inode.onDiskInodeV1Struct.SymlinkTarget = ""
   817  	case FileType:
   818  		payload = sortedmap.NewBPlusTree(
   819  			vS.maxExtentsPerFileNode,
   820  			sortedmap.CompareUint64,
   821  			&fileInodeCallbacks{treeNodeLoadable{inode: inode}},
   822  			globals.fileExtentMapCache)
   823  
   824  		inode.payload = payload
   825  		inode.onDiskInodeV1Struct.SymlinkTarget = ""
   826  	case SymlinkType:
   827  		inode.payload = nil
   828  		inode.onDiskInodeV1Struct.SymlinkTarget = symlinkTarget
   829  	}
   830  
   831  	ok, err = vS.inodeCacheInsertWhileLocked(inode)
   832  	if nil != err {
   833  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) failed to insert inode in inodeCache: %v", inodeNumber, err)
   834  		panic(err)
   835  	}
   836  	if !ok {
   837  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) insert of inode in inodeCache got a !ok", inodeNumber)
   838  		panic(err)
   839  	}
   840  
   841  	_ = inodeRWLock.Unlock()
   842  
   843  	vS.Unlock()
   844  
   845  	err = vS.flushInode(inode)
   846  	if nil != err {
   847  		err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) failed to flush: %v", inodeNumber, err)
   848  		panic(err)
   849  	}
   850  
   851  	return
   852  }
   853  
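        // convertToOnDiskInodeV1 produces a standalone copy of the inode's on-disk
        // form: for Dir and File inodes it first flushes and prunes the payload B+Tree
        // to capture its current object number/length, and it deep-copies the Stream
        // and LogSegment maps so the result does not alias the cached inode.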
   854  func (inMemoryInode *inMemoryInodeStruct) convertToOnDiskInodeV1() (onDiskInodeV1 *onDiskInodeV1Struct, err error) {
   855  	onDiskInode := inMemoryInode.onDiskInodeV1Struct
   856  
   857  	if (DirType == inMemoryInode.InodeType) || (FileType == inMemoryInode.InodeType) {
   858  		content := inMemoryInode.payload.(sortedmap.BPlusTree)
   859  		payloadObjectNumber, payloadObjectOffset, payloadObjectLength, flushErr := content.Flush(false)
   860  		if nil != flushErr {
   861  			panic(flushErr)
   862  		}
   863  		pruneErr := content.Prune()
   864  		if nil != pruneErr {
   865  			panic(pruneErr)
   866  		}
   867  		if onDiskInodeV1PayloadObjectOffset != payloadObjectOffset {
   868  			flushErr = fmt.Errorf("Logic Error: content.Flush() should have returned payloadObjectOffset == %v", onDiskInodeV1PayloadObjectOffset)
   869  			panic(flushErr)
   870  		}
   871  		onDiskInode.PayloadObjectNumber = payloadObjectNumber
   872  		onDiskInode.PayloadObjectLength = payloadObjectLength
   873  	}
   874  
   875  	// maps are reference types, so these need to be copied manually
   876  
   877  	onDiskInode.StreamMap = make(map[string][]byte)
   878  	for key, value := range inMemoryInode.StreamMap {
   879  		valueCopy := make([]byte, len(value))
   880  		copy(valueCopy, value)
   881  		onDiskInode.StreamMap[key] = valueCopy
   882  	}
   883  
   884  	onDiskInode.LogSegmentMap = make(map[uint64]uint64)
   885  	for logSegmentNumber, logSegmentBytesUsed := range inMemoryInode.LogSegmentMap {
   886  		onDiskInode.LogSegmentMap[logSegmentNumber] = logSegmentBytesUsed
   887  	}
   888  
   889  	return &onDiskInode, nil
   890  }
   891  
   892  func (vS *volumeStruct) flushInode(inode *inMemoryInodeStruct) (err error) {
   893  	err = vS.flushInodes([]*inMemoryInodeStruct{inode})
   894  	return
   895  }
   896  
   897  func (vS *volumeStruct) flushInodeNumber(inodeNumber InodeNumber) (err error) {
   898  	err = vS.flushInodeNumbers([]InodeNumber{inodeNumber})
   899  	return
   900  }
   901  
   902  // REVIEW: Need to clearly explain what "flush" means (i.e. "to HH", not "to disk")
   903  
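        // flushInodes writes the state of the listed inodes out to headhunter; per the
        // REVIEW note above, "flush" here means handing inode records to headhunter,
        // not forcing them to disk. In-flight file data is flushed first, Dir/File
        // payload B+Trees are written out, dirty inode records are batched into
        // PutInodeRecs(), and any log segments left empty are deleted as a first pass
        // of garbage collection.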
   904  func (vS *volumeStruct) flushInodes(inodes []*inMemoryInodeStruct) (err error) {
   905  	var (
   906  		dirtyInodeNumbers         []uint64
   907  		dirtyInodeRecBytes        []byte
   908  		dirtyInodeRecs            [][]byte
   909  		emptyLogSegments          []uint64
   910  		emptyLogSegmentsThisInode []uint64
   911  		inode                     *inMemoryInodeStruct
   912  		logSegmentNumber          uint64
   913  		logSegmentValidBytes      uint64
   914  		onDiskInodeV1             *onDiskInodeV1Struct
   915  		onDiskInodeV1Buf          []byte
   916  		payloadAsBPlusTree        sortedmap.BPlusTree
   917  		payloadObjectLength       uint64
   918  		payloadObjectNumber       uint64
   919  		toFlushInodeNumbers       []uint64
   920  	)
   921  
   922  	halter.Trigger(halter.InodeFlushInodesEntry)
   923  	defer halter.Trigger(halter.InodeFlushInodesExit)
   924  
   925  	toFlushInodeNumbers = make([]uint64, 0, len(inodes))
   926  	for _, inode = range inodes {
   927  		toFlushInodeNumbers = append(toFlushInodeNumbers, uint64(inode.InodeNumber))
   928  	}
   929  
   930  	evtlog.Record(evtlog.FormatFlushInodesEntry, vS.volumeName, toFlushInodeNumbers)
   931  
   932  	// Assemble slice of "dirty" inodes while flushing them
   933  	dirtyInodeNumbers = make([]uint64, 0, len(inodes))
   934  	dirtyInodeRecs = make([][]byte, 0, len(inodes))
   935  	emptyLogSegments = make([]uint64, 0)
   936  
   937  	for _, inode = range inodes {
   938  		if FileType == inode.InodeType {
   939  			err = vS.doFileInodeDataFlush(inode)
   940  			if nil != err {
   941  				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
   942  				logger.ErrorWithError(err)
   943  				err = blunder.AddError(err, blunder.InodeFlushError)
   944  				return
   945  			}
   946  			emptyLogSegmentsThisInode = make([]uint64, 0)
   947  			for logSegmentNumber, logSegmentValidBytes = range inode.LogSegmentMap {
   948  				if 0 == logSegmentValidBytes {
   949  					emptyLogSegmentsThisInode = append(emptyLogSegmentsThisInode, logSegmentNumber)
   950  				}
   951  			}
   952  			for _, logSegmentNumber = range emptyLogSegmentsThisInode {
   953  				delete(inode.LogSegmentMap, logSegmentNumber)
   954  			}
   955  			emptyLogSegments = append(emptyLogSegments, emptyLogSegmentsThisInode...)
   956  		}
   957  		if SymlinkType != inode.InodeType {
   958  			// (FileType == inode.InodeType) || (DirType == inode.InodeType)
   959  			payloadAsBPlusTree = inode.payload.(sortedmap.BPlusTree)
   960  			payloadObjectNumber, _, payloadObjectLength, err = payloadAsBPlusTree.Flush(false)
   961  			if nil != err {
   962  				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
   963  				logger.ErrorWithError(err)
   964  				err = blunder.AddError(err, blunder.InodeFlushError)
   965  				return
   966  			}
   967  			if payloadObjectNumber > inode.PayloadObjectNumber {
   968  				if !inode.dirty {
   969  					err = fmt.Errorf("Logic error: inode.dirty should have been true")
   970  					evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
   971  					logger.ErrorWithError(err)
   972  					err = blunder.AddError(err, blunder.InodeFlushError)
   973  					return
   974  				}
   975  				// REVIEW: What if cache pressure flushed before we got here?
   976  				//         Is it possible that Number doesn't get updated?
   977  
   978  				if inode.PayloadObjectNumber != 0 {
   979  					logger.Tracef("flushInodes(): volume '%s' %v inode %d: updating Payload"+
   980  						" from Object %016X to %016X bytes %d to %d",
   981  						vS.volumeName, inode.InodeType, inode.InodeNumber,
   982  						inode.PayloadObjectNumber, payloadObjectNumber,
   983  						inode.PayloadObjectLength, payloadObjectLength)
   984  				}
   985  				inode.PayloadObjectNumber = payloadObjectNumber
   986  				inode.PayloadObjectLength = payloadObjectLength
   987  
   988  				evtlog.Record(evtlog.FormatFlushInodesDirOrFilePayloadObjectNumberUpdated, vS.volumeName, uint64(inode.InodeNumber), payloadObjectNumber)
   989  			}
   990  		}
   991  		if inode.dirty {
   992  			onDiskInodeV1, err = inode.convertToOnDiskInodeV1()
   993  			if nil != err {
   994  				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
   995  				logger.ErrorWithError(err)
   996  				err = blunder.AddError(err, blunder.InodeFlushError)
   997  				return
   998  			}
   999  			onDiskInodeV1Buf, err = json.Marshal(onDiskInodeV1)
  1000  			if nil != err {
  1001  				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error())
  1002  				logger.ErrorWithError(err)
  1003  				err = blunder.AddError(err, blunder.InodeFlushError)
  1004  				return
  1005  			}
  1006  			dirtyInodeRecBytes = make([]byte, 0, len(globals.inodeRecDefaultPreambleBuf)+len(onDiskInodeV1Buf))
  1007  			dirtyInodeRecBytes = append(dirtyInodeRecBytes, globals.inodeRecDefaultPreambleBuf...)
  1008  			dirtyInodeRecBytes = append(dirtyInodeRecBytes, onDiskInodeV1Buf...)
  1009  			dirtyInodeNumbers = append(dirtyInodeNumbers, uint64(inode.InodeNumber))
  1010  			dirtyInodeRecs = append(dirtyInodeRecs, dirtyInodeRecBytes)
  1011  		}
  1012  	}
  1013  
  1014  	// Go update HeadHunter (if necessary)
  1015  	if 0 < len(dirtyInodeNumbers) {
  1016  		err = vS.headhunterVolumeHandle.PutInodeRecs(dirtyInodeNumbers, dirtyInodeRecs)
  1017  		if nil != err {
  1018  			evtlog.Record(evtlog.FormatFlushInodesErrorOnHeadhunterPut, vS.volumeName, err.Error())
  1019  			logger.ErrorWithError(err)
  1020  			err = blunder.AddError(err, blunder.InodeFlushError)
  1021  			return
  1022  		}
  1023  		for _, inode = range inodes {
  1024  			inode.dirty = false
  1025  		}
  1026  	}
  1027  
  1028  	// Now do phase one of garbage collection
  1029  	if 0 < len(emptyLogSegments) {
  1030  		for _, logSegmentNumber = range emptyLogSegments {
  1031  			err = vS.headhunterVolumeHandle.DeleteLogSegmentRec(logSegmentNumber)
  1032  			if nil != err {
  1033  				logger.WarnfWithError(err, "couldn't delete garbage log segment")
  1034  			}
  1035  		}
  1036  	}
  1037  
  1038  	evtlog.Record(evtlog.FormatFlushInodesExit, vS.volumeName, toFlushInodeNumbers)
  1039  
  1040  	err = nil
  1041  	return
  1042  }
  1043  
  1044  func (vS *volumeStruct) flushInodeNumbers(inodeNumbers []InodeNumber) (err error) {
  1045  	var (
  1046  		inode       *inMemoryInodeStruct
  1047  		inodes      []*inMemoryInodeStruct
  1048  		inodeNumber InodeNumber
  1049  		ok          bool
  1050  	)
  1051  
  1052  	// Fetch referenced inodes
  1053  	inodes = make([]*inMemoryInodeStruct, 0, len(inodeNumbers))
  1054  	for _, inodeNumber = range inodeNumbers {
  1055  		inode, ok, err = vS.fetchInode(inodeNumber)
  1056  		if nil != err {
  1057  			// the inode is locked so this should never happen (unless the inode
  1058  			// was evicted from the cache and it was corrupt when read from disk)
  1059  			// (err includes volume name and inode number)
  1060  			logger.ErrorfWithError(err, "%s: fetch of inode to flush failed", utils.GetFnName())
  1061  			err = blunder.AddError(err, blunder.InodeFlushError)
  1062  			return
  1063  		}
  1064  		if !ok {
  1065  			// this should never happen (see above)
  1066  			err = fmt.Errorf("%s: fetch of inode %d volume '%s' failed because it is unallocated",
  1067  				utils.GetFnName(), inodeNumber, vS.volumeName)
  1068  			logger.ErrorWithError(err)
  1069  			err = blunder.AddError(err, blunder.NotFoundError)
  1070  			return
  1071  		}
  1072  
  1073  		inodes = append(inodes, inode)
  1074  	}
  1075  
  1076  	err = vS.flushInodes(inodes)
  1077  
  1078  	return
  1079  }
  1080  
  1081  func accountNameToVolumeName(accountName string) (volumeName string, ok bool) {
  1082  	var (
  1083  		volume *volumeStruct
  1084  	)
  1085  
  1086  	globals.Lock()
  1087  
  1088  	volume, ok = globals.accountMap[accountName]
  1089  	if ok {
  1090  		volumeName = volume.volumeName
  1091  	}
  1092  
  1093  	globals.Unlock()
  1094  
  1095  	return
  1096  }
  1097  
  1098  func volumeNameToAccountName(volumeName string) (accountName string, ok bool) {
  1099  	var (
  1100  		volume *volumeStruct
  1101  	)
  1102  
  1103  	globals.Lock()
  1104  
  1105  	volume, ok = globals.volumeMap[volumeName]
  1106  	if ok {
  1107  		accountName = volume.accountName
  1108  	}
  1109  
  1110  	globals.Unlock()
  1111  
  1112  	return
  1113  }
  1114  
  1115  func volumeNameToActivePeerPrivateIPAddr(volumeName string) (activePeerPrivateIPAddr string, ok bool) {
  1116  	var (
  1117  		volume *volumeStruct
  1118  	)
  1119  
  1120  	globals.Lock()
  1121  
  1122  	volume, ok = globals.volumeMap[volumeName]
  1123  
  1124  	if ok {
  1125  		activePeerPrivateIPAddr = volume.volumeGroup.activePeerPrivateIPAddr
  1126  	}
  1127  
  1128  	globals.Unlock()
  1129  
  1130  	return
  1131  }
  1132  
  1133  func fetchVolumeHandle(volumeName string) (volumeHandle VolumeHandle, err error) {
  1134  	globals.Lock()
  1135  	volume, ok := globals.volumeMap[volumeName]
  1136  	globals.Unlock()
  1137  
  1138  	if !ok {
  1139  		err = fmt.Errorf("%s: volumeName \"%v\" not found", utils.GetFnName(), volumeName)
  1140  		err = blunder.AddError(err, blunder.NotFoundError)
  1141  		return
  1142  	}
  1143  
  1144  	volumeHandle = volume
  1145  
  1146  	volume.Lock()         // REVIEW: Once Tracker https://www.pivotaltracker.com/story/show/133377567
  1147  	defer volume.Unlock() //         is resolved, these two lines should be removed
  1148  
  1149  	if !volume.served {
  1150  		err = fmt.Errorf("%s: volumeName \"%v\" not served", utils.GetFnName(), volumeName)
  1151  		err = blunder.AddError(err, blunder.NotActiveError)
  1152  		return
  1153  	}
  1154  
  1155  	_, ok, err = volume.headhunterVolumeHandle.GetInodeRec(uint64(RootDirInodeNumber))
  1156  	if nil != err {
  1157  		// disk corruption of the inode btree (or software error)
  1158  		err = fmt.Errorf("%s: unable to lookup root inode for volume '%s': %v",
  1159  			utils.GetFnName(), volume.volumeName, err)
  1160  		err = blunder.AddError(err, blunder.NotFoundError)
        		return
  1161  	}
  1162  	if !ok {
  1163  		// First access didn't find root dir... so create it
  1164  		_, err = volume.createRootOrSubDir(PosixModePerm, 0, 0, true)
  1165  		if nil != err {
  1166  			err = fmt.Errorf("%s: unable to create root inode for volume '%s': %v",
  1167  				utils.GetFnName(), volume.volumeName, err)
  1168  			err = blunder.AddError(err, blunder.NotFoundError)
        			return
  1169  		}
  1170  	}
  1171  
  1172  	// If we get this far, return values are already set as desired
  1173  
  1174  	err = nil
  1175  
  1176  	return
  1177  }
  1178  
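        // provisionPhysicalContainer creates and records a new Swift container (using
        // the layout's storage policy) each time the layout's rotation count reaches a
        // multiple of maxObjectsPerContainer. The caller must hold the volumeStruct lock.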
  1179  func (vS *volumeStruct) provisionPhysicalContainer(physicalContainerLayout *physicalContainerLayoutStruct) (err error) {
  1180  	if 0 == (physicalContainerLayout.containerNameSliceLoopCount % physicalContainerLayout.maxObjectsPerContainer) {
  1181  		// We need to provision a new PhysicalContainer in this PhysicalContainerLayout
  1182  
  1183  		physicalContainerNameSuffix := vS.headhunterVolumeHandle.FetchNonce()
  1184  
  1185  		newContainerName := fmt.Sprintf("%s%s", physicalContainerLayout.containerNamePrefix, utils.Uint64ToHexStr(physicalContainerNameSuffix))
  1186  
  1187  		storagePolicyHeaderValues := []string{vS.defaultPhysicalContainerLayout.containerStoragePolicy}
  1188  		newContainerHeaders := make(map[string][]string)
  1189  		newContainerHeaders["X-Storage-Policy"] = storagePolicyHeaderValues
  1190  
  1191  		err = swiftclient.ContainerPut(vS.accountName, newContainerName, newContainerHeaders)
  1192  		if nil != err {
  1193  			return
  1194  		}
  1195  
  1196  		physicalContainerLayout.containerNameSlice[physicalContainerLayout.containerNameSliceNextIndex] = newContainerName
  1197  	}
  1198  
  1199  	err = nil
  1200  	return
  1201  }
  1202  
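        // provisionObject picks the next container in the default physical container
        // layout (provisioning a new container when the rotation requires it) and
        // pairs it with a freshly fetched nonce that will name the new object.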
  1203  func (vS *volumeStruct) provisionObject() (containerName string, objectNumber uint64, err error) {
  1204  	objectNumber = vS.headhunterVolumeHandle.FetchNonce()
  1205  
  1206  	vS.Lock()
  1207  
  1208  	err = vS.provisionPhysicalContainer(vS.defaultPhysicalContainerLayout)
  1209  	if nil != err {
  1210  		vS.Unlock()
  1211  		return
  1212  	}
  1213  
  1214  	containerName = vS.defaultPhysicalContainerLayout.containerNameSlice[vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex]
  1215  
  1216  	vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex++
  1217  
  1218  	if vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex == vS.defaultPhysicalContainerLayout.containersPerPeer {
  1219  		vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex = 0
  1220  		vS.defaultPhysicalContainerLayout.containerNameSliceLoopCount++
  1221  	}
  1222  
  1223  	vS.Unlock()
  1224  
  1225  	err = nil
  1226  	return
  1227  }
  1228  
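        // Access reports whether the caller (userID/groupID/otherGroupIDs) may access
        // the inode in the manner described by accessMode (F_OK, P_OK, or a mask of
        // R_OK|W_OK|X_OK), applying the root and OwnerOverride special cases and
        // denying writes to anything other than the live view; any error fetching the
        // inode simply yields false.
        //
        // A hypothetical caller-side check (the non-override AccessOverride value is
        // assumed here to be named NoOverride) might look like:
        //
        //	if !vS.Access(fileInodeNumber, userID, groupID, nil, W_OK, NoOverride) {
        //		// deny the write
        //	}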
  1229  func (vS *volumeStruct) Access(inodeNumber InodeNumber, userID InodeUserID, groupID InodeGroupID, otherGroupIDs []InodeGroupID, accessMode InodeMode, override AccessOverride) (accessReturn bool) {
  1230  	var (
  1231  		adjustedInodeNumber InodeNumber
  1232  		err                 error
  1233  		groupIDCheck        bool
  1234  		ok                  bool
  1235  		otherGroupID        InodeGroupID
  1236  		ourInode            *inMemoryInodeStruct
  1237  		ourInodeGroupID     InodeGroupID
  1238  		ourInodeMode        InodeMode
  1239  		ourInodeUserID      InodeUserID
  1240  		snapShotIDType      headhunter.SnapShotIDType
  1241  	)
  1242  
  1243  	snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1244  
  1245  	switch snapShotIDType {
  1246  	case headhunter.SnapShotIDTypeLive:
  1247  		adjustedInodeNumber = inodeNumber
  1248  	case headhunter.SnapShotIDTypeSnapShot:
  1249  		adjustedInodeNumber = inodeNumber
  1250  	case headhunter.SnapShotIDTypeDotSnapShot:
  1251  		adjustedInodeNumber = RootDirInodeNumber
  1252  	default:
  1253  		logger.Fatalf("headhunter.SnapShotU64Decode(inodeNumber == 0x%016X) returned unknown snapShotIDType: %v", inodeNumber, snapShotIDType)
  1254  	}
  1255  	if (headhunter.SnapShotIDTypeLive != snapShotIDType) && (0 != (W_OK & accessMode)) {
  1256  		err = blunder.NewError(blunder.InvalidArgError, "Access() where accessMode includes W_OK of non-LiveView inodeNumber not allowed")
  1257  		return
  1258  	}
  1259  
  1260  	ourInode, ok, err = vS.fetchInode(adjustedInodeNumber)
  1261  	if nil != err {
  1262  		// this indicates disk corruption or software bug
  1263  		// (err includes volume name and inode number)
  1264  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1265  
  1266  		// if we can't fetch the inode we can't access it
  1267  		accessReturn = false
  1268  		return
  1269  	}
  1270  	if !ok {
  1271  		// disk corruption or client requested a free inode
  1272  		logger.Infof("%s: fetch of inode %d volume '%s' failed because it is unallocated",
  1273  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1274  
  1275  		// if the inode is free then we can't access it
  1276  		accessReturn = false
  1277  		return
  1278  	}
  1279  
  1280  	ourInodeUserID = ourInode.UserID
  1281  	ourInodeGroupID = ourInode.GroupID
  1282  
  1283  	if headhunter.SnapShotIDTypeLive == snapShotIDType {
  1284  		ourInodeMode = ourInode.Mode
  1285  	} else {
  1286  		ourInodeMode = ourInode.Mode // TODO: Make it read-only...
  1287  	}
  1288  
  1289  	if F_OK == accessMode {
  1290  		// the inode exists so its F_OK
  1291  		accessReturn = true
  1292  		return
  1293  	}
  1294  
  1295  	if P_OK == accessMode {
  1296  		accessReturn = (InodeRootUserID == userID) || (userID == ourInodeUserID)
  1297  		return
  1298  	}
  1299  
  1300  	if accessMode != (accessMode & (R_OK | W_OK | X_OK)) {
  1301  		// Default to false if P_OK bit is set along with any others
  1302  		accessReturn = false
  1303  		return
  1304  	}
  1305  
  1306  	// Only the LiveView is ever writeable... even by the root user
  1307  	if (accessMode&W_OK != 0) && (headhunter.SnapShotIDTypeLive != snapShotIDType) {
  1308  		accessReturn = false
  1309  		return
  1310  	}
  1311  
  1312  	// The root user (if not squashed) can do anything except exec files
  1313  	// that are not executable by any user
  1314  	if userID == InodeRootUserID {
  1315  		if (accessMode&X_OK != 0) && (ourInodeMode&(X_OK<<6|X_OK<<3|X_OK) == 0) {
  1316  			accessReturn = false
  1317  		} else {
  1318  			accessReturn = true
  1319  		}
  1320  		return
  1321  	}
  1322  
  1323  	// We check against permissions for the user, group, and other.  The
  1324  	// first match wins (not the first permission granted).  If the user is
  1325  	// the owner of the file then those permission bits determine what
  1326  	// happens.  In other words, if the permission bits deny read permission
  1327  	// to the owner of a file but allow read permission for group and other,
  1328  	// then everyone except the owner of the file can read it.
  1329  	//
  1330  	// On a local file system, the owner of a file is *not* allowed to write
  1331  	// to the file unless it was opened for writing and the permission bits
  1332  	// allowed it *or* the process created the file and opened it for
  1333  	// writing at the same time.  However, NFS does not have an open state
  1334  	// (there's no file descriptor that tracks permissions when the file
  1335  	// was opened) so we check for write permission on every write.  This
  1336  	// breaks things like tar when it tries to unpack a file which has
  1337  	// permission 0444 (read only).  On a local file system that works, but
  1338  	// it doesn't work for NFS unless we bend the rules a bit for the owner
  1339  	// of the file and allow the owner to write to the file even if
  1340  	// appropriate permissions are lacking.  (This is only done for the user
  1341  	// that owns the file, not the group that owns the file. Note that the
  1342  	// owner can always change the permissions to allow writing so it's not a
  1343  	// security risk, but the owning group cannot).
  1344  	//
  1345  	// Note that the NFS client will typically call Access() when an app
  1346  	// wants to open the file and will fail an open request for writing if
  1347  	// the permission bits do not allow it.
  1348  	//
  1349  	// Similar rules apply to Read() and Truncate() (for ftruncate(2)), but
  1350  	// not for execute permission.  Also, this only applies to regular files
  1351  	// but we'll rely on the caller for that.
  1352  	if userID == ourInodeUserID {
  1353  		if override == OwnerOverride && (accessMode&X_OK == 0) {
  1354  			accessReturn = true
  1355  		} else {
  1356  			accessReturn = (((ourInodeMode >> 6) & accessMode) == accessMode)
  1357  		}
  1358  		return
  1359  	}
  1360  
  1361  	groupIDCheck = (groupID == ourInodeGroupID)
  1362  	if !groupIDCheck {
  1363  		for _, otherGroupID = range otherGroupIDs {
  1364  			if otherGroupID == ourInodeGroupID {
  1365  				groupIDCheck = true
  1366  				break
  1367  			}
  1368  		}
  1369  	}
  1370  	if groupIDCheck {
  1371  		accessReturn = ((((ourInodeMode >> 3) & 07) & accessMode) == accessMode)
  1372  		return
  1373  	}
  1374  
  1375  	accessReturn = ((((ourInodeMode >> 0) & 07) & accessMode) == accessMode)
  1376  	return
  1377  }
  1378  
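        // ProvisionObject allocates a new backing object for this volume and returns
        // its Swift object path of the form /v1/<accountName>/<containerName>/<objectNumber as %016X>.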
  1379  func (vS *volumeStruct) ProvisionObject() (objectPath string, err error) {
  1380  	err = enforceRWMode(true)
  1381  	if nil != err {
  1382  		return
  1383  	}
  1384  
  1385  	containerName, objectNumber, err := vS.provisionObject()
  1386  	if nil != err {
  1387  		return
  1388  	}
  1389  
  1390  	objectPath = fmt.Sprintf("/v1/%s/%s/%016X", vS.accountName, containerName, objectNumber)
  1391  
  1392  	err = nil
  1393  	return
  1394  }
  1395  
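        // Purge evicts the indicated inode from the in-memory inode cache. A dirty
        // inode cannot be purged; it must be flushed first.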
  1396  func (vS *volumeStruct) Purge(inodeNumber InodeNumber) (err error) {
  1397  	var (
  1398  		inode *inMemoryInodeStruct
  1399  		ok    bool
  1400  	)
  1401  
  1402  	err = enforceRWMode(false)
  1403  	if nil != err {
  1404  		return
  1405  	}
  1406  
  1407  	inode, ok, err = vS.inodeCacheFetch(inodeNumber)
  1408  	if (nil != err) || !ok {
  1409  		return
  1410  	}
  1411  
  1412  	if inode.dirty {
  1413  		err = fmt.Errorf("Inode dirty... cannot be purged")
  1414  		return
  1415  	}
  1416  
  1417  	ok, err = vS.inodeCacheDrop(inode)
  1418  	if nil != err {
  1419  		return
  1420  	}
  1421  	if !ok {
  1422  		err = fmt.Errorf("inodeCacheDrop(inode) failed")
  1423  	}
  1424  
  1425  	return
  1426  }
  1427  
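        // Destroy releases the resources backing the indicated (LiveView-only) inode:
        // the inode is dropped from the inode cache, its inode record is deleted, its
        // payload B+Tree (directory map or extent map) is discarded, and, for a
        // FileInode, its LogSegments are deleted.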
  1428  func (vS *volumeStruct) Destroy(inodeNumber InodeNumber) (err error) {
  1429  	logger.Tracef("inode.Destroy(): volume '%s' inode %d", vS.volumeName, inodeNumber)
  1430  
  1431  	err = enforceRWMode(false)
  1432  	if nil != err {
  1433  		return
  1434  	}
  1435  
  1436  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1437  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1438  		err = fmt.Errorf("Destroy() on non-LiveView inodeNumber not allowed")
  1439  		return
  1440  	}
  1441  
  1442  	ourInode, ok, err := vS.fetchInode(inodeNumber)
  1443  	if nil != err {
  1444  		// the inode is locked so this should never happen (unless the inode
  1445  		// was evicted from the cache and it was corrupt when read from disk)
  1446  		// (err includes volume name and inode number)
  1447  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1448  		return
  1449  	}
  1450  	if !ok {
  1451  		// this should never happen (see above)
  1452  		err = fmt.Errorf("%s: cannot destroy inode %d volume '%s' because it is unallocated",
  1453  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1454  		err = blunder.AddError(err, blunder.NotFoundError)
  1455  		logger.ErrorWithError(err)
  1456  		return
  1457  	}
  1458  
  1459  	ok, err = vS.inodeCacheDrop(ourInode)
  1460  	if nil != err {
  1461  		logger.ErrorfWithError(err, "%s: inodeCacheDrop() of inode failed: %v", utils.GetFnName(), err)
  1462  		return
  1463  	}
  1464  	if !ok {
  1465  		err = fmt.Errorf("%s: inodeCacheDrop() of inode returned !ok", utils.GetFnName())
        		logger.ErrorWithError(err)
  1466  		return
  1467  	}
  1468  
  1469  	if ourInode.InodeType == FileType {
  1470  		_ = vS.doFileInodeDataFlush(ourInode)
  1471  	}
  1472  
  1473  	err = vS.headhunterVolumeHandle.DeleteInodeRec(uint64(inodeNumber))
  1474  	if nil != err {
  1475  		logger.ErrorWithError(err)
  1476  		return
  1477  	}
  1478  
  1479  	if DirType == ourInode.InodeType {
  1480  		logger.Tracef("inode.Destroy(): volume '%s' inode %d: discarding dirmap payload Object %016X  len %d",
  1481  			vS.volumeName, inodeNumber, ourInode.PayloadObjectNumber, ourInode.PayloadObjectLength)
  1482  
  1483  		dirMapping := ourInode.payload.(sortedmap.BPlusTree)
  1484  
  1485  		err = dirMapping.Discard()
  1486  		if nil != err {
  1487  			logger.ErrorWithError(err)
  1488  			return
  1489  		}
  1490  
  1491  		stats.IncrementOperations(&stats.DirDestroyOps)
  1492  
  1493  	} else if FileType == ourInode.InodeType {
  1494  		logger.Tracef("inode.Destroy(): volume '%s' inode %d: discarding extmap payload Object %016X  len %d",
  1495  			vS.volumeName, inodeNumber, ourInode.PayloadObjectNumber, ourInode.PayloadObjectLength)
  1496  
  1497  		extents := ourInode.payload.(sortedmap.BPlusTree)
  1498  
  1499  		err = extents.Discard()
  1500  		if nil != err {
  1501  			logger.ErrorWithError(err)
  1502  			return
  1503  		}
  1504  
  1505  		for logSegmentNumber := range ourInode.LogSegmentMap {
  1506  			deleteSegmentErr := vS.headhunterVolumeHandle.DeleteLogSegmentRec(logSegmentNumber)
  1507  			if nil != deleteSegmentErr {
  1508  				logger.WarnfWithError(deleteSegmentErr, "couldn't delete destroy'd log segment")
  1509  				return
  1510  			}
  1511  			stats.IncrementOperations(&stats.GcLogSegDeleteOps)
  1512  		}
  1513  		stats.IncrementOperations(&stats.GcLogSegOps)
  1514  
  1515  		stats.IncrementOperations(&stats.FileDestroyOps)
  1516  	} else { // SymlinkType == ourInode.InodeType
  1517  		stats.IncrementOperations(&stats.SymlinkDestroyOps)
  1518  	}
  1519  
  1520  	return
  1521  }
  1522  
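        // GetMetadata returns a copy of the indicated inode's attributes. For the
        // emulated /<SnapShotDirName> directory, the RootDirInode's metadata is
        // returned with write permission removed (and no stream names); inodes inside
        // a SnapShot likewise have write permission removed.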
  1523  func (vS *volumeStruct) GetMetadata(inodeNumber InodeNumber) (metadata *MetadataStruct, err error) {
  1524  	var (
  1525  		inode          *inMemoryInodeStruct
  1526  		ok             bool
  1527  		pos            int
  1528  		snapShotIDType headhunter.SnapShotIDType
  1529  	)
  1530  
  1531  	snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1532  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  1533  		// For /<SnapShotDirName>, start with metadata from /
  1534  		inode, ok, err = vS.fetchInode(RootDirInodeNumber)
  1535  	} else {
  1536  		inode, ok, err = vS.fetchInode(inodeNumber)
  1537  	}
  1538  
  1539  	if nil != err {
  1540  		// this indicates disk corruption or software error
  1541  		// (err includes volume name and inode number)
  1542  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1543  		return
  1544  	}
  1545  	if !ok {
  1546  		// disk corruption or client request for unallocated inode
  1547  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1548  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1549  		err = blunder.AddError(err, blunder.NotFoundError)
  1550  		logger.InfoWithError(err)
  1551  		return
  1552  	}
  1553  
  1554  	metadata = &MetadataStruct{
  1555  		InodeType:            inode.InodeType,
  1556  		LinkCount:            inode.LinkCount,
  1557  		Size:                 inode.Size,
  1558  		CreationTime:         inode.CreationTime,
  1559  		ModificationTime:     inode.ModificationTime,
  1560  		AccessTime:           inode.AccessTime,
  1561  		AttrChangeTime:       inode.AttrChangeTime,
  1562  		NumWrites:            inode.NumWrites,
  1563  		InodeStreamNameSlice: make([]string, len(inode.StreamMap)),
  1564  		Mode:                 inode.Mode,
  1565  		UserID:               inode.UserID,
  1566  		GroupID:              inode.GroupID,
  1567  	}
  1568  
  1569  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  1570  		// For /<SnapShotDirName>, simply remove Write Access... and skip InodeStreamNameSlice
  1571  		metadata.Mode &^= (W_OK<<6 | W_OK<<3 | W_OK<<0)
  1572  	} else {
  1573  		if headhunter.SnapShotIDTypeSnapShot == snapShotIDType {
  1574  			// For inodes in a SnapShot, simply remove Write Access
  1575  			metadata.Mode &^= (W_OK<<6 | W_OK<<3 | W_OK<<0)
  1576  		}
  1577  		pos = 0
  1578  		for inodeStreamName := range inode.StreamMap {
  1579  			metadata.InodeStreamNameSlice[pos] = inodeStreamName
  1580  			pos++
  1581  		}
  1582  	}
  1583  
  1584  	stats.IncrementOperations(&stats.InodeGetMetadataOps)
  1585  	return
  1586  }
  1587  
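        // GetType returns the InodeType of the indicated inode; the emulated
        // /<SnapShotDirName> directory always reports DirType.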
  1588  func (vS *volumeStruct) GetType(inodeNumber InodeNumber) (inodeType InodeType, err error) {
  1589  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1590  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  1591  		inodeType = DirType
  1592  		err = nil
  1593  		return
  1594  	}
  1595  
  1596  	inode, ok, err := vS.fetchInode(inodeNumber)
  1597  	if nil != err {
  1598  		// this indicates disk corruption or software error
  1599  		// (err includes volume name and inode number)
  1600  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1601  		return
  1602  	}
  1603  	if !ok {
  1604  		// disk corruption or client request for unallocated inode
  1605  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1606  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1607  		logger.InfoWithError(err)
  1608  		err = blunder.AddError(err, blunder.NotFoundError)
  1609  		return
  1610  	}
  1611  
  1612  	inodeType = inode.InodeType
  1613  
  1614  	stats.IncrementOperations(&stats.InodeGetTypeOps)
  1615  	return
  1616  }
  1617  
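        // GetLinkCount returns the indicated inode's link count. When any SnapShots
        // exist, the RootDirInode's count is incremented by one to account for
        // /<SnapShotDirName>, and /<SnapShotDirName> itself reports 2 + the number of
        // SnapShots.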
  1618  func (vS *volumeStruct) GetLinkCount(inodeNumber InodeNumber) (linkCount uint64, err error) {
  1619  	var (
  1620  		adjustLinkCountForSnapShotSubDirInRootDirInode bool
  1621  		inode                                          *inMemoryInodeStruct
  1622  		ok                                             bool
  1623  		snapShotCount                                  uint64
  1624  		snapShotIDType                                 headhunter.SnapShotIDType
  1625  	)
  1626  
  1627  	if RootDirInodeNumber == inodeNumber {
  1628  		// Account for '..' in /<SnapShotDirName> if any SnapShots exist
  1629  		snapShotCount = vS.headhunterVolumeHandle.SnapShotCount()
  1630  		adjustLinkCountForSnapShotSubDirInRootDirInode = (0 != snapShotCount)
  1631  	} else {
  1632  		snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1633  		if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  1634  			// linkCount == 1 (/<SnapShotDirName>'s '.') + 1 (/'s reference to <SnapShotDirName>) + # SnapShots ('..' in each SnapShot's root)
  1635  			snapShotCount = vS.headhunterVolumeHandle.SnapShotCount()
  1636  			linkCount = 1 + 1 + snapShotCount
  1637  			err = nil
  1638  			return
  1639  		}
  1640  		adjustLinkCountForSnapShotSubDirInRootDirInode = false
  1641  	}
  1642  
  1643  	inode, ok, err = vS.fetchInode(inodeNumber)
  1644  	if nil != err {
  1645  		// this indicates disk corruption or software error
  1646  		// (err includes volume name and inode number)
  1647  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1648  		return
  1649  	}
  1650  	if !ok {
  1651  		// disk corruption or client request for unallocated inode
  1652  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1653  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1654  		logger.InfoWithError(err)
  1655  		err = blunder.AddError(err, blunder.NotFoundError)
  1656  		return
  1657  	}
  1658  
  1659  	if adjustLinkCountForSnapShotSubDirInRootDirInode {
  1660  		linkCount = inode.LinkCount + 1
  1661  	} else {
  1662  		linkCount = inode.LinkCount
  1663  	}
  1664  
  1665  	return
  1666  }
  1667  
  1668  // SetLinkCount is used to adjust the LinkCount property to match current reference count during FSCK TreeWalk.
  1669  func (vS *volumeStruct) SetLinkCount(inodeNumber InodeNumber, linkCount uint64) (err error) {
  1670  	err = enforceRWMode(false)
  1671  	if nil != err {
  1672  		return
  1673  	}
  1674  
  1675  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1676  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1677  		err = fmt.Errorf("SetLinkCount() on non-LiveView inodeNumber not allowed")
  1678  		return
  1679  	}
  1680  
  1681  	inode, ok, err := vS.fetchInode(inodeNumber)
  1682  	if err != nil {
  1683  		// this indicates disk corruption or software error
  1684  		// (err includes volume name and inode number)
  1685  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  1686  		return
  1687  	}
  1688  	if !ok {
  1689  		// disk corruption or client request for unallocated inode
  1690  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1691  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1692  		logger.InfoWithError(err)
  1693  		err = blunder.AddError(err, blunder.NotFoundError)
  1694  		return
  1695  	}
  1696  
  1697  	inode.dirty = true
  1698  	inode.LinkCount = linkCount
  1699  
  1700  	err = vS.flushInode(inode)
  1701  	if err != nil {
  1702  		logger.ErrorWithError(err)
  1703  		return err
  1704  	}
  1705  
  1706  	return
  1707  }
  1708  
  1709  func (vS *volumeStruct) SetCreationTime(inodeNumber InodeNumber, CreationTime time.Time) (err error) {
  1710  	err = enforceRWMode(false)
  1711  	if nil != err {
  1712  		return
  1713  	}
  1714  
  1715  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1716  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1717  		err = fmt.Errorf("SetCreationTime() on non-LiveView inodeNumber not allowed")
  1718  		return
  1719  	}
  1720  
  1721  	inode, ok, err := vS.fetchInode(inodeNumber)
  1722  	if err != nil {
  1723  		// the inode is locked so this should never happen (unless the inode
  1724  		// was evicted from the cache and it was corrupt when read from disk)
  1725  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1726  		return err
  1727  	}
  1728  	if !ok {
  1729  		// this should never happen (see above)
  1730  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1731  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1732  		logger.ErrorWithError(err)
  1733  		err = blunder.AddError(err, blunder.NotFoundError)
  1734  		return err
  1735  	}
  1736  
  1737  	inode.dirty = true
  1738  	inode.AttrChangeTime = time.Now()
  1739  	inode.CreationTime = CreationTime
  1740  
  1741  	err = vS.flushInode(inode)
  1742  	if err != nil {
  1743  		logger.ErrorWithError(err)
  1744  		return err
  1745  	}
  1746  	return
  1747  }
  1748  
  1749  func (vS *volumeStruct) SetModificationTime(inodeNumber InodeNumber, ModificationTime time.Time) (err error) {
  1750  	err = enforceRWMode(false)
  1751  	if nil != err {
  1752  		return
  1753  	}
  1754  
  1755  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1756  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1757  		err = fmt.Errorf("SetModificationTime() on non-LiveView inodeNumber not allowed")
  1758  		return
  1759  	}
  1760  
  1761  	inode, ok, err := vS.fetchInode(inodeNumber)
  1762  	if err != nil {
  1763  		// the inode is locked so this should never happen (unless the inode
  1764  		// was evicted from the cache and it was corrupt when read from disk)
  1765  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1766  		return err
  1767  	}
  1768  	if !ok {
  1769  		// this should never happen (see above)
  1770  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1771  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1772  		logger.ErrorWithError(err)
  1773  		err = blunder.AddError(err, blunder.NotFoundError)
  1774  		return err
  1775  	}
  1776  
  1777  	inode.dirty = true
  1778  	inode.AttrChangeTime = time.Now()
  1779  	inode.ModificationTime = ModificationTime
  1780  
  1781  	err = vS.flushInode(inode)
  1782  	if err != nil {
  1783  		logger.ErrorWithError(err)
  1784  		return err
  1785  	}
  1786  
  1787  	return
  1788  }
  1789  
  1790  func (vS *volumeStruct) SetAccessTime(inodeNumber InodeNumber, accessTime time.Time) (err error) {
  1791  	err = enforceRWMode(false)
  1792  	if nil != err {
  1793  		return
  1794  	}
  1795  
  1796  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1797  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1798  		err = fmt.Errorf("SetAccessTime() on non-LiveView inodeNumber not allowed")
  1799  		return
  1800  	}
  1801  
  1802  	inode, ok, err := vS.fetchInode(inodeNumber)
  1803  	if err != nil {
  1804  		// the inode is locked so this should never happen (unless the inode
  1805  		// was evicted from the cache and it was corrupt when read from disk)
  1806  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1807  		return err
  1808  	}
  1809  	if !ok {
  1810  		// this should never happen (see above)
  1811  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1812  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1813  		logger.ErrorWithError(err)
  1814  		err = blunder.AddError(err, blunder.NotFoundError)
  1815  		return err
  1816  	}
  1817  
  1818  	inode.dirty = true
  1819  	inode.AttrChangeTime = time.Now()
  1820  	inode.AccessTime = accessTime
  1821  
  1822  	err = vS.flushInode(inode)
  1823  	if err != nil {
  1824  		logger.ErrorWithError(err)
  1825  		return err
  1826  	}
  1827  
  1828  	return
  1829  }
  1830  
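        // determineMode combines the permission bits of filePerm (masked to
        // PosixModePerm) with the file-type bits implied by inodeType. Illustrative
        // example: determineMode(0644, FileType) and determineMode(0100644, FileType)
        // both yield PosixModeFile|0644, since any bits beyond PosixModePerm are
        // silently discarded.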
  1831  func determineMode(filePerm InodeMode, inodeType InodeType) (fileMode InodeMode, err error) {
  1832  	// Caller should only be setting the file perm bits, but samba seems to send file type
  1833  	// bits as well. Since we need to work with whatever samba does, let's just silently
  1834  	// mask off the other bits.
  1835  	if filePerm&^PosixModePerm != 0 {
  1836  		logger.Tracef("inode.determineMode(): invalid file mode 0x%x (max 0x%x); removing file type bits.", uint32(filePerm), uint32(PosixModePerm))
  1837  	}
  1838  
  1839  	// Build fileMode starting with the file permission bits
  1840  	fileMode = filePerm & PosixModePerm
  1841  
  1842  	// Add the file type to the mode.
  1843  	switch inodeType {
  1844  	case DirType:
  1845  		fileMode |= PosixModeDir
  1846  	case FileType:
  1847  		fileMode |= PosixModeFile
  1848  	case SymlinkType:
  1849  		fileMode |= PosixModeSymlink
  1850  	default:
  1851  		err = fmt.Errorf("%s: unrecognized inode type %v", utils.GetFnName(), inodeType)
  1852  		err = blunder.AddError(err, blunder.InvalidInodeTypeError)
  1853  		return
  1854  	}
  1855  
  1856  	err = nil
  1857  	return
  1858  }
  1859  
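        // SetPermMode replaces the (LiveView-only) inode's permission bits with
        // filePerm (masked to PosixModePerm), preserves its file-type bits, and
        // flushes the inode.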
  1860  func (vS *volumeStruct) SetPermMode(inodeNumber InodeNumber, filePerm InodeMode) (err error) {
  1861  	err = enforceRWMode(false)
  1862  	if nil != err {
  1863  		return
  1864  	}
  1865  
  1866  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1867  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1868  		err = fmt.Errorf("SetPermMode() on non-LiveView inodeNumber not allowed")
  1869  		return
  1870  	}
  1871  
  1872  	inode, ok, err := vS.fetchInode(inodeNumber)
  1873  	if err != nil {
  1874  		// the inode is locked so this should never happen (unless the inode
  1875  		// was evicted from the cache and it was corrupt when read from disk)
  1876  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1877  		return err
  1878  	}
  1879  	if !ok {
  1880  		// this should never happen (see above)
  1881  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1882  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1883  		logger.ErrorWithError(err)
  1884  		err = blunder.AddError(err, blunder.NotFoundError)
  1885  		return err
  1886  	}
  1887  
  1888  	// Create file mode out of file permissions plus inode type
  1889  	fileMode, err := determineMode(filePerm, inode.InodeType)
  1890  	if err != nil {
  1891  		return err
  1892  	}
  1893  
  1894  	inode.dirty = true
  1895  	inode.Mode = fileMode
  1896  
  1897  	updateTime := time.Now()
  1898  	inode.AttrChangeTime = updateTime
  1899  
  1900  	err = vS.flushInode(inode)
  1901  	if err != nil {
  1902  		logger.ErrorWithError(err)
  1903  		return err
  1904  	}
  1905  
  1906  	return
  1907  }
  1908  
  1909  func (vS *volumeStruct) SetOwnerUserID(inodeNumber InodeNumber, userID InodeUserID) (err error) {
  1910  	err = enforceRWMode(false)
  1911  	if nil != err {
  1912  		return
  1913  	}
  1914  
  1915  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1916  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1917  		err = fmt.Errorf("SetOwnerUserID() on non-LiveView inodeNumber not allowed")
  1918  		return
  1919  	}
  1920  
  1921  	inode, ok, err := vS.fetchInode(inodeNumber)
  1922  	if err != nil {
  1923  		// the inode is locked so this should never happen (unless the inode
  1924  		// was evicted from the cache and it was corrupt when read from disk)
  1925  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1926  		return err
  1927  	}
  1928  	if !ok {
  1929  		// this should never happen (see above)
  1930  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1931  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1932  		logger.ErrorWithError(err)
  1933  		err = blunder.AddError(err, blunder.NotFoundError)
  1934  		return err
  1935  	}
  1936  
  1937  	inode.dirty = true
  1938  	inode.UserID = userID
  1939  
  1940  	updateTime := time.Now()
  1941  	inode.AttrChangeTime = updateTime
  1942  
  1943  	err = vS.flushInode(inode)
  1944  	if err != nil {
  1945  		logger.ErrorWithError(err)
  1946  		return err
  1947  	}
  1948  
  1949  	return
  1950  }
  1951  
  1952  func (vS *volumeStruct) SetOwnerUserIDGroupID(inodeNumber InodeNumber, userID InodeUserID, groupID InodeGroupID) (err error) {
  1953  	err = enforceRWMode(false)
  1954  	if nil != err {
  1955  		return
  1956  	}
  1957  
  1958  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  1959  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  1960  		err = fmt.Errorf("SetOwnerUserIDGroupID() on non-LiveView inodeNumber not allowed")
  1961  		return
  1962  	}
  1963  
  1964  	inode, ok, err := vS.fetchInode(inodeNumber)
  1965  	if err != nil {
  1966  		// the inode is locked so this should never happen (unless the inode
  1967  		// was evicted from the cache and it was corrupt when read from disk)
  1968  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  1969  		return err
  1970  	}
  1971  	if !ok {
  1972  		// this should never happen (see above)
  1973  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  1974  			utils.GetFnName(), inodeNumber, vS.volumeName)
  1975  		logger.ErrorWithError(err)
  1976  		err = blunder.AddError(err, blunder.NotFoundError)
  1977  		return err
  1978  	}
  1979  
  1980  	inode.dirty = true
  1981  	inode.UserID = userID
  1982  	inode.GroupID = groupID
  1983  
  1984  	updateTime := time.Now()
  1985  	inode.AttrChangeTime = updateTime
  1986  
  1987  	err = vS.flushInode(inode)
  1988  	if err != nil {
  1989  		logger.ErrorWithError(err)
  1990  		return err
  1991  	}
  1992  
  1993  	return
  1994  }
  1995  
  1996  func (vS *volumeStruct) SetOwnerGroupID(inodeNumber InodeNumber, groupID InodeGroupID) (err error) {
  1997  	err = enforceRWMode(false)
  1998  	if nil != err {
  1999  		return
  2000  	}
  2001  
  2002  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2003  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  2004  		err = fmt.Errorf("SetOwnerGroupID() on non-LiveView inodeNumber not allowed")
  2005  		return
  2006  	}
  2007  
  2008  	inode, ok, err := vS.fetchInode(inodeNumber)
  2009  	if err != nil {
  2010  		// the inode is locked so this should never happen (unless the inode
  2011  		// was evicted from the cache and it was corrupt when read from disk)
  2012  		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
  2013  		return err
  2014  	}
  2015  	if !ok {
  2016  		// this should never happen (see above)
  2017  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2018  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2019  		logger.ErrorWithError(err)
  2020  		err = blunder.AddError(err, blunder.NotFoundError)
  2021  		return err
  2022  	}
  2023  
  2024  	inode.dirty = true
  2025  	inode.GroupID = groupID
  2026  
  2027  	updateTime := time.Now()
  2028  	inode.AttrChangeTime = updateTime
  2029  
  2030  	err = vS.flushInode(inode)
  2031  	if err != nil {
  2032  		logger.ErrorWithError(err)
  2033  		return err
  2034  	}
  2035  
  2036  	return
  2037  }
  2038  
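        // GetStream returns a copy of the named inode stream from the inode's
        // StreamMap; if no such stream exists (the emulated /<SnapShotDirName>
        // directory has none), a StreamNotFound error is returned.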
  2039  func (vS *volumeStruct) GetStream(inodeNumber InodeNumber, inodeStreamName string) (buf []byte, err error) {
  2040  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2041  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  2042  		err = fmt.Errorf("No stream '%v'", inodeStreamName)
  2043  		return buf, blunder.AddError(err, blunder.StreamNotFound)
  2044  	}
  2045  
  2046  	inode, ok, err := vS.fetchInode(inodeNumber)
  2047  	if err != nil {
  2048  		// this indicates disk corruption or software error
  2049  		// (err includes volume name and inode number)
  2050  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2051  		return nil, err
  2052  	}
  2053  	if !ok {
  2054  		// disk corruption or client request for unallocated inode
  2055  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2056  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2057  		logger.InfoWithError(err)
  2058  		err = blunder.AddError(err, blunder.NotFoundError)
  2059  		return nil, err
  2060  	}
  2061  
  2062  	inodeStreamBuf, ok := inode.StreamMap[inodeStreamName]
  2063  
  2064  	if !ok {
  2065  		err = fmt.Errorf("No stream '%v'", inodeStreamName)
  2066  		return buf, blunder.AddError(err, blunder.StreamNotFound)
  2067  	}
  2068  
  2069  	buf = make([]byte, len(inodeStreamBuf))
  2070  
  2071  	copy(buf, inodeStreamBuf)
  2072  
  2073  	err = nil
  2074  
  2075  	return
  2076  }
  2077  
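        // PutStream stores a copy of buf as the named inode stream in the
        // (LiveView-only) inode's StreamMap and flushes the inode.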
  2078  func (vS *volumeStruct) PutStream(inodeNumber InodeNumber, inodeStreamName string, buf []byte) (err error) {
  2079  	err = enforceRWMode(false)
  2080  	if nil != err {
  2081  		return
  2082  	}
  2083  
  2084  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2085  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  2086  		err = fmt.Errorf("PutStream() on non-LiveView inodeNumber not allowed")
  2087  		return
  2088  	}
  2089  
  2090  	inode, ok, err := vS.fetchInode(inodeNumber)
  2091  	if err != nil {
  2092  		// this indicates disk corruption or software error
  2093  		// (err includes volume name and inode number)
  2094  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2095  		return err
  2096  	}
  2097  	if !ok {
  2098  		// disk corruption or client request for unallocated inode
  2099  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2100  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2101  		logger.InfoWithError(err)
  2102  		err = blunder.AddError(err, blunder.NotFoundError)
  2103  		return err
  2104  	}
  2105  
  2106  	inodeStreamBuf := make([]byte, len(buf))
  2107  
  2108  	copy(inodeStreamBuf, buf)
  2109  
  2110  	inode.dirty = true
  2111  	inode.StreamMap[inodeStreamName] = inodeStreamBuf
  2112  
  2113  	updateTime := time.Now()
  2114  	inode.AttrChangeTime = updateTime
  2115  
  2116  	err = vS.flushInode(inode)
  2117  	if err != nil {
  2118  		logger.ErrorWithError(err)
  2119  		return err
  2120  	}
  2121  
  2122  	return
  2123  }
  2124  
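        // DeleteStream removes the named inode stream, if present, from the
        // (LiveView-only) inode's StreamMap and flushes the inode.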
  2125  func (vS *volumeStruct) DeleteStream(inodeNumber InodeNumber, inodeStreamName string) (err error) {
  2126  	err = enforceRWMode(false)
  2127  	if nil != err {
  2128  		return
  2129  	}
  2130  
  2131  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2132  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  2133  		err = fmt.Errorf("DeleteStream() on non-LiveView inodeNumber not allowed")
  2134  		return
  2135  	}
  2136  
  2137  	inode, ok, err := vS.fetchInode(inodeNumber)
  2138  	if err != nil {
  2139  		// this indicates disk corruption or software error
  2140  		// (err includes volume name and inode number)
  2141  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2142  		return
  2143  	}
  2144  	if !ok {
  2145  		// disk corruption or client request for unallocated inode
  2146  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2147  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2148  		logger.InfoWithError(err)
  2149  		err = blunder.AddError(err, blunder.NotFoundError)
  2150  		return
  2151  	}
  2152  
  2153  	inode.dirty = true
  2154  	delete(inode.StreamMap, inodeStreamName)
  2155  
  2156  	updateTime := time.Now()
  2157  	inode.AttrChangeTime = updateTime
  2158  
  2159  	err = vS.flushInode(inode)
  2160  	if err != nil {
  2161  		logger.ErrorWithError(err)
  2162  		return err
  2163  	}
  2164  
  2165  	return
  2166  }
  2167  
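        // FetchLayoutReport returns the sortedmap.LayoutReport for the inode's payload
        // B+Tree (directory map or extent map); SymlinkInodes and the emulated
        // /<SnapShotDirName> directory yield an empty report.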
  2168  func (vS *volumeStruct) FetchLayoutReport(inodeNumber InodeNumber) (layoutReport sortedmap.LayoutReport, err error) {
  2169  	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2170  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  2171  		layoutReport = make(sortedmap.LayoutReport)
  2172  		err = nil
  2173  		return
  2174  	}
  2175  
  2176  	inode, ok, err := vS.fetchInode(inodeNumber)
  2177  	if err != nil {
  2178  		// this indicates disk corruption or software error
  2179  		// (err includes volume name and inode number)
  2180  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2181  		return nil, err
  2182  	}
  2183  	if !ok {
  2184  		// disk corruption or client request for unallocated inode
  2185  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2186  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2187  		logger.InfoWithError(err)
  2188  		err = blunder.AddError(err, blunder.NotFoundError)
  2189  		return nil, err
  2190  	}
  2191  
  2192  	if SymlinkType == inode.InodeType {
  2193  		layoutReport = make(sortedmap.LayoutReport)
  2194  		err = nil
  2195  	} else {
  2196  		layoutReport, err = inode.payload.(sortedmap.BPlusTree).FetchLayoutReport()
  2197  	}
  2198  
  2199  	return
  2200  }
  2201  
  2202  func (vS *volumeStruct) FetchFragmentationReport(inodeNumber InodeNumber) (fragmentationReport FragmentationReport, err error) {
  2203  	err = fmt.Errorf("FetchFragmentationReport not yet implemented")
  2204  	return
  2205  }
  2206  
  2207  func (vS *volumeStruct) Optimize(inodeNumber InodeNumber, maxDuration time.Duration) (err error) {
  2208  	err = enforceRWMode(false)
  2209  	if nil != err {
  2210  		return
  2211  	}
  2212  
  2213  	err = fmt.Errorf("Optimize not yet implemented")
  2214  	return
  2215  }
  2216  
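        // validateFileExtents deeply validates a FileInode's data by building a read
        // plan covering the whole file and checking that (1) the plan's total length
        // matches the inode's recorded Size, (2) the per-LogSegment byte counts implied
        // by the plan match the inode's LogSegmentMap, and (3) each LogSegment object
        // referenced by the plan is (per a HEAD request) at least as long as the
        // largest end offset the plan reads from it.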
  2217  func validateFileExtents(snapShotID uint64, ourInode *inMemoryInodeStruct) (err error) {
  2218  	var (
  2219  		zero = uint64(0)
  2220  	)
  2221  
  2222  	readPlan, readPlanBytes, err := ourInode.volume.getReadPlanHelper(snapShotID, ourInode, &zero, nil)
  2223  	if err != nil {
  2224  		return err
  2225  	}
  2226  
  2227  	// We read the whole file, so these should match
  2228  	if readPlanBytes != ourInode.Size {
  2229  		return blunder.NewError(blunder.CorruptInodeError, "inode %v had recorded size %v bytes, but full read plan was only %v bytes", ourInode.InodeNumber, ourInode.Size, readPlanBytes)
  2230  	}
  2231  
  2232  	// Let's check that the read plan is consistent with what the inode's
  2233  	// internal log-segment map says about which segments should have how much data.
  2234  	//
  2235  	// Make a copy of the inode's LogSegmentMap map so we can decrement the
  2236  	// byte count for each segment as we walk the readPlan entries.
  2237  	remainingExpectedBytes := make(map[uint64]uint64)
  2238  	for segmentNumber, segmentBytesUsed := range ourInode.LogSegmentMap {
  2239  		remainingExpectedBytes[segmentNumber] += segmentBytesUsed
  2240  	}
  2241  	// Then we can compare with the actual read plan we got ...
  2242  	for _, readPlanStep := range readPlan {
  2243  
  2244  		// holes in a sparse file aren't counted
  2245  		if readPlanStep.LogSegmentNumber == 0 {
  2246  			continue
  2247  		}
  2248  		pathSegments := strings.Split(readPlanStep.ObjectPath, "/")
  2249  		logSegmentRepresentation := pathSegments[len(pathSegments)-1]
  2250  		logSegmentNumber, hexConvErr := utils.HexStrToUint64(logSegmentRepresentation)
  2251  		if hexConvErr != nil {
  2252  			return blunder.NewError(blunder.CorruptInodeError,
  2253  				"conversion of read plan object name to log segment number failed; "+
  2254  					"readPlanStep: %v  logSegmentString: '%v'  err: %v",
  2255  				readPlanStep, logSegmentRepresentation, hexConvErr)
  2256  		}
  2257  		remainingExpectedBytes[logSegmentNumber] -= readPlanStep.Length
  2258  	}
  2259  	// ... and fail validation if any log segment didn't match. We'll put the
  2260  	// mismatches in a separate map that we'll attach to the error in case a
  2261  	// consumer or logger wants it.
  2262  	logSegmentByteCountMismatches := make(map[uint64]uint64)
  2263  	for logSegmentNumber, remainingExpectedByteCount := range remainingExpectedBytes {
  2264  		if remainingExpectedByteCount != 0 {
  2265  			logSegmentByteCountMismatches[logSegmentNumber] = remainingExpectedByteCount
  2266  		}
  2267  	}
  2268  	if len(logSegmentByteCountMismatches) != 0 {
  2269  		rootErr := fmt.Errorf("inconsistency detected between log segment map and read plan for inode %v", ourInode.InodeNumber)
  2270  		return merry.WithValue(blunder.AddError(rootErr, blunder.CorruptInodeError), "logSegmentByteCountMismatches", logSegmentByteCountMismatches)
  2271  	}
  2272  
  2273  	// Having verified that our read plan is consistent with our internal log
  2274  	// segment map, we also want to check that it's consistent with the actual log
  2275  	// segment objects in Swift. First, we'll construct a map of object paths to
  2276  	// the largest offset we would need read up to in that object.
  2277  	objectPathToEndOffset := make(map[string]uint64)
  2278  
  2279  	for _, planStep := range readPlan {
  2280  
  2281  		// holes in a sparse file don't have objects
  2282  		if planStep.LogSegmentNumber == 0 {
  2283  			continue
  2284  		}
  2285  		stepEndOffset := planStep.Offset + planStep.Length
  2286  		endOffset, ok := objectPathToEndOffset[planStep.ObjectPath]
  2287  		if !ok || stepEndOffset > endOffset {
  2288  			objectPathToEndOffset[planStep.ObjectPath] = stepEndOffset
  2289  		}
  2290  	}
  2291  
  2292  	// then, HEAD each object to make sure that it has enough bytes.
  2293  	for objectPath, endOffset := range objectPathToEndOffset {
  2294  		accountName, containerName, objectName, err := utils.PathToAcctContObj(objectPath)
  2295  		if err != nil {
  2296  			logger.ErrorWithError(err)
  2297  			return err
  2298  		}
  2299  
  2300  		contentLength, err := swiftclient.ObjectContentLength(accountName, containerName, objectName)
  2301  		if err != nil {
  2302  			logger.ErrorWithError(err)
  2303  			return err
  2304  		}
  2305  
  2306  		if contentLength < endOffset {
  2307  			// REVIEW: it might be helpful to continue and make a combined report of all
  2308  			//         insufficiently long log segments, rather than erroring out immediately
  2309  			err = fmt.Errorf("expected %q to have at least %v bytes, content length was %v", objectPath, endOffset, contentLength)
  2310  			logger.ErrorWithError(err)
  2311  			return err
  2312  		}
  2313  
  2314  	}
  2315  
  2316  	return nil
  2317  }
  2318  
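        // markCorrupted persistently flags the (LiveView-only) inode as corrupt by
        // rewriting its inode record with the CorruptionDetected field set to true
        // (falling back to a minimal record containing only CorruptionDetected == true
        // if the existing record cannot be reused).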
  2319  func (vS *volumeStruct) markCorrupted(inodeNumber InodeNumber) (err error) {
  2320  	var (
  2321  		inodeRec       []byte
  2322  		ok             bool
  2323  		snapShotIDType headhunter.SnapShotIDType
  2324  	)
  2325  
  2326  	snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2327  	if headhunter.SnapShotIDTypeLive != snapShotIDType {
  2328  		err = blunder.NewError(blunder.InvalidArgError, "markCorrupted() of non-LiveView inodeNumber not allowed")
  2329  		return
  2330  	}
  2331  
  2332  	inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber))
  2333  	if nil == err && ok && (len(globals.corruptionDetectedTrueBuf) <= len(inodeRec)) {
  2334  		// Just overwrite CorruptionDetected field with true
  2335  		_ = copy(inodeRec, globals.corruptionDetectedTrueBuf)
  2336  	} else {
  2337  		// Use a simple CorruptionDetected == true inodeRec
  2338  		inodeRec = globals.corruptionDetectedTrueBuf
  2339  	}
  2340  
  2341  	err = vS.headhunterVolumeHandle.PutInodeRec(uint64(inodeNumber), inodeRec)
  2342  
  2343  	return
  2344  }
  2345  
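        // Validate checks the structural integrity of the indicated inode. The inode
        // is flushed and purged first so that the on-disk representation is what gets
        // validated; the payload B+Tree of a DirInode or FileInode is then validated
        // and, if deeply is true, a FileInode's extents are additionally cross-checked
        // via validateFileExtents. Inodes that fail validation are marked corrupted.
        // The emulated /<SnapShotDirName> directory always validates successfully.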
  2346  func (vS *volumeStruct) Validate(inodeNumber InodeNumber, deeply bool) (err error) {
  2347  	var (
  2348  		ok             bool
  2349  		ourInode       *inMemoryInodeStruct
  2350  		snapShotID     uint64
  2351  		snapShotIDType headhunter.SnapShotIDType
  2352  		tree           sortedmap.BPlusTree
  2353  	)
  2354  
  2355  	snapShotIDType, snapShotID, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
  2356  	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
  2357  		err = nil // Since /<SnapShotDirName> is emulated, always return success
  2358  		return
  2359  	}
  2360  
  2361  	// we don't want to use the in-memory cache for this; we'll need to fetch
  2362  	// the current real-world bits from disk.
  2363  
  2364  	// If this is a file inode, we flush to ensure that the inode is not dirty
  2365  	// (and that DLM locking therefore ensures we have exclusive access to the
  2366  	// inode and don't need to serialize this operation, as there can be no pending
  2367  	// time-based flush to race with).
  2368  
  2369  	err = vS.flushInodeNumber(inodeNumber)
  2370  	if nil != err {
  2371  		logger.ErrorfWithError(err, "couldn't flush inode %v", inodeNumber)
  2372  		err = blunder.AddError(err, blunder.CorruptInodeError)
  2373  		return
  2374  	}
  2375  
  2376  	err = vS.Purge(inodeNumber)
  2377  	if nil != err {
  2378  		logger.ErrorfWithError(err, "couldn't purge inode %v", inodeNumber)
  2379  		err = blunder.AddError(err, blunder.CorruptInodeError)
  2380  		return
  2381  	}
  2382  
  2383  	ourInode, ok, err = vS.fetchInode(inodeNumber)
  2384  	if nil != err {
  2385  		// this indicates disk corruption or software error
  2386  		// (err includes volume name and inode number)
  2387  		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
  2388  		err = blunder.AddError(err, blunder.CorruptInodeError)
  2389  		return
  2390  	}
  2391  	if !ok {
  2392  		// disk corruption or client request for unallocated inode
  2393  		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
  2394  			utils.GetFnName(), inodeNumber, vS.volumeName)
  2395  		logger.InfoWithError(err)
  2396  		err = blunder.AddError(err, blunder.NotFoundError)
  2397  		return
  2398  	}
  2399  
  2400  	switch ourInode.InodeType {
  2401  	case DirType, FileType:
  2402  		tree, ok = ourInode.payload.(sortedmap.BPlusTree)
  2403  		if !ok {
  2404  			err = fmt.Errorf("type conversion of inode %v payload to sortedmap.BPlusTree failed", ourInode.InodeNumber)
  2405  			err = blunder.AddError(err, blunder.CorruptInodeError)
  2406  			_ = vS.markCorrupted(inodeNumber)
  2407  			return
  2408  		}
  2409  		err = tree.Validate()
  2410  		if nil != err {
  2411  			err = blunder.AddError(err, blunder.CorruptInodeError)
  2412  			_ = vS.markCorrupted(inodeNumber)
  2413  			return
  2414  		}
  2415  		if FileType == ourInode.InodeType {
  2416  			if deeply {
  2417  				err = validateFileExtents(snapShotID, ourInode)
  2418  				if nil != err {
  2419  					err = blunder.AddError(err, blunder.CorruptInodeError)
  2420  					_ = vS.markCorrupted(inodeNumber)
  2421  					return
  2422  				}
  2423  			}
  2424  		}
  2425  	case SymlinkType:
  2426  		// Nothing to be done here
  2427  	default:
  2428  		err = fmt.Errorf("unrecognized inode type")
  2429  		err = blunder.AddError(err, blunder.CorruptInodeError)
  2430  		_ = vS.markCorrupted(inodeNumber)
  2431  		return
  2432  	}
  2433  
  2434  	err = nil
  2435  	return
  2436  }