github.com/rohankumardubey/proxyfs@v0.0.0-20210108201508-653efa9ab00e/inode/inode.go (about) 1 package inode 2 3 import ( 4 "container/list" 5 "encoding/json" 6 "fmt" 7 "runtime/debug" 8 "strings" 9 "sync" 10 "time" 11 12 "github.com/ansel1/merry" 13 "github.com/swiftstack/cstruct" 14 "github.com/swiftstack/sortedmap" 15 16 "github.com/swiftstack/ProxyFS/blunder" 17 "github.com/swiftstack/ProxyFS/dlm" 18 "github.com/swiftstack/ProxyFS/evtlog" 19 "github.com/swiftstack/ProxyFS/halter" 20 "github.com/swiftstack/ProxyFS/headhunter" 21 "github.com/swiftstack/ProxyFS/logger" 22 "github.com/swiftstack/ProxyFS/stats" 23 "github.com/swiftstack/ProxyFS/swiftclient" 24 "github.com/swiftstack/ProxyFS/trackedlock" 25 "github.com/swiftstack/ProxyFS/utils" 26 ) 27 28 // Shorthand for inode internal API debug log id; global to the package 29 var int_inode_debug = logger.DbgInodeInternal 30 31 const ( 32 optimisticInodeFetchBytes = 2048 33 ) 34 35 type CorruptionDetected bool 36 type Version uint64 37 38 const ( 39 V1 Version = iota + 1 // use type/struct onDiskInodeV1Struct 40 onDiskInodeV1PayloadObjectOffset uint64 = 0 41 ) 42 43 type onDiskInodeV1Struct struct { // Preceded "on disk" by CorruptionDetected then Version both in cstruct.LittleEndian form 44 InodeNumber 45 InodeType 46 LinkCount uint64 47 Size uint64 48 CreationTime time.Time 49 ModificationTime time.Time 50 AccessTime time.Time 51 AttrChangeTime time.Time 52 NumWrites uint64 53 Mode InodeMode 54 UserID InodeUserID 55 GroupID InodeGroupID 56 StreamMap map[string][]byte 57 PayloadObjectNumber uint64 // DirInode: B+Tree Root with Key == dir_entry_name, Value = InodeNumber 58 PayloadObjectLength uint64 // FileInode: B+Tree Root with Key == fileOffset, Value = fileExtent 59 SymlinkTarget string // SymlinkInode: target path of symbolic link 60 LogSegmentMap map[uint64]uint64 // FileInode: Key == LogSegment#, Value = file user data byte count 61 } 62 63 type inFlightLogSegmentStruct struct { // Used as (by reference) 
Value for inMemoryInodeStruct.inFlightLogSegmentMap 64 logSegmentNumber uint64 // Used as (by value) Key for inMemoryInodeStruct.inFlightLogSegmentMap 65 openLogSegmentLRUNext *inFlightLogSegmentStruct 66 openLogSegmentLRUPrev *inFlightLogSegmentStruct 67 fileInode *inMemoryInodeStruct 68 accountName string 69 containerName string 70 objectName string 71 openLogSegmentListElement list.Element 72 swiftclient.ChunkedPutContext 73 } 74 75 type inMemoryInodeStruct struct { 76 trackedlock.Mutex // Used to synchronize with background fileInodeFlusherDaemon 77 sync.WaitGroup // FileInode Flush requests wait on this 78 inodeCacheLRUNext *inMemoryInodeStruct 79 inodeCacheLRUPrev *inMemoryInodeStruct 80 dirty bool 81 volume *volumeStruct 82 snapShotID uint64 83 payload interface{} // DirInode: B+Tree with Key == dir_entry_name, Value = InodeNumber 84 // FileInode: B+Tree with Key == fileOffset, Value = *fileExtent 85 openLogSegment *inFlightLogSegmentStruct // FileInode only... also in inFlightLogSegmentMap 86 inFlightLogSegmentMap map[uint64]*inFlightLogSegmentStruct // FileInode: key == logSegmentNumber 87 inFlightLogSegmentErrors map[uint64]error // FileInode: key == logSegmentNumber; value == err (if non nil) 88 onDiskInodeV1Struct // Real on-disk inode information embedded here 89 } 90 91 func (vS *volumeStruct) DumpKey(key sortedmap.Key) (keyAsString string, err error) { 92 keyAsInodeNumber, ok := key.(InodeNumber) 93 if !ok { 94 err = fmt.Errorf("inode.volumeStruct.DumpKey() could not parse key as a InodeNumber") 95 return 96 } 97 98 keyAsString = fmt.Sprintf("0x%016X", keyAsInodeNumber) 99 100 err = nil 101 return 102 } 103 104 func (vS *volumeStruct) DumpValue(value sortedmap.Value) (valueAsString string, err error) { 105 valueAsInMemoryInodeStructPtr, ok := value.(*inMemoryInodeStruct) 106 if !ok { 107 err = fmt.Errorf("inode.volumeStruct.DumpValue() could not parse value as a *inMemoryInodeStruct") 108 return 109 } 110 111 valueAsString = fmt.Sprintf("%016p", 
valueAsInMemoryInodeStructPtr) 112 113 err = nil 114 return 115 } 116 117 func compareInodeNumber(key1 sortedmap.Key, key2 sortedmap.Key) (result int, err error) { 118 key1InodeNumber, ok := key1.(InodeNumber) 119 if !ok { 120 err = fmt.Errorf("compareInodeNumber(non-InodeNumber,) not supported") 121 return 122 } 123 key2InodeNumber, ok := key2.(InodeNumber) 124 if !ok { 125 err = fmt.Errorf("compareInodeNumber(InodeNumber, non-InodeNumber) not supported") 126 return 127 } 128 129 if key1InodeNumber < key2InodeNumber { 130 result = -1 131 } else if key1InodeNumber == key2InodeNumber { 132 result = 0 133 } else { // key1InodeNumber > key2InodeNumber 134 result = 1 135 } 136 137 err = nil 138 139 return 140 } 141 142 func setRWMode(rwMode RWModeType) (err error) { 143 if rwMode != globals.rwMode { 144 switch rwMode { 145 case RWModeNormal: 146 stats.IncrementOperations(&stats.ReconCheckTriggeredNormalMode) 147 case RWModeNoWrite: 148 stats.IncrementOperations(&stats.ReconCheckTriggeredNoWriteMode) 149 case RWModeReadOnly: 150 stats.IncrementOperations(&stats.ReconCheckTriggeredReadOnlyMode) 151 default: 152 err = fmt.Errorf("SetRWMode(rwMode==%d) not allowed... 
must be one of RWModeNormal(%d), RWModeNoWrite(%d), or RWModeReadOnly(%d)", rwMode, RWModeNormal, RWModeNoWrite, RWModeReadOnly) 153 return 154 } 155 156 globals.rwMode = rwMode 157 } 158 159 err = nil 160 return 161 } 162 163 func enforceRWMode(enforceNoWriteMode bool) (err error) { 164 var ( 165 rwModeCopy RWModeType 166 ) 167 168 rwModeCopy = globals.rwMode 169 170 if rwModeCopy == RWModeReadOnly { 171 err = blunder.NewError(globals.readOnlyThresholdErrno, globals.readOnlyThresholdErrnoString) 172 } else if enforceNoWriteMode && (rwModeCopy == RWModeNoWrite) { 173 err = blunder.NewError(globals.noWriteThresholdErrno, globals.noWriteThresholdErrnoString) 174 } else { 175 err = nil 176 } 177 178 return 179 } 180 181 func (vS *volumeStruct) FetchOnDiskInode(inodeNumber InodeNumber) (corruptionDetected CorruptionDetected, version Version, onDiskInode []byte, err error) { 182 var ( 183 bytesConsumedByCorruptionDetected uint64 184 bytesConsumedByVersion uint64 185 inodeRec []byte 186 ok bool 187 ) 188 189 corruptionDetected = CorruptionDetected(false) 190 version = Version(0) 191 onDiskInode = make([]byte, 0) 192 193 inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber)) 194 if nil != err { 195 err = fmt.Errorf("headhunter.GetInodeRec() failed: %v", err) 196 return 197 } 198 if !ok { 199 err = fmt.Errorf("headhunter.GetInodeRec() returned !ok") 200 return 201 } 202 203 bytesConsumedByCorruptionDetected, err = cstruct.Unpack(inodeRec, &corruptionDetected, cstruct.LittleEndian) 204 if nil != err { 205 err = fmt.Errorf("cstruct.Unpack(,&corruptionDetected,) failed: %v", err) 206 return 207 } 208 if corruptionDetected { 209 return 210 } 211 212 bytesConsumedByVersion, err = cstruct.Unpack(inodeRec[bytesConsumedByCorruptionDetected:], &version, cstruct.LittleEndian) 213 if nil != err { 214 err = fmt.Errorf("cstruct.Unpack(,&version,) failed: %v", err) 215 return 216 } 217 218 onDiskInode = 
inodeRec[bytesConsumedByCorruptionDetected+bytesConsumedByVersion:] 219 220 return 221 } 222 223 func (vS *volumeStruct) fetchOnDiskInode(inodeNumber InodeNumber) (inMemoryInode *inMemoryInodeStruct, ok bool, err error) { 224 var ( 225 bytesConsumedByCorruptionDetected uint64 226 bytesConsumedByVersion uint64 227 corruptionDetected CorruptionDetected 228 inodeRec []byte 229 onDiskInodeV1 *onDiskInodeV1Struct 230 snapShotID uint64 231 snapShotIDType headhunter.SnapShotIDType 232 version Version 233 ) 234 235 snapShotIDType, snapShotID, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 236 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 237 logger.Fatalf("fetchOnDiskInode for headhunter.SnapShotIDTypeDotSnapShot not allowed") 238 } 239 240 inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber)) 241 if nil != err { 242 stackStr := string(debug.Stack()) 243 err = fmt.Errorf("%s: unable to get inodeRec for inode %d: %v stack: %s", 244 utils.GetFnName(), inodeNumber, err, stackStr) 245 err = blunder.AddError(err, blunder.NotFoundError) 246 return 247 } 248 if !ok { 249 return 250 } 251 252 bytesConsumedByCorruptionDetected, err = cstruct.Unpack(inodeRec, &corruptionDetected, cstruct.LittleEndian) 253 if nil != err { 254 err = fmt.Errorf("%s: unable to parse inodeRec.CorruptionDetected for inode %d: %v", utils.GetFnName(), inodeNumber, err) 255 err = blunder.AddError(err, blunder.CorruptInodeError) 256 return 257 } 258 if corruptionDetected { 259 err = fmt.Errorf("%s: inode %d has been marked corrupted", utils.GetFnName(), inodeNumber) 260 err = blunder.AddError(err, blunder.CorruptInodeError) 261 return 262 } 263 264 bytesConsumedByVersion, err = cstruct.Unpack(inodeRec[bytesConsumedByCorruptionDetected:], &version, cstruct.LittleEndian) 265 if nil != err { 266 err = fmt.Errorf("%s: unable to get inodeRec.Version for inode %d: %v", utils.GetFnName(), inodeNumber, err) 267 err = blunder.AddError(err, 
blunder.CorruptInodeError) 268 return 269 } 270 if V1 != version { 271 err = fmt.Errorf("%s: inodeRec.Version for inode %d (%v) not supported", utils.GetFnName(), inodeNumber, version) 272 err = blunder.AddError(err, blunder.CorruptInodeError) 273 return 274 } 275 276 onDiskInodeV1 = &onDiskInodeV1Struct{StreamMap: make(map[string][]byte)} 277 278 err = json.Unmarshal(inodeRec[bytesConsumedByCorruptionDetected+bytesConsumedByVersion:], onDiskInodeV1) 279 if nil != err { 280 err = fmt.Errorf("%s: inodeRec.<body> for inode %d json.Unmarshal() failed: %v", utils.GetFnName(), inodeNumber, err) 281 err = blunder.AddError(err, blunder.CorruptInodeError) 282 return 283 } 284 285 inMemoryInode = &inMemoryInodeStruct{ 286 inodeCacheLRUNext: nil, 287 inodeCacheLRUPrev: nil, 288 dirty: false, 289 volume: vS, 290 snapShotID: snapShotID, 291 openLogSegment: nil, 292 inFlightLogSegmentMap: make(map[uint64]*inFlightLogSegmentStruct), 293 inFlightLogSegmentErrors: make(map[uint64]error), 294 onDiskInodeV1Struct: *onDiskInodeV1, 295 } 296 297 inMemoryInode.onDiskInodeV1Struct.InodeNumber = inodeNumber 298 299 switch inMemoryInode.InodeType { 300 case DirType: 301 if 0 == inMemoryInode.PayloadObjectNumber { 302 inMemoryInode.payload = 303 sortedmap.NewBPlusTree( 304 vS.maxEntriesPerDirNode, 305 sortedmap.CompareString, 306 &dirInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}}, 307 globals.dirEntryCache) 308 } else { 309 inMemoryInode.payload, err = 310 sortedmap.OldBPlusTree( 311 inMemoryInode.PayloadObjectNumber, 312 onDiskInodeV1PayloadObjectOffset, 313 inMemoryInode.PayloadObjectLength, 314 sortedmap.CompareString, 315 &dirInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}}, 316 globals.dirEntryCache) 317 if nil != err { 318 err = fmt.Errorf("%s: sortedmap.OldBPlusTree(inodeRec.<body>.PayloadObjectNumber) for DirType inode %d failed: %v", utils.GetFnName(), inodeNumber, err) 319 err = blunder.AddError(err, blunder.CorruptInodeError) 320 return 321 } 322 } 323 case 
FileType: 324 if 0 == inMemoryInode.PayloadObjectNumber { 325 inMemoryInode.payload = 326 sortedmap.NewBPlusTree( 327 vS.maxExtentsPerFileNode, 328 sortedmap.CompareUint64, 329 &fileInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}}, 330 globals.fileExtentMapCache) 331 } else { 332 inMemoryInode.payload, err = 333 sortedmap.OldBPlusTree( 334 inMemoryInode.PayloadObjectNumber, 335 onDiskInodeV1PayloadObjectOffset, 336 inMemoryInode.PayloadObjectLength, 337 sortedmap.CompareUint64, 338 &fileInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}}, 339 globals.fileExtentMapCache) 340 if nil != err { 341 err = fmt.Errorf("%s: sortedmap.OldBPlusTree(inodeRec.<body>.PayloadObjectNumber) for FileType inode %d failed: %v", utils.GetFnName(), inodeNumber, err) 342 err = blunder.AddError(err, blunder.CorruptInodeError) 343 return 344 } 345 } 346 case SymlinkType: 347 // Nothing special here 348 default: 349 err = fmt.Errorf("%s: inodeRec.InodeType for inode %d (%v) not supported", utils.GetFnName(), inodeNumber, inMemoryInode.InodeType) 350 err = blunder.AddError(err, blunder.CorruptInodeError) 351 return 352 } 353 354 err = nil 355 return 356 } 357 358 func (vS *volumeStruct) inodeCacheFetchWhileLocked(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) { 359 var ( 360 inodeAsValue sortedmap.Value 361 ) 362 363 inodeAsValue, ok, err = vS.inodeCache.GetByKey(inodeNumber) 364 if nil != err { 365 return 366 } 367 368 if ok { 369 inode, ok = inodeAsValue.(*inMemoryInodeStruct) 370 if ok { 371 vS.inodeCacheTouchWhileLocked(inode) 372 err = nil 373 } else { 374 ok = false 375 err = fmt.Errorf("inodeCache[inodeNumber==0x%016X] contains a value not mappable to a *inMemoryInodeStruct", inodeNumber) 376 } 377 } 378 379 return 380 } 381 382 func (vS *volumeStruct) inodeCacheFetch(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) { 383 vS.Lock() 384 inode, ok, err = vS.inodeCacheFetchWhileLocked(inodeNumber) 385 vS.Unlock() 386 return 387 
} 388 389 func (vS *volumeStruct) inodeCacheInsertWhileLocked(inode *inMemoryInodeStruct) (ok bool, err error) { 390 ok, err = vS.inodeCache.Put(inode.InodeNumber, inode) 391 if (nil != err) || !ok { 392 return 393 } 394 395 // Place inode at the MRU end of inodeCacheLRU 396 397 if 0 == vS.inodeCacheLRUItems { 398 vS.inodeCacheLRUHead = inode 399 vS.inodeCacheLRUTail = inode 400 vS.inodeCacheLRUItems = 1 401 } else { 402 inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail 403 inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode 404 405 vS.inodeCacheLRUTail = inode 406 vS.inodeCacheLRUItems++ 407 } 408 409 return 410 } 411 412 func (vS *volumeStruct) inodeCacheInsert(inode *inMemoryInodeStruct) (ok bool, err error) { 413 vS.Lock() 414 ok, err = vS.inodeCacheInsertWhileLocked(inode) 415 vS.Unlock() 416 return 417 } 418 419 func (vS *volumeStruct) inodeCacheTouchWhileLocked(inode *inMemoryInodeStruct) { 420 // Move inode to the MRU end of inodeCacheLRU 421 422 if inode != vS.inodeCacheLRUTail { 423 if inode == vS.inodeCacheLRUHead { 424 vS.inodeCacheLRUHead = inode.inodeCacheLRUNext 425 vS.inodeCacheLRUHead.inodeCacheLRUPrev = nil 426 427 inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail 428 inode.inodeCacheLRUNext = nil 429 430 vS.inodeCacheLRUTail.inodeCacheLRUNext = inode 431 vS.inodeCacheLRUTail = inode 432 } else { 433 inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode.inodeCacheLRUNext 434 inode.inodeCacheLRUNext.inodeCacheLRUPrev = inode.inodeCacheLRUPrev 435 436 inode.inodeCacheLRUNext = nil 437 inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail 438 439 vS.inodeCacheLRUTail.inodeCacheLRUNext = inode 440 vS.inodeCacheLRUTail = inode 441 } 442 } 443 } 444 445 func (vS *volumeStruct) inodeCacheTouch(inode *inMemoryInodeStruct) { 446 vS.Lock() 447 vS.inodeCacheTouchWhileLocked(inode) 448 vS.Unlock() 449 } 450 451 // The inode cache discard thread calls this routine when the ticker goes off. 
452 func (vS *volumeStruct) inodeCacheDiscard() (discarded uint64, dirty uint64, locked uint64, lruItems uint64) { 453 inodesToDrop := uint64(0) 454 455 vS.Lock() 456 457 if (vS.inodeCacheLRUItems * globals.inodeSize) > vS.inodeCacheLRUMaxBytes { 458 // Check, at most, 1.25 * (minimum_number_to_drop) 459 inodesToDrop = (vS.inodeCacheLRUItems * globals.inodeSize) - vS.inodeCacheLRUMaxBytes 460 inodesToDrop = inodesToDrop / globals.inodeSize 461 inodesToDrop += inodesToDrop / 4 462 for (inodesToDrop > 0) && ((vS.inodeCacheLRUItems * globals.inodeSize) > vS.inodeCacheLRUMaxBytes) { 463 inodesToDrop-- 464 465 ic := vS.inodeCacheLRUHead 466 467 // Create a DLM lock object 468 id := dlm.GenerateCallerID() 469 inodeRWLock, _ := vS.InitInodeLock(ic.InodeNumber, id) 470 err := inodeRWLock.TryWriteLock() 471 472 // Inode is locked; skip it 473 if err != nil { 474 // Move inode to tail of LRU 475 vS.inodeCacheTouchWhileLocked(ic) 476 locked++ 477 continue 478 } 479 480 if ic.dirty { 481 // The inode is busy - drop the DLM lock and move to tail 482 inodeRWLock.Unlock() 483 dirty++ 484 vS.inodeCacheTouchWhileLocked(ic) 485 continue 486 } 487 488 var ok bool 489 490 discarded++ 491 ok, err = vS.inodeCacheDropWhileLocked(ic) 492 if err != nil || !ok { 493 pStr := fmt.Errorf("The inodes was not found in the inode cache - ok: %v err: %v", ok, err) 494 panic(pStr) 495 } 496 497 inodeRWLock.Unlock() 498 499 // NOTE: vS.inodeCacheDropWhileLocked() removed the inode from the LRU list so 500 // the head is now different 501 } 502 } 503 lruItems = vS.inodeCacheLRUItems 504 vS.Unlock() 505 //logger.Infof("discard: %v dirty: %v locked: %v LRUitems: %v", discarded, dirty, locked, lruItems) 506 return 507 } 508 509 func (vS *volumeStruct) inodeCacheDropWhileLocked(inode *inMemoryInodeStruct) (ok bool, err error) { 510 ok, err = vS.inodeCache.DeleteByKey(inode.InodeNumber) 511 if (nil != err) || !ok { 512 return 513 } 514 515 if inode == vS.inodeCacheLRUHead { 516 if inode == 
vS.inodeCacheLRUTail { 517 vS.inodeCacheLRUHead = nil 518 vS.inodeCacheLRUTail = nil 519 vS.inodeCacheLRUItems = 0 520 } else { 521 vS.inodeCacheLRUHead = inode.inodeCacheLRUNext 522 vS.inodeCacheLRUHead.inodeCacheLRUPrev = nil 523 vS.inodeCacheLRUItems-- 524 525 inode.inodeCacheLRUNext = nil 526 } 527 } else { 528 if inode == vS.inodeCacheLRUTail { 529 vS.inodeCacheLRUTail = inode.inodeCacheLRUPrev 530 vS.inodeCacheLRUTail.inodeCacheLRUNext = nil 531 vS.inodeCacheLRUItems-- 532 533 inode.inodeCacheLRUPrev = nil 534 } else { 535 inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode.inodeCacheLRUNext 536 inode.inodeCacheLRUNext.inodeCacheLRUPrev = inode.inodeCacheLRUPrev 537 vS.inodeCacheLRUItems-- 538 539 inode.inodeCacheLRUNext = nil 540 inode.inodeCacheLRUPrev = nil 541 } 542 } 543 544 return 545 } 546 547 func (vS *volumeStruct) inodeCacheDrop(inode *inMemoryInodeStruct) (ok bool, err error) { 548 vS.Lock() 549 ok, err = vS.inodeCacheDropWhileLocked(inode) 550 vS.Unlock() 551 return 552 } 553 554 func (vS *volumeStruct) fetchInode(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) { 555 for { 556 inode, ok, err = vS.inodeCacheFetch(inodeNumber) 557 if nil != err { 558 return 559 } 560 561 if ok { 562 return 563 } 564 565 inode, ok, err = vS.fetchOnDiskInode(inodeNumber) 566 if nil != err { 567 return 568 } 569 if !ok { 570 err = fmt.Errorf("%s.fetchInode(0x%016X) not found", vS.volumeName, inodeNumber) 571 return 572 } 573 574 ok, err = vS.inodeCacheInsert(inode) 575 if nil != err { 576 return 577 } 578 579 if ok { 580 return 581 } 582 583 // If we reach here, somebody beat us to it... just restart the fetch... 
584 } 585 } 586 587 // Fetch inode with inode type checking 588 func (vS *volumeStruct) fetchInodeType(inodeNumber InodeNumber, expectedType InodeType) (inode *inMemoryInodeStruct, err error) { 589 inode, ok, err := vS.fetchInode(inodeNumber) 590 if nil != err { 591 return 592 } 593 if !ok { 594 err = fmt.Errorf("%s: expected inode %d volume '%s' to be type %v, but it was unallocated", 595 utils.GetFnName(), inode.InodeNumber, vS.volumeName, expectedType) 596 err = blunder.AddError(err, blunder.NotFoundError) 597 return 598 } 599 if inode.InodeType == expectedType { 600 // success 601 return 602 } 603 604 err = fmt.Errorf("%s: expected inode %d volume '%s' to be type %v, got %v", 605 utils.GetFnName(), inode.InodeNumber, vS.volumeName, expectedType, inode.InodeType) 606 607 var errVal blunder.FsError 608 switch expectedType { 609 case DirType: 610 errVal = blunder.NotDirError 611 case FileType: 612 errVal = blunder.NotFileError 613 case SymlinkType: 614 errVal = blunder.NotSymlinkError 615 default: 616 panic(fmt.Sprintf("unknown inode type=%v!", expectedType)) 617 } 618 err = blunder.AddError(err, errVal) 619 620 return 621 } 622 623 func (vS *volumeStruct) makeInMemoryInodeWithThisInodeNumber(inodeType InodeType, fileMode InodeMode, userID InodeUserID, groupID InodeGroupID, inodeNumber InodeNumber, volumeLocked bool) (inMemoryInode *inMemoryInodeStruct) { 624 var ( 625 birthTime time.Time 626 nonce uint64 627 snapShotID uint64 628 snapShotIDType headhunter.SnapShotIDType 629 ) 630 631 snapShotIDType, snapShotID, nonce = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 632 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 633 logger.Fatalf("makeInMemoryInodeWithThisInodeNumber for headhunter.SnapShotIDTypeDotSnapShot not allowed") 634 } 635 636 birthTime = time.Now() 637 638 inMemoryInode = &inMemoryInodeStruct{ 639 inodeCacheLRUNext: nil, 640 inodeCacheLRUPrev: nil, 641 dirty: true, 642 volume: vS, 643 snapShotID: snapShotID, 644 
openLogSegment: nil, 645 inFlightLogSegmentMap: make(map[uint64]*inFlightLogSegmentStruct), 646 inFlightLogSegmentErrors: make(map[uint64]error), 647 onDiskInodeV1Struct: onDiskInodeV1Struct{ 648 InodeNumber: InodeNumber(nonce), 649 InodeType: inodeType, 650 CreationTime: birthTime, 651 ModificationTime: birthTime, 652 AccessTime: birthTime, 653 AttrChangeTime: birthTime, 654 NumWrites: 0, 655 Mode: fileMode, 656 UserID: userID, 657 GroupID: groupID, 658 StreamMap: make(map[string][]byte), 659 LogSegmentMap: make(map[uint64]uint64), 660 }, 661 } 662 663 return 664 } 665 666 func (vS *volumeStruct) makeInMemoryInode(inodeType InodeType, fileMode InodeMode, userID InodeUserID, groupID InodeGroupID) (inMemoryInode *inMemoryInodeStruct, err error) { 667 inodeNumberAsUint64 := vS.headhunterVolumeHandle.FetchNonce() 668 669 inMemoryInode = vS.makeInMemoryInodeWithThisInodeNumber(inodeType, fileMode, userID, groupID, InodeNumber(inodeNumberAsUint64), false) 670 671 return 672 } 673 674 func (vS *volumeStruct) PatchInode(inodeNumber InodeNumber, inodeType InodeType, linkCount uint64, mode InodeMode, userID InodeUserID, groupID InodeGroupID, parentInodeNumber InodeNumber, symlinkTarget string) (err error) { 675 var ( 676 callerID dlm.CallerID 677 inode *inMemoryInodeStruct 678 inodeNumberDecodedAsInodeNumber InodeNumber 679 inodeNumberDecodedAsUint64 uint64 680 inodeRWLock *dlm.RWLockStruct 681 modeAdornedWithInodeType InodeMode 682 ok bool 683 parentInodeNumberDecodedAsInodeNumber InodeNumber 684 parentInodeNumberDecodedAsUint64 uint64 685 payload sortedmap.BPlusTree 686 snapShotIDType headhunter.SnapShotIDType 687 ) 688 689 snapShotIDType, _, inodeNumberDecodedAsUint64 = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 690 if headhunter.SnapShotIDTypeLive != snapShotIDType { 691 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) must provide a non-SnapShot inodeNumber", inodeNumber) 692 return 693 } 694 inodeNumberDecodedAsInodeNumber = 
InodeNumber(inodeNumberDecodedAsUint64) 695 696 switch inodeType { 697 case DirType: 698 if 2 != linkCount { 699 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,linkCount==%v,,,,,) must set linkCount to 2", inodeNumber, linkCount) 700 return 701 } 702 if InodeNumber(0) == parentInodeNumber { 703 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0,) must provide a non-zero parentInodeNumber", inodeNumber) 704 return 705 } 706 if (RootDirInodeNumber == inodeNumber) && (RootDirInodeNumber != parentInodeNumber) { 707 err = fmt.Errorf("PatchInode(inodeNumber==RootDirInodeNumber,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) must provide RootDirInode's parent as also RootDirInodeNumber", parentInodeNumber) 708 return 709 } 710 snapShotIDType, _, parentInodeNumberDecodedAsUint64 = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 711 if headhunter.SnapShotIDTypeLive != snapShotIDType { 712 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) must provide a non-SnapShot parentInodeNumber", inodeNumber, parentInodeNumber) 713 return 714 } 715 parentInodeNumberDecodedAsInodeNumber = InodeNumber(parentInodeNumberDecodedAsUint64) 716 case FileType: 717 if 0 == linkCount { 718 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==FileType,linkCount==0,,,,,) must provide a non-zero linkCount", inodeNumber) 719 return 720 } 721 case SymlinkType: 722 if 0 == linkCount { 723 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==SymlinkType,linkCount==0,,,,,) must provide a non-zero linkCount", inodeNumber) 724 return 725 } 726 if "" == symlinkTarget { 727 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==SymlinkType,,,,,,symlinkTarget==\"\") must provide a non-empty symlinkTarget", inodeNumber) 728 return 729 } 730 default: 731 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==%v,,,,,,) must provide a inodeType of 
DirType(%v), FileType(%v), or SymlinkType(%v)", inodeNumber, inodeType, DirType, FileType, SymlinkType) 732 return 733 } 734 735 modeAdornedWithInodeType, err = determineMode(mode, inodeType) 736 if nil != err { 737 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==%v,,mode==0o%011o,,,,) failed: %v", inodeNumber, inodeType, mode, err) 738 return 739 } 740 741 vS.Lock() 742 743 callerID = dlm.GenerateCallerID() 744 inodeRWLock, _ = vS.InitInodeLock(inodeNumber, callerID) 745 err = inodeRWLock.TryWriteLock() 746 if nil != err { 747 vS.Unlock() 748 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) couldn't create a *dlm.RWLockStruct: %v", inodeNumber, err) 749 return 750 } 751 752 inode, ok, err = vS.inodeCacheFetchWhileLocked(inodeNumber) 753 if nil != err { 754 _ = inodeRWLock.Unlock() 755 vS.Unlock() 756 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) couldn't search inodeCache for pre-existing inode: %v", inodeNumber, err) 757 return 758 } 759 if ok { 760 if inode.dirty { 761 _ = inodeRWLock.Unlock() 762 vS.Unlock() 763 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) of dirty Inode is not allowed", inodeNumber) 764 return 765 } 766 ok, err = vS.inodeCacheDropWhileLocked(inode) 767 if nil != err { 768 _ = inodeRWLock.Unlock() 769 vS.Unlock() 770 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) drop of pre-existing inode from inodeCache failed: %v", inodeNumber, err) 771 return 772 } 773 if !ok { 774 _ = inodeRWLock.Unlock() 775 vS.Unlock() 776 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) drop of pre-existing inode from inodeCache returned !ok", inodeNumber) 777 return 778 } 779 } 780 781 inode = vS.makeInMemoryInodeWithThisInodeNumber(inodeType, modeAdornedWithInodeType, userID, groupID, inodeNumberDecodedAsInodeNumber, true) 782 783 inode.dirty = true 784 785 inode.onDiskInodeV1Struct.LinkCount = linkCount 786 787 switch inodeType { 788 case DirType: 789 payload = sortedmap.NewBPlusTree( 790 
vS.maxEntriesPerDirNode, 791 sortedmap.CompareString, 792 &dirInodeCallbacks{treeNodeLoadable{inode: inode}}, 793 globals.dirEntryCache) 794 795 ok, err = payload.Put(".", inodeNumberDecodedAsInodeNumber) 796 if nil != err { 797 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) failed to insert \".\" dirEntry: %v", inodeNumber, err) 798 panic(err) 799 } 800 if !ok { 801 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) insert \".\" dirEntry got a !ok", inodeNumber) 802 panic(err) 803 } 804 805 ok, err = payload.Put("..", parentInodeNumberDecodedAsInodeNumber) 806 if nil != err { 807 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) failed to insert \"..\" dirEntry: %v", inodeNumber, parentInodeNumber, err) 808 panic(err) 809 } 810 if !ok { 811 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) insert \"..\" dirEntry got a !ok", inodeNumber, parentInodeNumber) 812 panic(err) 813 } 814 815 inode.payload = payload 816 inode.onDiskInodeV1Struct.SymlinkTarget = "" 817 case FileType: 818 payload = sortedmap.NewBPlusTree( 819 vS.maxExtentsPerFileNode, 820 sortedmap.CompareUint64, 821 &fileInodeCallbacks{treeNodeLoadable{inode: inode}}, 822 globals.fileExtentMapCache) 823 824 inode.payload = payload 825 inode.onDiskInodeV1Struct.SymlinkTarget = "" 826 case SymlinkType: 827 inode.payload = nil 828 inode.onDiskInodeV1Struct.SymlinkTarget = symlinkTarget 829 } 830 831 ok, err = vS.inodeCacheInsertWhileLocked(inode) 832 if nil != err { 833 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) failed to insert inode in inodeCache: %v", inodeNumber, err) 834 panic(err) 835 } 836 if !ok { 837 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) insert of inode in inodeCache got a !ok", inodeNumber) 838 panic(err) 839 } 840 841 _ = inodeRWLock.Unlock() 842 843 vS.Unlock() 844 845 err 
= vS.flushInode(inode) 846 if nil != err { 847 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) failed to flush: %v", inodeNumber, err) 848 panic(err) 849 } 850 851 return 852 } 853 854 func (inMemoryInode *inMemoryInodeStruct) convertToOnDiskInodeV1() (onDiskInodeV1 *onDiskInodeV1Struct, err error) { 855 onDiskInode := inMemoryInode.onDiskInodeV1Struct 856 857 if (DirType == inMemoryInode.InodeType) || (FileType == inMemoryInode.InodeType) { 858 content := inMemoryInode.payload.(sortedmap.BPlusTree) 859 payloadObjectNumber, payloadObjectOffset, payloadObjectLength, flushErr := content.Flush(false) 860 if nil != flushErr { 861 panic(flushErr) 862 } 863 pruneErr := content.Prune() 864 if nil != pruneErr { 865 panic(pruneErr) 866 } 867 if onDiskInodeV1PayloadObjectOffset != payloadObjectOffset { 868 flushErr = fmt.Errorf("Logic Error: content.Flush() should have returned payloadObjectOffset == %v", onDiskInodeV1PayloadObjectOffset) 869 panic(flushErr) 870 } 871 onDiskInode.PayloadObjectNumber = payloadObjectNumber 872 onDiskInode.PayloadObjectLength = payloadObjectLength 873 } 874 875 // maps are refernce types, so this needs to be copied manually 876 877 onDiskInode.StreamMap = make(map[string][]byte) 878 for key, value := range inMemoryInode.StreamMap { 879 valueCopy := make([]byte, len(value)) 880 copy(valueCopy, value) 881 onDiskInode.StreamMap[key] = valueCopy 882 } 883 884 onDiskInode.LogSegmentMap = make(map[uint64]uint64) 885 for logSegmentNumber, logSegmentBytesUsed := range inMemoryInode.LogSegmentMap { 886 onDiskInode.LogSegmentMap[logSegmentNumber] = logSegmentBytesUsed 887 } 888 889 return &onDiskInode, nil 890 } 891 892 func (vS *volumeStruct) flushInode(inode *inMemoryInodeStruct) (err error) { 893 err = vS.flushInodes([]*inMemoryInodeStruct{inode}) 894 return 895 } 896 897 func (vS *volumeStruct) flushInodeNumber(inodeNumber InodeNumber) (err error) { 898 err = vS.flushInodeNumbers([]InodeNumber{inodeNumber}) 899 return 900 } 901 902 // 
REVIEW: Need to clearly explain what "flush" means (i.e. "to HH", not "to disk")

// flushInodes pushes the supplied inodes to HeadHunter: any in-flight file
// data is flushed, Dir/File payload B+Trees are flushed, and every
// still-dirty inode is serialized and handed to PutInodeRecs() in a single
// call. Log segments that no longer hold any valid bytes are then deleted
// (best effort — deletion failures are only logged).
func (vS *volumeStruct) flushInodes(inodes []*inMemoryInodeStruct) (err error) {
	var (
		deadSegments          []uint64
		deadSegmentsThisInode []uint64
		dirtyInodeNumbers     []uint64
		dirtyInodeRecs        [][]byte
		diskInode             *onDiskInodeV1Struct
		diskInodeJSON         []byte
		flushList             []uint64
		ino                   *inMemoryInodeStruct
		payloadObjectLength   uint64
		payloadObjectNumber   uint64
		payloadTree           sortedmap.BPlusTree
		recBytes              []byte
		segNum                uint64
		segValidBytes         uint64
	)

	halter.Trigger(halter.InodeFlushInodesEntry)
	defer halter.Trigger(halter.InodeFlushInodesExit)

	flushList = make([]uint64, 0, len(inodes))
	for _, ino = range inodes {
		flushList = append(flushList, uint64(ino.InodeNumber))
	}

	evtlog.Record(evtlog.FormatFlushInodesEntry, vS.volumeName, flushList)

	// Assemble the slice of "dirty" inodes while flushing them.
	dirtyInodeNumbers = make([]uint64, 0, len(inodes))
	dirtyInodeRecs = make([][]byte, 0, len(inodes))
	deadSegments = make([]uint64, 0)

	for _, ino = range inodes {
		if FileType == ino.InodeType {
			// Push any in-flight file data first.
			err = vS.doFileInodeDataFlush(ino)
			if nil != err {
				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(ino.InodeNumber), err.Error())
				logger.ErrorWithError(err)
				err = blunder.AddError(err, blunder.InodeFlushError)
				return
			}
			// Collect (and drop from the map) log segments that no longer
			// hold any valid bytes; they are garbage collected below.
			deadSegmentsThisInode = make([]uint64, 0)
			for segNum, segValidBytes = range ino.LogSegmentMap {
				if 0 == segValidBytes {
					deadSegmentsThisInode = append(deadSegmentsThisInode, segNum)
				}
			}
			for _, segNum = range deadSegmentsThisInode {
				delete(ino.LogSegmentMap, segNum)
			}
			deadSegments = append(deadSegments, deadSegmentsThisInode...)
		}
		if SymlinkType != ino.InodeType {
			// DirType or FileType: flush the payload B+Tree.
			payloadTree = ino.payload.(sortedmap.BPlusTree)
			payloadObjectNumber, _, payloadObjectLength, err = payloadTree.Flush(false)
			if nil != err {
				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(ino.InodeNumber), err.Error())
				logger.ErrorWithError(err)
				err = blunder.AddError(err, blunder.InodeFlushError)
				return
			}
			if payloadObjectNumber > ino.PayloadObjectNumber {
				if !ino.dirty {
					err = fmt.Errorf("Logic error: inode.dirty should have been true")
					evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(ino.InodeNumber), err.Error())
					logger.ErrorWithError(err)
					err = blunder.AddError(err, blunder.InodeFlushError)
					return
				}
				// REVIEW(unresolved): if cache pressure flushed this inode
				// before we got here, could PayloadObjectNumber fail to be
				// updated? — confirm

				if ino.PayloadObjectNumber != 0 {
					logger.Tracef("flushInodes(): volume '%s' %v inode %d: updating Payload from Object %016X to %016X bytes %d to %d",
						vS.volumeName, ino.InodeType, ino.InodeNumber,
						ino.PayloadObjectNumber, payloadObjectNumber,
						ino.PayloadObjectLength, payloadObjectLength)
				}
				ino.PayloadObjectNumber = payloadObjectNumber
				ino.PayloadObjectLength = payloadObjectLength

				evtlog.Record(evtlog.FormatFlushInodesDirOrFilePayloadObjectNumberUpdated, vS.volumeName, uint64(ino.InodeNumber), payloadObjectNumber)
			}
		}
		if ino.dirty {
			// Serialize: preamble followed by the JSON-encoded V1 inode.
			diskInode, err = ino.convertToOnDiskInodeV1()
			if nil != err {
				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(ino.InodeNumber), err.Error())
				logger.ErrorWithError(err)
				err = blunder.AddError(err, blunder.InodeFlushError)
				return
			}
			diskInodeJSON, err = json.Marshal(diskInode)
			if nil != err {
				evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(ino.InodeNumber), err.Error())
				logger.ErrorWithError(err)
				err = blunder.AddError(err, blunder.InodeFlushError)
				return
			}
			recBytes = make([]byte, 0, len(globals.inodeRecDefaultPreambleBuf)+len(diskInodeJSON))
			recBytes = append(recBytes, globals.inodeRecDefaultPreambleBuf...)
			recBytes = append(recBytes, diskInodeJSON...)
			dirtyInodeNumbers = append(dirtyInodeNumbers, uint64(ino.InodeNumber))
			dirtyInodeRecs = append(dirtyInodeRecs, recBytes)
		}
	}

	// Go update HeadHunter (if necessary).
	if 0 < len(dirtyInodeNumbers) {
		err = vS.headhunterVolumeHandle.PutInodeRecs(dirtyInodeNumbers, dirtyInodeRecs)
		if nil != err {
			evtlog.Record(evtlog.FormatFlushInodesErrorOnHeadhunterPut, vS.volumeName, err.Error())
			logger.ErrorWithError(err)
			err = blunder.AddError(err, blunder.InodeFlushError)
			return
		}
		for _, ino = range inodes {
			ino.dirty = false
		}
	}

	// Phase one of garbage collection: drop the now-empty log segments.
	if 0 < len(deadSegments) {
		for _, segNum = range deadSegments {
			err = vS.headhunterVolumeHandle.DeleteLogSegmentRec(segNum)
			if nil != err {
				logger.WarnfWithError(err, "couldn't delete garbage log segment")
			}
		}
	}

	evtlog.Record(evtlog.FormatFlushInodesExit, vS.volumeName, flushList)

	err = nil
	return
}

// flushInodeNumbers fetches each referenced inode and flushes them all via
// flushInodes().
func (vS *volumeStruct) flushInodeNumbers(inodeNumbers []InodeNumber) (err error) {
	var (
		inode       *inMemoryInodeStruct
		inodes      []*inMemoryInodeStruct
		inodeNumber InodeNumber
		ok          bool
	)

	// Fetch referenced inodes
	inodes = make([]*inMemoryInodeStruct, 0, len(inodeNumbers))
	for _, inodeNumber = range inodeNumbers {
		inode, ok, err = vS.fetchInode(inodeNumber)
		if nil != err {
			// the inode is locked so this should never happen (unless the inode
			// was evicted from the cache and it was corrupt when read from disk)
			// (err includes volume name and inode number)
			logger.ErrorfWithError(err, "%s: fetch of inode to flush failed", utils.GetFnName())
			err = blunder.AddError(err, blunder.InodeFlushError)
			return
		}
		if !ok {
			// this should never happen (see above)
			err = fmt.Errorf("%s: fetch of inode %d 
volume '%s' failed because it is unallocated",
				utils.GetFnName(), inodeNumber, vS.volumeName)
			logger.ErrorWithError(err)
			err = blunder.AddError(err, blunder.NotFoundError)
			return
		}

		inodes = append(inodes, inode)
	}

	err = vS.flushInodes(inodes)

	return
}

// accountNameToVolumeName maps a Swift account name to the name of the
// volume serving it; ok is false if the account is unknown.
func accountNameToVolumeName(accountName string) (volumeName string, ok bool) {
	var (
		volume *volumeStruct
	)

	globals.Lock()

	volume, ok = globals.accountMap[accountName]
	if ok {
		volumeName = volume.volumeName
	}

	globals.Unlock()

	return
}

// volumeNameToAccountName maps a volume name to its backing Swift account
// name; ok is false if the volume is unknown.
func volumeNameToAccountName(volumeName string) (accountName string, ok bool) {
	var (
		volume *volumeStruct
	)

	globals.Lock()

	volume, ok = globals.volumeMap[volumeName]
	if ok {
		accountName = volume.accountName
	}

	globals.Unlock()

	return
}

// volumeNameToActivePeerPrivateIPAddr returns the private IP address of the
// peer actively serving the named volume; ok is false if the volume is
// unknown.
func volumeNameToActivePeerPrivateIPAddr(volumeName string) (activePeerPrivateIPAddr string, ok bool) {
	var (
		volume *volumeStruct
	)

	globals.Lock()

	volume, ok = globals.volumeMap[volumeName]

	if ok {
		activePeerPrivateIPAddr = volume.volumeGroup.activePeerPrivateIPAddr
	}

	globals.Unlock()

	return
}

// fetchVolumeHandle looks up the named volume, verifies it is being served
// here, and lazily creates the root directory inode on first access.
//
// BUG FIX: the previous implementation set err on GetInodeRec() and
// createRootOrSubDir() failures but then fell through to an unconditional
// `err = nil`, silently discarding both errors and returning success.
// Those error paths now return immediately.
func fetchVolumeHandle(volumeName string) (volumeHandle VolumeHandle, err error) {
	globals.Lock()
	volume, ok := globals.volumeMap[volumeName]
	globals.Unlock()

	if !ok {
		err = fmt.Errorf("%s: volumeName \"%v\" not found", utils.GetFnName(), volumeName)
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}

	volumeHandle = volume

	volume.Lock()         // REVIEW: Once Tracker https://www.pivotaltracker.com/story/show/133377567
	defer volume.Unlock() // is resolved, these two lines should be removed

	if !volume.served {
		err = fmt.Errorf("%s: volumeName \"%v\" not served", utils.GetFnName(), volumeName)
		err = blunder.AddError(err, blunder.NotActiveError)
		return
	}

	_, ok, err = volume.headhunterVolumeHandle.GetInodeRec(uint64(RootDirInodeNumber))
	if nil != err {
		// disk corruption of the inode btree (or software error)
		err = fmt.Errorf("%s: unable to lookup root inode for volume '%s': %v",
			utils.GetFnName(), volume.volumeName, err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}
	if !ok {
		// First access didn't find root dir... so create it
		_, err = volume.createRootOrSubDir(PosixModePerm, 0, 0, true)
		if nil != err {
			err = fmt.Errorf("%s: unable to create root inode for volume '%s': %v",
				utils.GetFnName(), volume.volumeName, err)
			err = blunder.AddError(err, blunder.NotFoundError)
			return
		}
	}

	err = nil

	return
}

// provisionPhysicalContainer creates a new Swift container for the supplied
// layout whenever the layout's rotation count indicates the current container
// set has reached maxObjectsPerContainer uses.
func (vS *volumeStruct) provisionPhysicalContainer(physicalContainerLayout *physicalContainerLayoutStruct) (err error) {
	if 0 == (physicalContainerLayout.containerNameSliceLoopCount % physicalContainerLayout.maxObjectsPerContainer) {
		// We need to provision a new PhysicalContainer in this PhysicalContainerLayout

		physicalContainerNameSuffix := vS.headhunterVolumeHandle.FetchNonce()

		newContainerName := fmt.Sprintf("%s%s", physicalContainerLayout.containerNamePrefix, utils.Uint64ToHexStr(physicalContainerNameSuffix))

		storagePolicyHeaderValues := []string{vS.defaultPhysicalContainerLayout.containerStoragePolicy}
		newContainerHeaders := make(map[string][]string)
		newContainerHeaders["X-Storage-Policy"] = storagePolicyHeaderValues

		err = swiftclient.ContainerPut(vS.accountName, newContainerName, newContainerHeaders)
		if nil != err {
			return
		}

		physicalContainerLayout.containerNameSlice[physicalContainerLayout.containerNameSliceNextIndex] =
newContainerName 1197 } 1198 1199 err = nil 1200 return 1201 } 1202 1203 func (vS *volumeStruct) provisionObject() (containerName string, objectNumber uint64, err error) { 1204 objectNumber = vS.headhunterVolumeHandle.FetchNonce() 1205 1206 vS.Lock() 1207 1208 err = vS.provisionPhysicalContainer(vS.defaultPhysicalContainerLayout) 1209 if nil != err { 1210 vS.Unlock() 1211 return 1212 } 1213 1214 containerName = vS.defaultPhysicalContainerLayout.containerNameSlice[vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex] 1215 1216 vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex++ 1217 1218 if vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex == vS.defaultPhysicalContainerLayout.containersPerPeer { 1219 vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex = 0 1220 vS.defaultPhysicalContainerLayout.containerNameSliceLoopCount++ 1221 } 1222 1223 vS.Unlock() 1224 1225 err = nil 1226 return 1227 } 1228 1229 func (vS *volumeStruct) Access(inodeNumber InodeNumber, userID InodeUserID, groupID InodeGroupID, otherGroupIDs []InodeGroupID, accessMode InodeMode, override AccessOverride) (accessReturn bool) { 1230 var ( 1231 adjustedInodeNumber InodeNumber 1232 err error 1233 groupIDCheck bool 1234 ok bool 1235 otherGroupID InodeGroupID 1236 ourInode *inMemoryInodeStruct 1237 ourInodeGroupID InodeGroupID 1238 ourInodeMode InodeMode 1239 ourInodeUserID InodeUserID 1240 snapShotIDType headhunter.SnapShotIDType 1241 ) 1242 1243 snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 1244 1245 switch snapShotIDType { 1246 case headhunter.SnapShotIDTypeLive: 1247 adjustedInodeNumber = inodeNumber 1248 case headhunter.SnapShotIDTypeSnapShot: 1249 adjustedInodeNumber = inodeNumber 1250 case headhunter.SnapShotIDTypeDotSnapShot: 1251 adjustedInodeNumber = RootDirInodeNumber 1252 default: 1253 logger.Fatalf("headhunter.SnapShotU64Decode(inodeNumber == 0x%016X) returned unknown snapShotIDType: %v", inodeNumber, 
snapShotIDType) 1254 } 1255 if (headhunter.SnapShotIDTypeLive != snapShotIDType) && (0 != (W_OK & accessMode)) { 1256 err = blunder.NewError(blunder.InvalidArgError, "Access() where accessMode includes W_OK of non-LiveView inodeNumber not allowed") 1257 return 1258 } 1259 1260 ourInode, ok, err = vS.fetchInode(adjustedInodeNumber) 1261 if nil != err { 1262 // this indicates disk corruption or software bug 1263 // (err includes volume name and inode number) 1264 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 1265 1266 // if we can't fetch the inode we can't access it 1267 accessReturn = false 1268 return 1269 } 1270 if !ok { 1271 // disk corruption or client requested a free inode 1272 logger.Infof("%s: fetch of inode %d volume '%s' failed because it is unallocated", 1273 utils.GetFnName(), inodeNumber, vS.volumeName) 1274 1275 // if the inode is free then we can't access it 1276 accessReturn = false 1277 return 1278 } 1279 1280 ourInodeUserID = ourInode.UserID 1281 ourInodeGroupID = ourInode.GroupID 1282 1283 if headhunter.SnapShotIDTypeLive == snapShotIDType { 1284 ourInodeMode = ourInode.Mode 1285 } else { 1286 ourInodeMode = ourInode.Mode // TODO: Make it read-only... 1287 } 1288 1289 if F_OK == accessMode { 1290 // the inode exists so its F_OK 1291 accessReturn = true 1292 return 1293 } 1294 1295 if P_OK == accessMode { 1296 accessReturn = (InodeRootUserID == userID) || (userID == ourInodeUserID) 1297 return 1298 } 1299 1300 if accessMode != (accessMode & (R_OK | W_OK | X_OK)) { 1301 // Default to false if P_OK bit set along with any others) 1302 accessReturn = false 1303 return 1304 } 1305 1306 // Only the LiveView is ever writeable... 
even by the root user 1307 if (accessMode&W_OK != 0) && (headhunter.SnapShotIDTypeLive != snapShotIDType) { 1308 accessReturn = false 1309 return 1310 } 1311 1312 // The root user (if not squashed) can do anything except exec files 1313 // that are not executable by any user 1314 if userID == InodeRootUserID { 1315 if (accessMode&X_OK != 0) && (ourInodeMode&(X_OK<<6|X_OK<<3|X_OK) == 0) { 1316 accessReturn = false 1317 } else { 1318 accessReturn = true 1319 } 1320 return 1321 } 1322 1323 // We check against permissions for the user, group, and other. The 1324 // first match wins (not the first permission granted). If the user is 1325 // the owner of the file then those permission bits determine what 1326 // happens. In other words, if the permission bits deny read permission 1327 // to the owner of a file but allow read permission for group and other, 1328 // then everyone except the owner of the file can read it. 1329 // 1330 // On a local file system, the owner of a file is *not* allowed to write 1331 // to the file unless it was opened for writing and the permission bits 1332 // allowed it *or* the process created the file and opened it for 1333 // writing at the same time. However, NFS does not have an open state 1334 // (there's no file descriptor that tracks permissions when the the file 1335 // was opened) so we check for write permission on every write. This 1336 // breaks things like tar when it tries to unpack a file which has 1337 // permission 0444 (read only). On a local file system that works, but 1338 // it doesn't work for NFS unless we bend the rules a bit for the owner 1339 // of the file and allow the owner to write to the file even if 1340 // appropriate permissions are lacking. (This is only done for the user 1341 // that owns the file, not the group that owns the file. Note that the 1342 // owner can always change the permissions to allow writing so its not a 1343 // security risk, but the owning group cannot). 
1344 // 1345 // Note that the NFS client will typically call Access() when an app 1346 // wants to open the file and fail an open request for writing that if 1347 // the permission bits do not allow it. 1348 // 1349 // Similar rules apply to Read() and Truncate() (for ftruncate(2)), but 1350 // not for execute permission. Also, this only applies to regular files 1351 // but we'll rely on the caller for that. 1352 if userID == ourInodeUserID { 1353 if override == OwnerOverride && (accessMode&X_OK == 0) { 1354 accessReturn = true 1355 } else { 1356 accessReturn = (((ourInodeMode >> 6) & accessMode) == accessMode) 1357 } 1358 return 1359 } 1360 1361 groupIDCheck = (groupID == ourInodeGroupID) 1362 if !groupIDCheck { 1363 for _, otherGroupID = range otherGroupIDs { 1364 if otherGroupID == ourInodeGroupID { 1365 groupIDCheck = true 1366 break 1367 } 1368 } 1369 } 1370 if groupIDCheck { 1371 accessReturn = ((((ourInodeMode >> 3) & 07) & accessMode) == accessMode) 1372 return 1373 } 1374 1375 accessReturn = ((((ourInodeMode >> 0) & 07) & accessMode) == accessMode) 1376 return 1377 } 1378 1379 func (vS *volumeStruct) ProvisionObject() (objectPath string, err error) { 1380 err = enforceRWMode(true) 1381 if nil != err { 1382 return 1383 } 1384 1385 containerName, objectNumber, err := vS.provisionObject() 1386 if nil != err { 1387 return 1388 } 1389 1390 objectPath = fmt.Sprintf("/v1/%s/%s/%016X", vS.accountName, containerName, objectNumber) 1391 1392 err = nil 1393 return 1394 } 1395 1396 func (vS *volumeStruct) Purge(inodeNumber InodeNumber) (err error) { 1397 var ( 1398 inode *inMemoryInodeStruct 1399 ok bool 1400 ) 1401 1402 err = enforceRWMode(false) 1403 if nil != err { 1404 return 1405 } 1406 1407 inode, ok, err = vS.inodeCacheFetch(inodeNumber) 1408 if (nil != err) || !ok { 1409 return 1410 } 1411 1412 if inode.dirty { 1413 err = fmt.Errorf("Inode dirty... 
cannot be purged")
		return
	}

	ok, err = vS.inodeCacheDrop(inode)
	if nil != err {
		return
	}
	if !ok {
		err = fmt.Errorf("inodeCacheDrop(inode) failed")
	}

	return
}

// Destroy removes all traces of a LiveView inode: it is dropped from the
// inode cache, its HeadHunter inode record is deleted, its payload B+Tree
// (Dir/File) is discarded, and (File) its log segment records are deleted.
//
// BUG FIX: the log-segment deletion loop previously did `return` after
// logging a deletion failure — returning a nil err while silently abandoning
// the remaining log segments and the stats updates. Deletion is now
// best-effort (warn and continue), matching flushInodes()'s garbage
// collection behavior.
func (vS *volumeStruct) Destroy(inodeNumber InodeNumber) (err error) {
	logger.Tracef("inode.Destroy(): volume '%s' inode %d", vS.volumeName, inodeNumber)

	err = enforceRWMode(false)
	if nil != err {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("Destroy() on non-LiveView inodeNumber not allowed")
		return
	}

	ourInode, ok, err := vS.fetchInode(inodeNumber)
	if nil != err {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		return
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: cannot destroy inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		err = blunder.AddError(err, blunder.NotFoundError)
		logger.ErrorWithError(err)
		return
	}

	ok, err = vS.inodeCacheDrop(ourInode)
	if nil != err {
		logger.ErrorfWithError(err, "%s: inodeCacheDrop() of inode failed: %v", utils.GetFnName(), err)
		return
	}
	if !ok {
		logger.ErrorfWithError(err, "%s: inodeCacheDrop() of inode returned !ok", utils.GetFnName())
		return
	}

	if ourInode.InodeType == FileType {
		// Best-effort: push any in-flight data before discarding it below.
		_ = vS.doFileInodeDataFlush(ourInode)
	}

	err = vS.headhunterVolumeHandle.DeleteInodeRec(uint64(inodeNumber))
	if nil != err {
		logger.ErrorWithError(err)
		return
	}

	if DirType == ourInode.InodeType {
		logger.Tracef("inode.Destroy(): volume '%s' inode %d: discarding dirmap payload Object %016X len %d",
			vS.volumeName, inodeNumber, ourInode.PayloadObjectNumber, ourInode.PayloadObjectLength)

		dirMapping := ourInode.payload.(sortedmap.BPlusTree)

		err = dirMapping.Discard()
		if nil != err {
			logger.ErrorWithError(err)
			return
		}

		stats.IncrementOperations(&stats.DirDestroyOps)

	} else if FileType == ourInode.InodeType {
		logger.Tracef("inode.Destroy(): volume '%s' inode %d: discarding extmap payload Object %016X len %d",
			vS.volumeName, inodeNumber, ourInode.PayloadObjectNumber, ourInode.PayloadObjectLength)

		extents := ourInode.payload.(sortedmap.BPlusTree)

		err = extents.Discard()
		if nil != err {
			logger.ErrorWithError(err)
			return
		}

		for logSegmentNumber := range ourInode.LogSegmentMap {
			deleteSegmentErr := vS.headhunterVolumeHandle.DeleteLogSegmentRec(logSegmentNumber)
			if nil != deleteSegmentErr {
				// Best-effort garbage collection: warn and keep going so the
				// remaining segments still get deleted (see BUG FIX above).
				logger.WarnfWithError(deleteSegmentErr, "couldn't delete destroy'd log segment")
				continue
			}
			stats.IncrementOperations(&stats.GcLogSegDeleteOps)
		}
		stats.IncrementOperations(&stats.GcLogSegOps)

		stats.IncrementOperations(&stats.FileDestroyOps)
	} else { // SymlinkType == ourInode.InodeType
		stats.IncrementOperations(&stats.SymlinkDestroyOps)
	}

	return
}

// GetMetadata returns a copy of the inode's attributes (plus its stream
// names); SnapShot views are reported read-only.
func (vS *volumeStruct) GetMetadata(inodeNumber InodeNumber) (metadata *MetadataStruct, err error) {
	var (
		inode          *inMemoryInodeStruct
		ok             bool
		pos            int
		snapShotIDType headhunter.SnapShotIDType
	)

	snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
		// For /<SnapShotDirName>, start with metadata from /
		inode, ok, err = vS.fetchInode(RootDirInodeNumber)
	} else
{
		inode, ok, err = vS.fetchInode(inodeNumber)
	}

	if nil != err {
		// this indicates disk corruption or software error
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		return
	}
	if !ok {
		// disk corruption or client request for unallocated inode
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		err = blunder.AddError(err, blunder.NotFoundError)
		logger.InfoWithError(err)
		return
	}

	metadata = &MetadataStruct{
		InodeType:            inode.InodeType,
		LinkCount:            inode.LinkCount,
		Size:                 inode.Size,
		CreationTime:         inode.CreationTime,
		ModificationTime:     inode.ModificationTime,
		AccessTime:           inode.AccessTime,
		AttrChangeTime:       inode.AttrChangeTime,
		NumWrites:            inode.NumWrites,
		InodeStreamNameSlice: make([]string, len(inode.StreamMap)),
		Mode:                 inode.Mode,
		UserID:               inode.UserID,
		GroupID:              inode.GroupID,
	}

	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
		// For /<SnapShotDirName>, simply remove Write Access... and skip InodeStreamNameSlice
		metadata.Mode &= metadata.Mode & ^(W_OK<<6 | W_OK<<3 | W_OK<<0)
	} else {
		if headhunter.SnapShotIDTypeSnapShot == snapShotIDType {
			// For inodes in a SnapShot, simply remove Write Access
			metadata.Mode &= metadata.Mode & ^(W_OK<<6 | W_OK<<3 | W_OK<<0)
		}
		pos = 0
		for inodeStreamName := range inode.StreamMap {
			metadata.InodeStreamNameSlice[pos] = inodeStreamName
			pos++
		}
	}

	stats.IncrementOperations(&stats.InodeGetMetadataOps)
	return
}

// GetType returns the inode's type; /<SnapShotDirName> is always a directory.
func (vS *volumeStruct) GetType(inodeNumber InodeNumber) (inodeType InodeType, err error) {
	if snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)); headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
		// The synthetic /<SnapShotDirName> needs no inode fetch.
		inodeType = DirType
		err = nil
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// this indicates disk corruption or software error
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		return
	}
	if !ok {
		// disk corruption or client request for unallocated inode
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.InfoWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}

	inodeType = inode.InodeType

	stats.IncrementOperations(&stats.InodeGetTypeOps)
	return
}

// GetLinkCount returns the inode's link count, adjusted for the synthetic
// /<SnapShotDirName> directory: the root directory gains one link while any
// SnapShots exist, and /<SnapShotDirName> itself is computed without an
// inode fetch.
func (vS *volumeStruct) GetLinkCount(inodeNumber InodeNumber) (linkCount uint64, err error) {
	var (
		adjustForSnapShotSubDirInRootDir bool
		inode                            *inMemoryInodeStruct
		ok                               bool
		snapShotCount                    uint64
		snapShotIDType                   headhunter.SnapShotIDType
	)

	if RootDirInodeNumber == inodeNumber {
		// Account for .. in /<SnapShotDirName> if any SnapShot's exist
		snapShotCount = vS.headhunterVolumeHandle.SnapShotCount()
		adjustForSnapShotSubDirInRootDir = (0 != snapShotCount)
	} else {
		snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
		if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
			// linkCount == 1 (/<SnapShotDirName>'s '.') + 1 (/'s reference to <SnapShotDirName>) + # SnapShot's (/..' in each SnapShot's /)
			snapShotCount = vS.headhunterVolumeHandle.SnapShotCount()
			linkCount = 1 + 1 + snapShotCount
			err = nil
			return
		}
		adjustForSnapShotSubDirInRootDir = false
	}

	inode, ok, err = vS.fetchInode(inodeNumber)
	if err != nil {
		// this indicates disk corruption or software error
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		return
	}
	if !ok {
		// disk corruption or client request for unallocated inode
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.InfoWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}

	linkCount = inode.LinkCount
	if adjustForSnapShotSubDirInRootDir {
		linkCount++
	}

	return
}

// SetLinkCount is used to adjust the LinkCount property to match current reference count during FSCK TreeWalk.
1669 func (vS *volumeStruct) SetLinkCount(inodeNumber InodeNumber, linkCount uint64) (err error) { 1670 err = enforceRWMode(false) 1671 if nil != err { 1672 return 1673 } 1674 1675 snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 1676 if headhunter.SnapShotIDTypeLive != snapShotIDType { 1677 err = fmt.Errorf("SetLinkCount() on non-LiveView inodeNumber not allowed") 1678 return 1679 } 1680 1681 inode, ok, err := vS.fetchInode(inodeNumber) 1682 if err != nil { 1683 // this indicates disk corruption or software error 1684 // (err includes volume name and inode number) 1685 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 1686 return 1687 } 1688 if !ok { 1689 // disk corruption or client request for unallocated inode 1690 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 1691 utils.GetFnName(), inodeNumber, vS.volumeName) 1692 logger.InfoWithError(err) 1693 err = blunder.AddError(err, blunder.NotFoundError) 1694 return 1695 } 1696 1697 inode.dirty = true 1698 inode.LinkCount = linkCount 1699 1700 err = vS.flushInode(inode) 1701 if err != nil { 1702 logger.ErrorWithError(err) 1703 return err 1704 } 1705 1706 return 1707 } 1708 1709 func (vS *volumeStruct) SetCreationTime(inodeNumber InodeNumber, CreationTime time.Time) (err error) { 1710 err = enforceRWMode(false) 1711 if nil != err { 1712 return 1713 } 1714 1715 snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 1716 if headhunter.SnapShotIDTypeLive != snapShotIDType { 1717 err = fmt.Errorf("SetCreationTime() on non-LiveView inodeNumber not allowed") 1718 return 1719 } 1720 1721 inode, ok, err := vS.fetchInode(inodeNumber) 1722 if err != nil { 1723 // the inode is locked so this should never happen (unless the inode 1724 // was evicted from the cache and it was corrupt when read from disk) 1725 logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName()) 
1726 return err 1727 } 1728 if !ok { 1729 // this should never happen (see above) 1730 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 1731 utils.GetFnName(), inodeNumber, vS.volumeName) 1732 logger.ErrorWithError(err) 1733 err = blunder.AddError(err, blunder.NotFoundError) 1734 return err 1735 } 1736 1737 inode.dirty = true 1738 inode.AttrChangeTime = time.Now() 1739 inode.CreationTime = CreationTime 1740 1741 err = vS.flushInode(inode) 1742 if err != nil { 1743 logger.ErrorWithError(err) 1744 return err 1745 } 1746 return 1747 } 1748 1749 func (vS *volumeStruct) SetModificationTime(inodeNumber InodeNumber, ModificationTime time.Time) (err error) { 1750 err = enforceRWMode(false) 1751 if nil != err { 1752 return 1753 } 1754 1755 snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 1756 if headhunter.SnapShotIDTypeLive != snapShotIDType { 1757 err = fmt.Errorf("SetModificationTime() on non-LiveView inodeNumber not allowed") 1758 return 1759 } 1760 1761 inode, ok, err := vS.fetchInode(inodeNumber) 1762 if err != nil { 1763 // the inode is locked so this should never happen (unless the inode 1764 // was evicted from the cache and it was corrupt when read from disk) 1765 logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName()) 1766 return err 1767 } 1768 if !ok { 1769 // this should never happen (see above) 1770 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 1771 utils.GetFnName(), inodeNumber, vS.volumeName) 1772 logger.ErrorWithError(err) 1773 err = blunder.AddError(err, blunder.NotFoundError) 1774 return err 1775 } 1776 1777 inode.dirty = true 1778 inode.AttrChangeTime = time.Now() 1779 inode.ModificationTime = ModificationTime 1780 1781 err = vS.flushInode(inode) 1782 if err != nil { 1783 logger.ErrorWithError(err) 1784 return err 1785 } 1786 1787 return 1788 } 1789 1790 func (vS *volumeStruct) 
SetAccessTime(inodeNumber InodeNumber, accessTime time.Time) (err error) {
	if err = enforceRWMode(false); err != nil {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetAccessTime() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	inode.dirty = true
	inode.AttrChangeTime = time.Now()
	inode.AccessTime = accessTime

	if err = vS.flushInode(inode); err != nil {
		logger.ErrorWithError(err)
	}
	return
}

// determineMode combines the caller-supplied permission bits with the type
// bits implied by inodeType. Samba is known to send file-type bits along with
// the permissions, so anything outside PosixModePerm is silently masked off
// (with a trace message).
func determineMode(filePerm InodeMode, inodeType InodeType) (fileMode InodeMode, err error) {
	if filePerm&^PosixModePerm != 0 {
		logger.Tracef("inode.determineMode(): invalid file mode 0x%x (max 0x%x); removing file type bits.", uint32(filePerm), uint32(PosixModePerm))
	}

	// Start with just the permission bits...
	fileMode = filePerm & PosixModePerm

	// ...then add the file-type bits for the inode's type.
	switch inodeType {
	case DirType:
		fileMode |= PosixModeDir
	case FileType:
		fileMode |= PosixModeFile
	case SymlinkType:
		fileMode |= PosixModeSymlink
	default:
		err = fmt.Errorf("%s: unrecognized inode type %v", utils.GetFnName(), inodeType)
		err = blunder.AddError(err, blunder.InvalidInodeTypeError)
		return
	}

	err = nil
	return
}

// SetPermMode replaces the inode's permission bits (the type bits are derived
// from the inode's type), updates AttrChangeTime, and flushes the inode.
func (vS *volumeStruct) SetPermMode(inodeNumber InodeNumber, filePerm InodeMode) (err error) {
	if err = enforceRWMode(false); err != nil {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetPermMode() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	// Create file mode out of file permissions plus inode type
	fileMode, err := determineMode(filePerm, inode.InodeType)
	if err != nil {
		return err
	}

	inode.dirty = true
	inode.Mode = fileMode
	inode.AttrChangeTime = time.Now()

	if err = vS.flushInode(inode); err != nil {
		logger.ErrorWithError(err)
		return err
	}
	return
}

func (vS *volumeStruct) SetOwnerUserID(inodeNumber
InodeNumber, userID InodeUserID) (err error) {
	// Reject the update if the volume is mounted read-only.
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	// Ownership may only be changed on the live view; snapshots are immutable.
	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetOwnerUserID() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	// Record the new owner and bump ctime before persisting.
	inode.dirty = true
	inode.UserID = userID

	updateTime := time.Now()
	inode.AttrChangeTime = updateTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// SetOwnerUserIDGroupID sets both the owning user and owning group of the
// indicated (LiveView-only) inode in a single update (one ctime bump, one
// flush) and persists the inode to disk.
func (vS *volumeStruct) SetOwnerUserIDGroupID(inodeNumber InodeNumber, userID InodeUserID, groupID InodeGroupID) (err error) {
	// Reject the update if the volume is mounted read-only.
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	// Ownership may only be changed on the live view; snapshots are immutable.
	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetOwnerUserIDGroupID() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	// Record both owners and bump ctime before persisting.
	inode.dirty = true
	inode.UserID = userID
	inode.GroupID = groupID

	updateTime := time.Now()
	inode.AttrChangeTime = updateTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// SetOwnerGroupID sets the owning group of the indicated (LiveView-only)
// inode, bumps ctime, and flushes the inode to disk.
func (vS *volumeStruct) SetOwnerGroupID(inodeNumber InodeNumber, groupID InodeGroupID) (err error) {
	// Reject the update if the volume is mounted read-only.
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	// Ownership may only be changed on the live view; snapshots are immutable.
	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetOwnerGroupID() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	// Record the new group and bump ctime before persisting.
	inode.dirty = true
	inode.GroupID = groupID

	updateTime := time.Now()
	inode.AttrChangeTime = updateTime

	err = vS.flushInode(inode)
	if
err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// GetStream returns a copy of the named alternate data stream of the given
// inode. A missing stream — or any stream lookup under the emulated
// /<SnapShotDirName> directory — yields blunder.StreamNotFound.
func (vS *volumeStruct) GetStream(inodeNumber InodeNumber, inodeStreamName string) (buf []byte, err error) {
	// The /<SnapShotDirName> directory is emulated and carries no streams.
	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
		err = fmt.Errorf("No stream '%v'", inodeStreamName)
		return buf, blunder.AddError(err, blunder.StreamNotFound)
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// this indicates disk corruption or software error
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		return nil, err
	}
	if !ok {
		// disk corruption or client request for unallocated inode
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.InfoWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return nil, err
	}

	inodeStreamBuf, ok := inode.StreamMap[inodeStreamName]

	if !ok {
		err = fmt.Errorf("No stream '%v'", inodeStreamName)
		return buf, blunder.AddError(err, blunder.StreamNotFound)
	}

	// Return a private copy so callers cannot mutate the cached inode's stream.
	buf = make([]byte, len(inodeStreamBuf))

	copy(buf, inodeStreamBuf)

	err = nil

	return
}

// PutStream stores a copy of buf as the named alternate data stream of the
// given (LiveView-only) inode, bumps ctime, and flushes the inode to disk.
// An existing stream of the same name is replaced.
func (vS *volumeStruct) PutStream(inodeNumber InodeNumber, inodeStreamName string, buf []byte) (err error) {
	// Reject the update if the volume is mounted read-only.
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	// Only live-view inodes may be modified.
	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("PutStream() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// this indicates disk corruption or software error
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// disk corruption or client request for unallocated inode
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.InfoWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	// Store a private copy so later caller mutations of buf cannot corrupt
	// the cached inode's stream data.
	inodeStreamBuf := make([]byte, len(buf))

	copy(inodeStreamBuf, buf)

	inode.dirty = true
	inode.StreamMap[inodeStreamName] = inodeStreamBuf

	updateTime := time.Now()
	inode.AttrChangeTime = updateTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// DeleteStream removes the named alternate data stream from the given
// (LiveView-only) inode, bumps ctime, and flushes the inode to disk.
// Deleting a stream that does not exist is not an error (map delete is a
// no-op), though the inode is still flushed.
func (vS *volumeStruct) DeleteStream(inodeNumber InodeNumber, inodeStreamName string) (err error) {
	// Reject the update if the volume is mounted read-only.
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	// Only live-view inodes may be modified.
	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("DeleteStream() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// this indicates disk corruption or software error
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		return
	}
	if !ok {
		// disk corruption or client request for unallocated inode
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.InfoWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}

	// Remove the stream (a no-op if it is not present) and bump ctime.
	inode.dirty = true
	delete(inode.StreamMap, inodeStreamName)

	updateTime := time.Now()
	inode.AttrChangeTime = updateTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// FetchLayoutReport returns the B+Tree layout report for the payload of the
// given inode. For the emulated /<SnapShotDirName> directory and for symlink
// inodes (which carry no payload tree) an empty report is returned.
func (vS *volumeStruct) FetchLayoutReport(inodeNumber InodeNumber) (layoutReport sortedmap.LayoutReport, err error) {
	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
		// /<SnapShotDirName> is emulated; there is nothing on disk to report.
		layoutReport = make(sortedmap.LayoutReport)
		err = nil
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// this indicates disk corruption or software error
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		return nil, err
	}
	if !ok {
		// disk corruption or client request for unallocated inode
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.InfoWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return nil, err
	}

	if SymlinkType == inode.InodeType {
		// Symlinks have no B+Tree payload; report nothing.
		layoutReport = make(sortedmap.LayoutReport)
		err = nil
	} else {
		// Dir/File inodes keep their payload as a B+Tree.
		layoutReport, err = inode.payload.(sortedmap.BPlusTree).FetchLayoutReport()
	}

	return
}

// FetchFragmentationReport is a placeholder; it currently always returns an
// "not yet implemented" error.
func (vS *volumeStruct) FetchFragmentationReport(inodeNumber InodeNumber) (fragmentationReport FragmentationReport, err error) {
	err = fmt.Errorf("FetchFragmentationReport not yet implemented")
	return
}

// Optimize is a placeholder; after the usual read-only-volume check it
// currently always returns a "not yet implemented" error.
func (vS *volumeStruct) Optimize(inodeNumber InodeNumber, maxDuration time.Duration) (err error) {
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	err = fmt.Errorf("Optimize not yet implemented")
	return
}

// validateFileExtents cross-checks a file inode's metadata against its data:
// it builds a full-file read plan and verifies that (1) the plan's total
// byte count matches the inode's recorded Size, (2) the plan's
// per-log-segment byte counts match the inode's LogSegmentMap, and (3) each
// log segment object referenced by the plan is long enough in Swift to
// satisfy the plan. Any inconsistency is returned as a CorruptInodeError.
func validateFileExtents(snapShotID uint64, ourInode *inMemoryInodeStruct) (err error) {
	var (
		zero = uint64(0)
	)

	// A read plan starting at offset 0 with no length limit covers the whole file.
	readPlan, readPlanBytes, err := ourInode.volume.getReadPlanHelper(snapShotID, ourInode, &zero, nil)
	if err != nil {
		return err
	}

	// We read the whole file, so these should match
	if readPlanBytes != ourInode.Size {
		return blunder.NewError(blunder.CorruptInodeError, "inode %v had recorded size %v bytes, but full read plan was only %v bytes", ourInode.InodeNumber, ourInode.Size, readPlanBytes)
	}

	// Let's check that the read plan is consistent with what the inode's
	// internal log-segment map says about which segments should have how much data.
	//
	// Make a copy of the inode's LogSegmentMap map so we can decrement the
	// byte count for each segment as we walk the readPlan entries.
	remainingExpectedBytes := make(map[uint64]uint64)
	for segmentNumber, segmentBytesUsed := range ourInode.LogSegmentMap {
		remainingExpectedBytes[segmentNumber] += segmentBytesUsed
	}
	// Then we can compare with the actual read plan we got ...
	for _, readPlanStep := range readPlan {

		// holes in a sparse file aren't counted
		if readPlanStep.LogSegmentNumber == 0 {
			continue
		}
		// The log segment number is the (hex-encoded) final component of the
		// step's object path.
		pathSegments := strings.Split(readPlanStep.ObjectPath, "/")
		logSegmentRepresentation := pathSegments[len(pathSegments)-1]
		logSegmentNumber, hexConvErr := utils.HexStrToUint64(logSegmentRepresentation)
		if hexConvErr != nil {
			return blunder.NewError(blunder.CorruptInodeError,
				"conversion of read plan object name to log segment number failed; "+
					"readPlanStep: %v logSegmentString: '%v' err: %v",
				readPlanStep, logSegmentRepresentation, hexConvErr)
		}
		// NOTE: unsigned arithmetic — a segment absent from LogSegmentMap
		// underflows here, which the nonzero-residue check below then
		// reports as a (huge) mismatch.
		remainingExpectedBytes[logSegmentNumber] -= readPlanStep.Length
	}
	// ... and fail validation if any log segment didn't match.
// We'll put the
	// mismatches in a separate map that we'll attach to the error in case a
	// consumer or logger wants it.
	logSegmentByteCountMismatches := make(map[uint64]uint64)
	for logSegmentNumber, remainingExpectedByteCount := range remainingExpectedBytes {
		if remainingExpectedByteCount != 0 {
			logSegmentByteCountMismatches[logSegmentNumber] = remainingExpectedByteCount
		}
	}
	if len(logSegmentByteCountMismatches) != 0 {
		// Attach the mismatch map to the error via merry for downstream consumers.
		rootErr := fmt.Errorf("inconsistency detected between log segment map and read plan for inode %v", ourInode.InodeNumber)
		return merry.WithValue(blunder.AddError(rootErr, blunder.CorruptInodeError), "logSegmentByteCountMismatches", logSegmentByteCountMismatches)
	}

	// Having verified that our read plan is consistent with our internal log
	// segment map, we also want to check that it's consistent with the actual log
	// segment objects in Swift. First, we'll construct a map of object paths to
	// the largest offset we would need read up to in that object.
	objectPathToEndOffset := make(map[string]uint64)

	for _, planStep := range readPlan {

		// holes in a sparse file don't have objects
		if planStep.LogSegmentNumber == 0 {
			continue
		}
		stepEndOffset := planStep.Offset + planStep.Length
		endOffset, ok := objectPathToEndOffset[planStep.ObjectPath]
		if !ok || stepEndOffset > endOffset {
			objectPathToEndOffset[planStep.ObjectPath] = stepEndOffset
		}
	}

	// then, HEAD each object to make sure that it has enough bytes.
	for objectPath, endOffset := range objectPathToEndOffset {
		accountName, containerName, objectName, err := utils.PathToAcctContObj(objectPath)
		if err != nil {
			logger.ErrorWithError(err)
			return err
		}

		contentLength, err := swiftclient.ObjectContentLength(accountName, containerName, objectName)
		if err != nil {
			logger.ErrorWithError(err)
			return err
		}

		if contentLength < endOffset {
			// REVIEW: it might be helpful to continue and make a combined report of all
			// insufficiently long log segments, rather than erroring out immediately
			err = fmt.Errorf("expected %q to have at least %v bytes, content length was %v", objectPath, endOffset, contentLength)
			logger.ErrorWithError(err)
			return err
		}

	}

	return nil
}

// markCorrupted persistently flags the given (LiveView-only) inode as
// corrupt by overwriting the leading CorruptionDetected field of its
// headhunter inode record with true — or, if the existing record cannot be
// read or is too short, by replacing the record with a minimal
// CorruptionDetected == true record.
func (vS *volumeStruct) markCorrupted(inodeNumber InodeNumber) (err error) {
	var (
		inodeRec       []byte
		ok             bool
		snapShotIDType headhunter.SnapShotIDType
	)

	// Snapshot views are immutable; only live-view inodes may be marked.
	snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = blunder.NewError(blunder.InvalidArgError, "markCorrupted() of non-LiveView inodeNumber not allowed")
		return
	}

	inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber))
	if nil == err && ok && (len(globals.corruptionDetectedTrueBuf) <= len(inodeRec)) {
		// Just overwrite CorruptionDetected field with true
		_ = copy(inodeRec, globals.corruptionDetectedTrueBuf)
	} else {
		// Use a simple CorruptionDetected == true inodeRec
		inodeRec = globals.corruptionDetectedTrueBuf
	}

	err = vS.headhunterVolumeHandle.PutInodeRec(uint64(inodeNumber), inodeRec)

	return
}

// Validate checks the structural integrity of the given inode's on-disk
// state, deliberately bypassing the in-memory cache (flush, purge, then
// re-fetch). For Dir/File inodes the payload B+Tree is validated; for File
// inodes, deeply == true additionally cross-checks extents against the
// actual log segment objects (see validateFileExtents). On detecting
// corruption the inode is marked corrupted on disk via markCorrupted().
func (vS *volumeStruct) Validate(inodeNumber InodeNumber, deeply bool) (err error) {
	var (
		ok       bool
		ourInode *inMemoryInodeStruct
snapShotID uint64
		snapShotIDType headhunter.SnapShotIDType
		tree           sortedmap.BPlusTree
	)

	snapShotIDType, snapShotID, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
		err = nil // Since /<SnapShotDirName> is emulated, always return success
		return
	}

	// we don't want to use the in-memory cache for this; we'll need to fetch
	// the current real-world bits from disk.

	// If this is a file inode, we flush to ensure that the inode is not dirty
	// (and that DLM locking therefore ensures we have exclusive access to the
	// inode and don't need to serialize this operation, as there can be no pending
	// time-based flush to race with).

	err = vS.flushInodeNumber(inodeNumber)
	if nil != err {
		logger.ErrorfWithError(err, "couldn't flush inode %v", inodeNumber)
		err = blunder.AddError(err, blunder.CorruptInodeError)
		return
	}

	// Drop any cached copy so the subsequent fetch reads the on-disk bits.
	err = vS.Purge(inodeNumber)
	if nil != err {
		logger.ErrorfWithError(err, "couldn't purge inode %v", inodeNumber)
		err = blunder.AddError(err, blunder.CorruptInodeError)
		return
	}

	ourInode, ok, err = vS.fetchInode(inodeNumber)
	if nil != err {
		// this indicates disk corruption or software error
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		err = blunder.AddError(err, blunder.CorruptInodeError)
		return
	}
	if !ok {
		// disk corruption or client request for unallocated inode
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.InfoWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}

	switch ourInode.InodeType {
	case DirType, FileType:
		// Dir and File inodes carry a B+Tree payload; validate its structure.
		tree, ok = ourInode.payload.(sortedmap.BPlusTree)
		if !ok {
			err = fmt.Errorf("type conversion of inode %v payload to sortedmap.BPlusTree failed", ourInode.InodeNumber)
			err = blunder.AddError(err, blunder.CorruptInodeError)
			_ = vS.markCorrupted(inodeNumber)
			return
		}
		err = tree.Validate()
		if nil != err {
			err = blunder.AddError(err, blunder.CorruptInodeError)
			_ = vS.markCorrupted(inodeNumber)
			return
		}
		// Deep validation (File inodes only) cross-checks extents against
		// the actual log segment objects in Swift.
		if FileType == ourInode.InodeType {
			if deeply {
				err = validateFileExtents(snapShotID, ourInode)
				if nil != err {
					err = blunder.AddError(err, blunder.CorruptInodeError)
					_ = vS.markCorrupted(inodeNumber)
					return
				}
			}
		}
	case SymlinkType:
		// Nothing to be done here
	default:
		err = fmt.Errorf("unrecognized inode type")
		err = blunder.AddError(err, blunder.CorruptInodeError)
		_ = vS.markCorrupted(inodeNumber)
		return
	}

	err = nil
	return
}